44 files changed, 16782 insertions, 0 deletions
diff --git a/arch/m68k/fpsp040/Makefile b/arch/m68k/fpsp040/Makefile
new file mode 100644
index 0000000..0214d2f
--- /dev/null
+++ b/arch/m68k/fpsp040/Makefile
@@ -0,0 +1,16 @@
+# Makefile for Linux arch/m68k/fpsp040 source directory
+# NOTE(review): OS_OBJS is not assigned in this file, so the fpsp.h rule at the
+# end may have no targets - confirm whether $(obj-y) was intended there.
+
+obj-y    := bindec.o binstr.o decbin.o do_func.o gen_except.o get_op.o \
+	    kernel_ex.o res_func.o round.o sacos.o sasin.o satan.o satanh.o \
+	    scosh.o setox.o sgetem.o sint.o slog2.o slogn.o \
+	    smovecr.o srem_mod.o scale.o \
+	    ssin.o ssinh.o stan.o stanh.o sto_res.o stwotox.o tbldo.o util.o \
+	    x_bsun.o x_fline.o x_operr.o x_ovfl.o x_snan.o x_store.o \
+	    x_unfl.o x_unimp.o x_unsupp.o bugfix.o skeleton.o
+
+EXTRA_AFLAGS := -traditional
+EXTRA_LDFLAGS := -x
+
+$(OS_OBJS): fpsp.h
diff --git a/arch/m68k/fpsp040/README b/arch/m68k/fpsp040/README
new file mode 100644
index 0000000..f574944
--- /dev/null
+++ b/arch/m68k/fpsp040/README
@@ -0,0 +1,30 @@
+
+MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
+M68000 Hi-Performance Microprocessor Division
+M68040 Software Package
+
+M68040 Software Package Copyright (c) 1993, 1994 Motorola Inc.
+All rights reserved.
+
+THE SOFTWARE is provided on an "AS IS" basis and without warranty.
+To the maximum extent permitted by applicable law,
+MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
+INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A
+PARTICULAR PURPOSE and any warranty against infringement with
+regard to the SOFTWARE (INCLUDING ANY MODIFIED VERSIONS THEREOF)
+and any accompanying written materials.
+
+To the maximum extent permitted by applicable law,
+IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
+(INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS
+PROFITS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR
+OTHER PECUNIARY LOSS) ARISING OF THE USE OR INABILITY TO USE THE
+SOFTWARE.  Motorola assumes no responsibility for the maintenance
+and support of the SOFTWARE.
+
+You are hereby granted a copyright license to use, modify, and
+distribute the SOFTWARE so long as this entire notice is retained
+without alteration in any modified and/or redistributed versions,
+and that such modified versions are clearly identified as such.
+No licenses are granted by implication, estoppel or otherwise
+under any patents or trademarks of Motorola, Inc.
diff --git a/arch/m68k/fpsp040/bindec.S b/arch/m68k/fpsp040/bindec.S
new file mode 100644
index 0000000..3ba446a9
--- /dev/null
+++ b/arch/m68k/fpsp040/bindec.S
@@ -0,0 +1,920 @@
+|
+|	bindec.sa 3.4 1/3/91
+|
+|	bindec
+|
+|	Description:
+|		Converts an input in extended precision format
+|		to bcd format.
+|
+|	Input:
+|		a0 points to the input extended precision value
+|		in memory; d0 contains the k-factor sign-extended
+|		to 32-bits.  The input may be either normalized,
+|		unnormalized, or denormalized.
+|
+|	Output:	result in the FP_SCR1 space on the stack.
+|
+|	Saves and Modifies: D2-D7,A2,FP0-FP2
+|
+|	Algorithm:
+|
+|	A1.	Set RM and size ext;  Set SIGMA = sign of input.
+|		The k-factor is saved for use in d7. Clear the
+|		BINDEC_FLG for separating normalized/denormalized
+|		input.  If input is unnormalized or denormalized,
+|		normalize it.
+|
+|	A2.	Set X = abs(input).
+|
+|	A3.	Compute ILOG.
+|		ILOG is the log base 10 of the input value.  It is
+|		approximated by adding e + 0.f when the original
+|		value is viewed as 2^^e * 1.f in extended precision.
+|		This value is stored in d6.
+|
+|	A4.	Clr INEX bit.
+|		The operation in A3 above may have set INEX2.
+|
+|	A5.	Set ICTR = 0;
+|		ICTR is a flag used in A13.  It must be set before the
+|		loop entry A6.
+|
+|	A6.	Calculate LEN.
+|		LEN is the number of digits to be displayed.  The
+|		k-factor can dictate either the total number of digits,
+|		if it is a positive number, or the number of digits
+|		after the decimal point which are to be included as
+|		significant.  See the 68882 manual for examples.
+|		If LEN is computed to be greater than 17, set OPERR in
+|		USER_FPSR.  LEN is stored in d4.
+|
+|	A7.	Calculate SCALE.
+|		SCALE is equal to 10^ISCALE, where ISCALE is the number
+|		of decimal places needed to insure LEN integer digits
+|		in the output before conversion to bcd. LAMBDA is the
+|		sign of ISCALE, used in A9. Fp1 contains
+|		10^^(abs(ISCALE)) using a rounding mode which is a
+|		function of the original rounding mode and the signs
+|		of ISCALE and X.  A table is given in the code.
+|
+|	A8.	Clr INEX; Force RZ.
+|		The operation in A7 above may have set INEX2.
+|		RZ mode is forced for the scaling operation to insure
+|		only one rounding error.  The grs bits are collected in
+|		the INEX flag for use in A10.
+|
+|	A9.	Scale X -> Y.
+|		The mantissa is scaled to the desired number of
+|		significant digits.  The excess digits are collected
+|		in INEX2.
+|
+|	A10.	Or in INEX.
+|		If INEX is set, round error occurred.  This is
+|		compensated for by 'or-ing' in the INEX2 flag to
+|		the lsb of Y.
+|
+|	A11.	Restore original FPCR; set size ext.
+|		Perform FINT operation in the user's rounding mode.
+|		Keep the size to extended.
+|
+|	A12.	Calculate YINT = FINT(Y) according to user's rounding
+|		mode.  The FPSP routine sintdo is used.  The output
+|		is in fp0.
+|
+|	A13.	Check for LEN digits.
+|		If the int operation results in more than LEN digits,
+|		or less than LEN -1 digits, adjust ILOG and repeat from
+|		A6.  This test occurs only on the first pass.  If the
+|		result is exactly 10^LEN, increment ILOG and divide
+|		the mantissa by 10.
+|
+|	A14.	Convert the mantissa to bcd.
+|		The binstr routine is used to convert the LEN digit
+|		mantissa to bcd in memory.  The input to binstr is
+|		to be a fraction; i.e. (mantissa)/10^LEN and adjusted
+|		such that the decimal point is to the left of bit 63.
+|		The bcd digits are stored in the correct position in
+|		the final string area in memory.
+|
+|	A15.	Convert the exponent to bcd.
+|		As in A14 above, the exp is converted to bcd and the
+|		digits are stored in the final string.
+|		Test the length of the final exponent string.  If the
+|		length is 4, set operr.
+|
+|	A16.	Write sign bits to final string.
+|
+|	Implementation Notes:
+|
+|	The registers are used as follows:
+|
+|		d0: scratch; LEN input to binstr
+|		d1: scratch
+|		d2: upper 32-bits of mantissa for binstr
+|		d3: scratch;lower 32-bits of mantissa for binstr
+|		d4: LEN
+|		d5: LAMBDA/ICTR
+|		d6: ILOG
+|		d7: k-factor
+|		a0: ptr for original operand/final result
+|		a1: scratch pointer
+|		a2: pointer to FP_X; abs(original value) in ext
+|		fp0: scratch
+|		fp1: scratch
+|		fp2: scratch
+|		F_SCR1:
+|		F_SCR2:
+|		L_SCR1:
+|		L_SCR2:
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|BINDEC    idnt    2,1 | Motorola 040 Floating Point Software Package
+
+#include "fpsp.h"
+
+	|section	8
+
+| Constants in extended precision (multipliers that turn e + 0.f into ILOG)
+LOG2:	.long	0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000	|log10(2); used in A3 when e + 0.f >= 0
+LOG2UP1:	.long	0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000	|LOG2 plus 1 in the last mantissa bit; used in A3 when e + 0.f < 0
+
+| Constants in single precision (readers use .s operands: first lword only)
+FONE:	.long	0x3F800000,0x00000000,0x00000000,0x00000000	|1.0
+FTWO:	.long	0x40000000,0x00000000,0x00000000,0x00000000	|2.0
+FTEN:	.long	0x41200000,0x00000000,0x00000000,0x00000000	|10.0
+F4933:	.long	0x459A2800,0x00000000,0x00000000,0x00000000	|4933.0
+
+RBDTBL:	.byte	0,0,0,0		|user RN; index = rmode*4 + LAMBDA*2 + sign(X), see A7
+	.byte	3,3,2,2		|user RZ; entries: 0 = RN, 2 = RM, 3 = RP
+	.byte	3,2,2,3		|user RM
+	.byte	2,3,3,2		|user RP
+
+	|xref	binstr
+	|xref	sintdo
+	|xref	ptenrn,ptenrm,ptenrp
+
+	.global	bindec
+	.global	sc_mul
+bindec:
+	moveml	%d2-%d7/%a2,-(%a7)	|save d2-d7/a2; restored just before rts
+	fmovemx %fp0-%fp2,-(%a7)	|save fp0-fp2; restored just before rts
+
+| A1. Set RM and size ext. Set SIGMA = sign input;
+|     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
+|     separating  normalized/denormalized input.  If the input
+|     is a denormalized number, set the BINDEC_FLG memory word
+|     to signal denorm.  If the input is unnormalized, normalize
+|     the input and test for denormalized result.
+|
+	fmovel	#rm_mode,%FPCR	|set RM and ext
+	movel	(%a0),L_SCR2(%a6)	|save first lword (sign/exponent) of X; its sign is tested in A7, A12, A16
+	movel	%d0,%d7		|move k-factor to d7
+	clrb	BINDEC_FLG(%a6)	|clr norm/denorm flag
+	movew	STAG(%a6),%d0	|get stag
+	andiw	#0xe000,%d0	|isolate stag bits (top three bits of the word)
+	beq	A2_str		|if zero, input is norm; skip the normalize step
+|
+| Normalize the denorm
+|
+un_de_norm:
+	movew	(%a0),%d0	|get sign/exponent word of the input
+	andiw	#0x7fff,%d0	|strip sign bit, leaving the biased exponent
+	movel	4(%a0),%d1	|d1 = upper 32 bits of mantissa
+	movel	8(%a0),%d2	|d2 = lower 32 bits of mantissa
+norm_loop:
+	subw	#1,%d0		|exponent drops by one per left shift
+	lsll	#1,%d2		|shift d1:d2 left one bit;
+	roxll	#1,%d1		|bit shifted out of d2 enters d1 via X
+	tstl	%d1		|is bit 31 of the upper mantissa set yet?
+	bges	norm_loop	|if not, shift again
+|
+| Test if the normalized input is denormalized
+|
+	tstw	%d0		|look at the adjusted exponent
+	bgts	pos_exp		|if greater than zero, it is a norm
+	st	BINDEC_FLG(%a6)	|set flag for denorm
+pos_exp:
+	andiw	#0x7fff,%d0	|strip sign of normalized exp
+	movew	%d0,(%a0)	|write exponent back (sign bit clear; original lword is in L_SCR2)
+	movel	%d1,4(%a0)	|write back normalized upper mantissa
+	movel	%d2,8(%a0)	|write back normalized lower mantissa
+
+| A2. Set X = abs(input).
+|
+A2_str:
+	movel	(%a0),FP_SCR2(%a6) | copy sign/exponent lword to work space
+	movel	4(%a0),FP_SCR2+4(%a6) | copy upper mantissa lword to work space
+	movel	8(%a0),FP_SCR2+8(%a6) | copy lower mantissa lword to work space
+	andil	#0x7fffffff,FP_SCR2(%a6) |clear sign bit: work space now holds abs(X)
+
+| A3. Compute ILOG.
+|     ILOG is the log base 10 of the input value.  It is approx-
+|     imated by adding e + 0.f when the original value is viewed
+|     as 2^^e * 1.f in extended precision.  This value is stored
+|     in d6.
+|
+| Register usage:
+|	Input/Output
+|	d0: k-factor/exponent
+|	d2: x/x
+|	d3: x/x
+|	d4: x/x
+|	d5: x/x
+|	d6: x/ILOG
+|	d7: k-factor/Unchanged
+|	a0: ptr for original operand/final result
+|	a1: x/x
+|	a2: x/x
+|	fp0: x/float(ILOG)
+|	fp1: x/x
+|	fp2: x/x
+|	F_SCR1:x/x
+|	F_SCR2:Abs(X)/Abs(X) with $3fff exponent
+|	L_SCR1:x/x
+|	L_SCR2:first word of X packed/Unchanged
+
+	tstb	BINDEC_FLG(%a6)	|check for denorm
+	beqs	A3_cont		|if clr, continue with norm
+	movel	#-4933,%d6	|force ILOG = -4933
+	bras	A4_str		|denorm: skip the ILOG arithmetic
+A3_cont:
+	movew	FP_SCR2(%a6),%d0	|move exp to d0
+	movew	#0x3fff,FP_SCR2(%a6) |replace exponent with 0x3fff
+	fmovex	FP_SCR2(%a6),%fp0	|now fp0 has 1.f
+	subw	#0x3fff,%d0	|strip off bias
+	faddw	%d0,%fp0		|add in exp
+	fsubs	FONE,%fp0	|subtract off 1.0, leaving e + 0.f
+	fbge	pos_res		|if e + 0.f >= 0, multiply by LOG2 instead
+	fmulx	LOG2UP1,%fp0	|if neg, mul by LOG2UP1
+	fmovel	%fp0,%d6		|put ILOG in d6 as a lword
+	bras	A4_str		|ILOG is in d6; go on to A4
+pos_res:
+	fmulx	LOG2,%fp0	|if pos, mul by LOG2
+	fmovel	%fp0,%d6		|put ILOG in d6 as a lword
+
+
+| A4. Clr INEX bit.
+|     The operation in A3 above may have set INEX2.
+
+A4_str:
+	fmovel	#0,%FPSR		|zero all of fpsr (drops any INEX2 from A3) - nothing in it is needed
+
+
+| A5. Set ICTR = 0;
+|     ICTR is a flag used in A13.  It must be set before the
+|     loop entry A6. The lower word of d5 is used for ICTR.
+
+	clrw	%d5		|clear ICTR (low word of d5 only)
+
+
+| A6. Calculate LEN.
+|     LEN is the number of digits to be displayed.  The k-factor
+|     can dictate either the total number of digits, if it is
+|     a positive number, or the number of digits after the
+|     original decimal point which are to be included as
+|     significant.  See the 68882 manual for examples.
+|     If LEN is computed to be greater than 17, set OPERR in
+|     USER_FPSR.  LEN is stored in d4.
+|
+| Register usage:
+|	Input/Output
+|	d0: exponent/Unchanged
+|	d2: x/x/scratch
+|	d3: x/x
+|	d4: exc picture/LEN
+|	d5: ICTR/Unchanged
+|	d6: ILOG/Unchanged
+|	d7: k-factor/Unchanged
+|	a0: ptr for original operand/final result
+|	a1: x/x
+|	a2: x/x
+|	fp0: float(ILOG)/Unchanged
+|	fp1: x/x
+|	fp2: x/x
+|	F_SCR1:x/x
+|	F_SCR2:Abs(X) with $3fff exponent/Unchanged
+|	L_SCR1:x/x
+|	L_SCR2:first word of X packed/Unchanged
+
+A6_str:
+	tstl	%d7		|branch on sign of k
+	bles	k_neg		|if k <= 0, LEN = ILOG + 1 - k
+	movel	%d7,%d4		|if k > 0, LEN = k
+	bras	len_ck		|skip to LEN check
+k_neg:
+	movel	%d6,%d4		|first load ILOG to d4
+	subl	%d7,%d4		|subtract off k
+	addql	#1,%d4		|add in the 1
+len_ck:
+	tstl	%d4		|LEN check: branch on sign of LEN
+	bles	LEN_ng		|if LEN <= 0, set LEN = 1
+	cmpl	#17,%d4		|test if LEN > 17
+	bles	A7_str		|if not, LEN is usable as is
+	movel	#17,%d4		|set max LEN = 17
+	tstl	%d7		|OPERR is only raised when k > 0
+	bles	A7_str		|if k <= 0, leave USER_FPSR alone
+	orl	#opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR
+	bras	A7_str		|finished here
+LEN_ng:
+	moveql	#1,%d4		|min LEN is 1
+
+
+| A7. Calculate SCALE.
+|     SCALE is equal to 10^ISCALE, where ISCALE is the number
+|     of decimal places needed to insure LEN integer digits
+|     in the output before conversion to bcd. LAMBDA is the sign
+|     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
+|     the rounding mode as given in the following table (see
+|     Coonen, p. 7.23 as ref.; however, the SCALE variable is
+|     of opposite sign in bindec.sa from Coonen).
+|
+|	Initial					USE
+|	FPCR[6:5]	LAMBDA	SIGN(X)		FPCR[6:5]
+|	----------------------------------------------
+|	 RN	00	   0	   0		00/0	RN
+|	 RN	00	   0	   1		00/0	RN
+|	 RN	00	   1	   0		00/0	RN
+|	 RN	00	   1	   1		00/0	RN
+|	 RZ	01	   0	   0		11/3	RP
+|	 RZ	01	   0	   1		11/3	RP
+|	 RZ	01	   1	   0		10/2	RM
+|	 RZ	01	   1	   1		10/2	RM
+|	 RM	10	   0	   0		11/3	RP
+|	 RM	10	   0	   1		10/2	RM
+|	 RM	10	   1	   0		10/2	RM
+|	 RM	10	   1	   1		11/3	RP
+|	 RP	11	   0	   0		10/2	RM
+|	 RP	11	   0	   1		11/3	RP
+|	 RP	11	   1	   0		11/3	RP
+|	 RP	11	   1	   1		10/2	RM
+|
+| Register usage:
+|	Input/Output
+|	d0: exponent/scratch - final is 0
+|	d2: x/0 or 24 for A9
+|	d3: x/scratch - offset ptr into PTENRM array
+|	d4: LEN/Unchanged
+|	d5: 0/ICTR:LAMBDA
+|	d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
+|	d7: k-factor/Unchanged
+|	a0: ptr for original operand/final result
+|	a1: x/ptr to PTENxx array (PTENRN, PTENRP or PTENRM)
+|	a2: x/ptr to RBDTBL
+|	fp0: float(ILOG)/Unchanged
+|	fp1: x/10^ISCALE
+|	fp2: x/x
+|	F_SCR1:x/x
+|	F_SCR2:Abs(X) with $3fff exponent/Unchanged
+|	L_SCR1:x/x
+|	L_SCR2:first word of X packed/Unchanged
+
+A7_str:
+	tstl	%d7		|test sign of k
+	bgts	k_pos		|if k > 0, skip this
+	cmpl	%d6,%d7		|test k - ILOG
+	blts	k_pos		|if k < ILOG, skip this
+	movel	%d7,%d6		|if ((k<=0) & (ILOG <= k)) ILOG = k
+k_pos:
+	movel	%d6,%d0		|calc ILOG + 1 - LEN in d0
+	addql	#1,%d0		|add the 1
+	subl	%d4,%d0		|sub off LEN
+	swap	%d5		|use upper word of d5 for LAMBDA
+	clrw	%d5		|set it zero initially
+	clrw	%d2		|set up d2 for very small case
+	tstl	%d0		|test sign of ISCALE
+	bges	iscale		|if ISCALE >= 0, LAMBDA stays 0 and d0 is already abs
+	addqw	#1,%d5		|if neg, set LAMBDA true
+	cmpl	#0xffffecd4,%d0	|test iscale <= -4908
+	bgts	no_inf		|if false, skip rest
+	addil	#24,%d0		|add in 24 to iscale
+	movel	#24,%d2		|put 24 in d2 for A9
+no_inf:
+	negl	%d0		|and take abs of ISCALE
+iscale:
+	fmoves	FONE,%fp1	|init fp1 to 1
+	bfextu	USER_FPCR(%a6){#26:#2},%d1 |get initial rmode bits
+	lslw	#1,%d1		|put them in bits 2:1
+	addw	%d5,%d1		|add in LAMBDA
+	lslw	#1,%d1		|put them in bits 3:1
+	tstl	L_SCR2(%a6)	|test sign of original x
+	bges	x_pos		|if pos, don't set bit 0
+	addql	#1,%d1		|if neg, set bit 0
+x_pos:
+	leal	RBDTBL,%a2	|load rbdtbl base
+	moveb	(%a2,%d1),%d3	|load low byte of d3 with new rmode. NOTE(review): the rest of d3 is not cleared before the FPCR write below - confirm it is always clean here
+	lsll	#4,%d3		|move rmode up from bits 1:0 to bits 5:4
+	fmovel	%d3,%fpcr		|load bits into fpu
+	lsrl	#4,%d3		|move rmode back down to bits 1:0
+	tstb	%d3		|decode new rmode for pten table
+	bnes	not_rn		|if zero, it is RN
+	leal	PTENRN,%a1	|load a1 with RN table base
+	bras	rmode		|exit decode
+not_rn:
+	lsrb	#1,%d3		|get lsb in carry
+	bccs	not_rp		|if carry clear, it is RM
+	leal	PTENRP,%a1	|load a1 with RP table base
+	bras	rmode		|exit decode
+not_rp:
+	leal	PTENRM,%a1	|load a1 with RM table base
+rmode:
+	clrl	%d3		|clr table index
+e_loop:
+	lsrl	#1,%d0		|shift next bit into carry
+	bccs	e_next		|if zero, skip the mul
+	fmulx	(%a1,%d3),%fp1	|mul by 10**(d3_bit_no)
+e_next:
+	addl	#12,%d3		|inc d3 to next pwrten table entry (12 bytes each)
+	tstl	%d0		|test if ISCALE is zero
+	bnes	e_loop		|if not, loop
+
+
+| A8. Clr INEX; Force RZ.
+|     The operation in A7 above may have set INEX2.
+|     RZ mode is forced for the scaling operation to insure
+|     only one rounding error.  The grs bits are collected in
+|     the INEX flag for use in A10.
+|
+| Register usage:
+|	Input/Output
+
+	fmovel	#0,%FPSR		|clr INEX (whole FPSR is zeroed) so A10 sees only the scaling result
+	fmovel	#rz_mode,%FPCR	|set RZ rounding mode
+
+
+| A9. Scale X -> Y.
+|     The mantissa is scaled to the desired number of significant
+|     digits.  The excess digits are collected in INEX2. If mul,
+|     Check d2 for excess 10 exponential value.  If not zero,
+|     the iscale value would have caused the pwrten calculation
+|     to overflow.  Only a negative iscale can cause this, so
+|     multiply by 10^(d2), which is now only allowed to be 24,
+|     with a multiply by 10^8 and 10^16, which is exact since
+|     10^24 is exact.  If the input was denormalized, we must
+|     create a busy stack frame with the mul command and the
+|     two operands, and allow the fpu to complete the multiply.
+|
+| Register usage:
+|	Input/Output
+|	d0: FPCR with RZ mode/Unchanged
+|	d2: 0 or 24/unchanged
+|	d3: x/x
+|	d4: LEN/Unchanged
+|	d5: ICTR:LAMBDA
+|	d6: ILOG/Unchanged
+|	d7: k-factor/Unchanged
+|	a0: ptr for original operand/final result
+|	a1: ptr to PTENRM array/Unchanged
+|	a2: x/x
+|	fp0: float(ILOG)/X adjusted for SCALE (Y)
+|	fp1: 10^ISCALE/Unchanged
+|	fp2: x/x
+|	F_SCR1:x/x
+|	F_SCR2:Abs(X) with $3fff exponent/Unchanged
+|	L_SCR1:x/x
+|	L_SCR2:first word of X packed/Unchanged
+
+A9_str:
+	fmovex	(%a0),%fp0	|load X from memory
+	fabsx	%fp0		|use abs(X)
+	tstw	%d5		|LAMBDA is in lower word of d5
+	bne	sc_mul		|if neg (LAMBDA = 1), scale by mul
+	fdivx	%fp1,%fp0		|calculate X / SCALE -> Y to fp0
+	bras	A10_st		|branch to A10
+
+sc_mul:
+	tstb	BINDEC_FLG(%a6)	|check for denorm
+	beqs	A9_norm		|if norm, continue with mul
+	fmovemx %fp1-%fp1,-(%a7)	|load ETEMP with 10^ISCALE
+	movel	8(%a0),-(%a7)	|load FPTEMP with input arg
+	movel	4(%a0),-(%a7)	|(upper mantissa lword)
+	movel	(%a0),-(%a7)	|(sign/exponent lword)
+	movel	#18,%d3		|load count for busy stack
+A9_loop:
+	clrl	-(%a7)		|clear lword on stack
+	dbf	%d3,A9_loop	|19 passes in all: d3 runs from 18 down to -1
+	moveb	VER_TMP(%a6),(%a7) |write current version number
+	moveb	#BUSY_SIZE-4,1(%a7) |write current busy size
+	moveb	#0x10,0x44(%a7)	|set fcefpte[15] bit
+	movew	#0x0023,0x40(%a7)	|load cmdreg1b with mul command
+	moveb	#0xfe,0x8(%a7)	|load all 1s to cu savepc
+	frestore (%a7)+		|restore frame to fpu for completion
+	fmulx	36(%a1),%fp0	|multiply fp0 by 10^8
+	fmulx	48(%a1),%fp0	|multiply fp0 by 10^16
+	bras	A10_st		|denorm scaling done; go to A10
+A9_norm:
+	tstw	%d2		|test for small exp case
+	beqs	A9_con		|if zero, continue as normal
+	fmulx	36(%a1),%fp0	|multiply fp0 by 10^8
+	fmulx	48(%a1),%fp0	|multiply fp0 by 10^16
+A9_con:
+	fmulx	%fp1,%fp0		|calculate X * SCALE -> Y to fp0
+
+
+| A10. Or in INEX.
+|      If INEX is set, round error occurred.  This is compensated
+|      for by 'or-ing' in the INEX2 flag to the lsb of Y.
+|
+| Register usage:
+|	Input/Output
+|	d0: FPCR with RZ mode/FPSR with INEX2 isolated
+|	d2: x/x
+|	d3: x/x
+|	d4: LEN/Unchanged
+|	d5: ICTR:LAMBDA
+|	d6: ILOG/Unchanged
+|	d7: k-factor/Unchanged
+|	a0: ptr for original operand/final result
+|	a1: ptr to PTENxx array/Unchanged
+|	a2: x/ptr to FP_SCR2(a6)
+|	fp0: Y/Y with lsb adjusted
+|	fp1: 10^ISCALE/Unchanged
+|	fp2: x/x
+
+A10_st:
+	fmovel	%FPSR,%d0		|get FPSR into d0 for the INEX2 test below
+	fmovex	%fp0,FP_SCR2(%a6)	|move Y to memory
+	leal	FP_SCR2(%a6),%a2	|load a2 with ptr to FP_SCR2
+	btstl	#9,%d0		|check if INEX2 set
+	beqs	A11_st		|if clear, skip rest
+	oril	#1,8(%a2)	|or in 1 to lsb of mantissa (sticky bit)
+	fmovex	FP_SCR2(%a6),%fp0	|write adjusted Y back to fpu
+
+
+| A11. Restore original FPCR; set size ext.
+|      Perform FINT operation in the user's rounding mode.  Keep
+|      the size to extended.  The sintdo entry point in the sint
+|      routine expects the FPCR value to be in USER_FPCR for
+|      mode and precision.  The original FPCR is saved in L_SCR1.
+
+A11_st:
+	movel	USER_FPCR(%a6),L_SCR1(%a6) |save user's FPCR; A12 puts it back
+	andil	#0x00000030,USER_FPCR(%a6) |keep only bits 5:4: set size to ext,
+|					;block exceptions
+
+
+| A12. Calculate YINT = FINT(Y) according to user's rounding mode.
+|      The FPSP routine sintdo is used.  The output is in fp0.
+|
+| Register usage:
+|	Input/Output
+|	d0: FPSR with AINEX cleared/FPCR with size set to ext
+|	d2: x/x/scratch
+|	d3: x/x
+|	d4: LEN/Unchanged
+|	d5: ICTR:LAMBDA/Unchanged
+|	d6: ILOG/Unchanged
+|	d7: k-factor/Unchanged
+|	a0: ptr for original operand/src ptr for sintdo
+|	a1: ptr to PTENxx array/Unchanged
+|	a2: ptr to FP_SCR2(a6)/Unchanged
+|	a6: temp pointer to FP_SCR2(a6) - orig value saved and restored
+|	fp0: Y/YINT
+|	fp1: 10^ISCALE/Unchanged
+|	fp2: x/x
+|	F_SCR1:x/x
+|	F_SCR2:Y adjusted for inex/Y with original exponent
+|	L_SCR1:x/original USER_FPCR
+|	L_SCR2:first word of X packed/Unchanged
+
+A12_st:
+	moveml	%d0-%d1/%a0-%a1,-(%a7)	|save regs used by sintdo
+	movel	L_SCR1(%a6),-(%a7)	|keep L_SCR1 (user's FPCR) across the call
+	movel	L_SCR2(%a6),-(%a7)	|keep L_SCR2 (first lword of X) across the call
+	leal	FP_SCR2(%a6),%a0		|a0 is ptr to F_SCR2(a6)
+	fmovex	%fp0,(%a0)		|move Y to memory at FP_SCR2(a6)
+	tstl	L_SCR2(%a6)		|test sign of original operand
+	bges	do_fint			|if pos, use Y
+	orl	#0x80000000,(%a0)		|if neg, use -Y
+do_fint:
+	movel	USER_FPSR(%a6),-(%a7)	|push USER_FPSR
+	bsr	sintdo			|sint routine returns int in fp0
+	moveb	(%a7),USER_FPSR(%a6)	|put back only the first byte of the saved USER_FPSR
+	addl	#4,%a7			|discard the saved USER_FPSR lword
+	movel	(%a7)+,L_SCR2(%a6)	|pop L_SCR2
+	movel	(%a7)+,L_SCR1(%a6)	|pop L_SCR1
+	moveml	(%a7)+,%d0-%d1/%a0-%a1	|restore regs used by sint
+	movel	L_SCR2(%a6),FP_SCR2(%a6)	|restore original exponent
+	movel	L_SCR1(%a6),USER_FPCR(%a6) |restore user's FPCR
+
+
+| A13. Check for LEN digits.
+|      If the int operation results in more than LEN digits,
+|      or less than LEN -1 digits, adjust ILOG and repeat from
+|      A6.  This test occurs only on the first pass.  If the
+|      result is exactly 10^LEN, increment ILOG and divide
+|      the mantissa by 10.  The calculation of 10^LEN cannot
+|      be inexact, since all powers of ten up to 10^27 are exact
+|      in extended precision, so the use of a previous power-of-ten
+|      table will introduce no error.
+|
+|
+| Register usage:
+|	Input/Output
+|	d0: FPCR with size set to ext/scratch final = 0
+|	d2: x/x
+|	d3: x/scratch final = x
+|	d4: LEN/LEN adjusted
+|	d5: ICTR:LAMBDA/LAMBDA:ICTR
+|	d6: ILOG/ILOG adjusted
+|	d7: k-factor/Unchanged
+|	a0: pointer into memory for packed bcd string formation
+|	a1: ptr to PTENxx array/Unchanged
+|	a2: ptr to FP_SCR2(a6)/Unchanged
+|	fp0: int portion of Y/abs(YINT) adjusted
+|	fp1: 10^ISCALE/Unchanged
+|	fp2: x/10^LEN
+|	F_SCR1:x/x
+|	F_SCR2:Y with original exponent/Unchanged
+|	L_SCR1:original USER_FPCR/Unchanged
+|	L_SCR2:first word of X packed/Unchanged
+
+A13_st:
+	swap	%d5		|put ICTR in lower word of d5
+	tstw	%d5		|check if ICTR = 0
+	bne	not_zr		|if non-zero, go to second test
+|
+| Compute 10^(LEN-1)
+|
+	fmoves	FONE,%fp2	|init fp2 to 1.0
+	movel	%d4,%d0		|put LEN in d0
+	subql	#1,%d0		|d0 = LEN -1
+	clrl	%d3		|clr table index
+l_loop:
+	lsrl	#1,%d0		|shift next bit into carry
+	bccs	l_next		|if zero, skip the mul
+	fmulx	(%a1,%d3),%fp2	|mul by 10**(d3_bit_no)
+l_next:
+	addl	#12,%d3		|inc d3 to next pwrten table entry
+	tstl	%d0		|test if any bits of LEN-1 remain
+	bnes	l_loop		|if so, loop
+|
+| 10^(LEN-1) is computed for this test and A14.  If the input was
+| denormalized, check only the case in which YINT > 10^LEN.
+|
+	tstb	BINDEC_FLG(%a6)	|check if input was norm
+	beqs	A13_con		|if norm, continue with checking
+	fabsx	%fp0		|take abs of YINT
+	bra	test_2		|denorm: skip the 10^(LEN-1) comparison
+|
+| Compare abs(YINT) to 10^(LEN-1) and 10^LEN
+|
+A13_con:
+	fabsx	%fp0		|take abs of YINT
+	fcmpx	%fp2,%fp0		|compare abs(YINT) with 10^(LEN-1)
+	fbge	test_2		|if greater or equal, do next test
+	subql	#1,%d6		|subtract 1 from ILOG
+	movew	#1,%d5		|set ICTR
+	fmovel	#rm_mode,%FPCR	|set rmode to RM
+	fmuls	FTEN,%fp2	|compute 10^LEN
+	bra	A6_str		|return to A6 and recompute YINT
+test_2:
+	fmuls	FTEN,%fp2	|compute 10^LEN
+	fcmpx	%fp2,%fp0		|compare abs(YINT) with 10^LEN
+	fblt	A14_st		|if less, all is ok, go to A14
+	fbgt	fix_ex		|if greater, fix and redo
+	fdivs	FTEN,%fp0	|if equal, divide by 10
+	addql	#1,%d6		| and inc ILOG
+	bras	A14_st		| and continue elsewhere
+fix_ex:
+	addql	#1,%d6		|increment ILOG by 1
+	movew	#1,%d5		|set ICTR
+	fmovel	#rm_mode,%FPCR	|set rmode to RM
+	bra	A6_str		|return to A6 and recompute YINT
+|
+| Since ICTR <> 0, we have already been through one adjustment,
+| and shouldn't have another; this is to check if abs(YINT) = 10^LEN
+| 10^LEN is again computed using whatever table is in a1 since the
+| value calculated cannot be inexact.
+|
+not_zr:
+	fmoves	FONE,%fp2	|init fp2 to 1.0
+	movel	%d4,%d0		|put LEN in d0
+	clrl	%d3		|clr table index
+z_loop:
+	lsrl	#1,%d0		|shift next bit into carry
+	bccs	z_next		|if zero, skip the mul
+	fmulx	(%a1,%d3),%fp2	|mul by 10**(d3_bit_no)
+z_next:
+	addl	#12,%d3		|inc d3 to next pwrten table entry
+	tstl	%d0		|test if any bits of LEN remain
+	bnes	z_loop		|if so, loop
+	fabsx	%fp0		|get abs(YINT)
+	fcmpx	%fp2,%fp0		|check if abs(YINT) = 10^LEN
+	fbne	A14_st		|if not, skip this
+	fdivs	FTEN,%fp0	|divide abs(YINT) by 10
+	addql	#1,%d6		|and inc ILOG by 1
+	addql	#1,%d4		| and inc LEN
+	fmuls	FTEN,%fp2	| since LEN was incremented, get the new 10^^LEN
+
+
+| A14. Convert the mantissa to bcd.
+|      The binstr routine is used to convert the LEN digit
+|      mantissa to bcd in memory.  The input to binstr is
+|      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
+|      such that the decimal point is to the left of bit 63.
+|      The bcd digits are stored in the correct position in
+|      the final string area in memory.
+|
+|
+| Register usage:
+|	Input/Output
+|	d0: x/LEN call to binstr - final is 0
+|	d1: x/0
+|	d2: x/ms 32-bits of mant of abs(YINT)
+|	d3: x/ls 32-bits of mant of abs(YINT)
+|	d4: LEN/Unchanged
+|	d5: ICTR:LAMBDA/LAMBDA:ICTR
+|	d6: ILOG
+|	d7: k-factor/Unchanged
+|	a0: pointer into memory for packed bcd string formation
+|	    /ptr to first mantissa byte in result string
+|	a1: ptr to PTENxx array/Unchanged
+|	a2: ptr to FP_SCR2(a6)/Unchanged
+|	fp0: int portion of Y/abs(YINT) adjusted
+|	fp1: 10^ISCALE/Unchanged
+|	fp2: 10^LEN/Unchanged
+|	F_SCR1:x/Work area for final result
+|	F_SCR2:Y with original exponent/Unchanged
+|	L_SCR1:original USER_FPCR/Unchanged
+|	L_SCR2:first word of X packed/Unchanged
+
+A14_st:
+	fmovel	#rz_mode,%FPCR	|force rz for conversion
+	fdivx	%fp2,%fp0		|divide abs(YINT) by 10^LEN
+	leal	FP_SCR1(%a6),%a0	|a0 = ptr to FP_SCR1, where the result string is built
+	fmovex	%fp0,(%a0)	|move abs(YINT)/10^LEN to memory
+	movel	4(%a0),%d2	|move 2nd lword (upper mantissa) at a0 to d2
+	movel	8(%a0),%d3	|move 3rd lword (lower mantissa) at a0 to d3
+	clrl	4(%a0)		|zero 2nd lword of the result area
+	clrl	8(%a0)		|zero 3rd lword of the result area
+	movel	(%a0),%d0		|move exponent to d0
+	swap	%d0		|put exponent in lower word
+	beqs	no_sft		|if zero, don't shift
+	subil	#0x3ffd,%d0	|sub bias less 2 to make fract
+	tstl	%d0		|check if > 0
+	bgts	no_sft		|if so, don't shift
+	negl	%d0		|make exp positive
+m_loop:
+	lsrl	#1,%d2		|shift d2:d3 right, add 0s
+	roxrl	#1,%d3		|the number of places
+	dbf	%d0,m_loop	|given in d0 (dbf makes d0 + 1 passes)
+no_sft:
+	tstl	%d2		|check for mantissa of zero
+	bnes	no_zr		|if not, go on
+	tstl	%d3		|continue zero check
+	beqs	zer_m		|if zero, go directly to binstr
+no_zr:
+	clrl	%d1		|put zero in d1 for addx
+	addil	#0x00000080,%d3	|inc at bit 7
+	addxl	%d1,%d2		|continue inc
+	andil	#0xffffff80,%d3	|strip off lsb not used by 882
+zer_m:
+	movel	%d4,%d0		|put LEN in d0 for binstr call
+	addql	#3,%a0		|a0 points to M16 byte in result
+	bsr	binstr		|call binstr to convert mant
+
+
+| A15. Convert the exponent to bcd.
+|      As in A14 above, the exp is converted to bcd and the
+|      digits are stored in the final string.
+|
+|      Digits are stored in L_SCR1(a6) on return from BINDEC as:
+|
+|	 32               16 15                0
+|	-----------------------------------------
+|	|  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
+|	-----------------------------------------
+|
+| And are moved into their proper places in FP_SCR1.  If digit e4
+| is non-zero, OPERR is signaled.  In all cases, all 4 digits are
+| written as specified in the 881/882 manual for packed decimal.
+|
+| Register usage:
+|	Input/Output
+|	d0: x/LEN call to binstr - final is 0
+|	d1: x/scratch (0);shift count for final exponent packing
+|	d2: x/ms 32-bits of exp fraction/scratch
+|	d3: x/ls 32-bits of exp fraction
+|	d4: LEN/Unchanged
+|	d5: ICTR:LAMBDA/LAMBDA:ICTR
+|	d6: ILOG
+|	d7: k-factor/Unchanged
+|	a0: ptr to result string/ptr to L_SCR1(a6)
+|	a1: ptr to PTENxx array/Unchanged
+|	a2: ptr to FP_SCR2(a6)/Unchanged
+|	fp0: abs(YINT) adjusted/float(ILOG)
+|	fp1: 10^ISCALE/Unchanged
+|	fp2: 10^LEN/Unchanged
+|	F_SCR1:Work area for final result/BCD result
+|	F_SCR2:Y with original exponent/ILOG/10^4
+|	L_SCR1:original USER_FPCR/Exponent digits on return from binstr
+|	L_SCR2:first word of X packed/Unchanged
+
+A15_st:
+	tstb	BINDEC_FLG(%a6)	|check for denorm
+	beqs	not_denorm	|if clr, input was a norm
+	ftstx	%fp0		|test for zero
+	fbeq	den_zero	|if zero, use k-factor or 4933
+	fmovel	%d6,%fp0		|float ILOG
+	fabsx	%fp0		|get abs of ILOG
+	bras	convrt		|go convert the exponent
+den_zero:
+	tstl	%d7		|check sign of the k-factor
+	blts	use_ilog	|if negative, use ILOG
+	fmoves	F4933,%fp0	|force exponent to 4933
+	bras	convrt		|do it
+use_ilog:
+	fmovel	%d6,%fp0		|float ILOG
+	fabsx	%fp0		|get abs of ILOG
+	bras	convrt		|go convert the exponent
+not_denorm:
+	ftstx	%fp0		|test for zero
+	fbne	not_zero	|if non-zero, use ILOG; if zero, force exponent
+	fmoves	FONE,%fp0	|force exponent to 1
+	bras	convrt		|do it
+not_zero:
+	fmovel	%d6,%fp0		|float ILOG
+	fabsx	%fp0		|get abs of ILOG
+convrt:
+	fdivx	24(%a1),%fp0	|compute ILOG/10^4
+	fmovex	%fp0,FP_SCR2(%a6)	|store fp0 in memory
+	movel	4(%a2),%d2	|move word 2 to d2
+	movel	8(%a2),%d3	|move word 3 to d3
+	movew	(%a2),%d0		|move exp to d0
+	beqs	x_loop_fin	|if zero, skip the shift
+	subiw	#0x3ffd,%d0	|subtract off bias less 2
+	negw	%d0		|make exp positive
+x_loop:
+	lsrl	#1,%d2		|shift d2:d3 right
+	roxrl	#1,%d3		|the number of places
+	dbf	%d0,x_loop	|given in d0 (dbf makes d0 + 1 passes)
+x_loop_fin:
+	clrl	%d1		|put zero in d1 for addx
+	addil	#0x00000080,%d3	|inc at bit 7
+	addxl	%d1,%d2		|continue inc
+	andil	#0xffffff80,%d3	|strip off lsb not used by 882
+	movel	#4,%d0		|put 4 in d0 for binstr call
+	leal	L_SCR1(%a6),%a0	|a0 is ptr to L_SCR1 for exp digits
+	bsr	binstr		|call binstr to convert exp
+	movel	L_SCR1(%a6),%d0	|load L_SCR1 lword to d0
+	movel	#12,%d1		|use d1 for shift count
+	lsrl	%d1,%d0		|shift d0 right by 12
+	bfins	%d0,FP_SCR1(%a6){#4:#12} |put e3:e2:e1 in FP_SCR1
+	lsrl	%d1,%d0		|shift d0 right by 12 again (24 in total)
+	bfins	%d0,FP_SCR1(%a6){#16:#4} |put e4 in FP_SCR1
+	tstb	%d0		|check if e4 is zero
+	beqs	A16_st		|if zero, skip rest
+	orl	#opaop_mask,USER_FPSR(%a6) |set OPERR & AIOP in USER_FPSR
+
+
+| A16. Write sign bits to final string.
+|	   Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
+|
+| Register usage:
+|	Input/Output
+|	d0: x/scratch - final is x
+|	d2: x/x
+|	d3: x/x
+|	d4: LEN/Unchanged
+|	d5: ICTR:LAMBDA/LAMBDA:ICTR
+|	d6: ILOG/ILOG adjusted
+|	d7: k-factor/Unchanged
+|	a0: ptr to L_SCR1(a6)/Unchanged
+|	a1: ptr to PTENxx array/Unchanged
+|	a2: ptr to FP_SCR2(a6)/Unchanged
+|	fp0: float(ILOG)/Unchanged
+|	fp1: 10^ISCALE/Unchanged
+|	fp2: 10^LEN/Unchanged
+|	F_SCR1:BCD result with correct signs
+|	F_SCR2:ILOG/10^4
+|	L_SCR1:Exponent digits on return from binstr
+|	L_SCR2:first word of X packed/Unchanged
+
+A16_st:
+	clrl	%d0		|clr d0 for collection of signs
+	andib	#0x0f,FP_SCR1(%a6) |clear first nibble of FP_SCR1
+	tstl	L_SCR2(%a6)	|check sign of original operand (bit 31 of its first lword)
+	bges	mant_p		|if pos, don't set SM
+	moveql	#2,%d0		|move 2 in to d0 for SM
+mant_p:
+	tstl	%d6		|check sign of ILOG
+	bges	wr_sgn		|if pos, don't set SE
+	addql	#1,%d0		|set bit 0 in d0 for SE
+wr_sgn:
+	bfins	%d0,FP_SCR1(%a6){#0:#2} |insert SM and SE into FP_SCR1
+
+| Clean up and restore all registers used.
+
+	fmovel	#0,%FPSR		|clear possible inex2/ainex bits
+	fmovemx (%a7)+,%fp0-%fp2	|restore fp0-fp2 saved at entry
+	moveml	(%a7)+,%d2-%d7/%a2	|restore d2-d7/a2 saved at entry
+	rts			|result string is left in FP_SCR1
+
+	|end
diff --git a/arch/m68k/fpsp040/binstr.S b/arch/m68k/fpsp040/binstr.S
new file mode 100644
index 0000000..d53555c
--- /dev/null
+++ b/arch/m68k/fpsp040/binstr.S
@@ -0,0 +1,140 @@
+|
+|	binstr.sa 3.3 12/19/90
+|
+|
+|	Description: Converts a 64-bit binary integer to bcd.
+|
+|	Input: 64-bit binary integer in d2:d3, desired length (LEN) in
+|          d0, and a  pointer to start in memory for bcd characters
+|          in a0. (This pointer must point to byte 4 of the first
+|          lword of the packed decimal memory string.)
+|
+|	Output:	LEN bcd digits representing the 64-bit integer.
+|
+|	Algorithm:
+|		The 64-bit binary is assumed to have a decimal point before
+|		bit 63.  The fraction is multiplied by 10 using a mul by 2
+|		shift and a mul by 8 shift.  The bits shifted out of the
+|		msb form a decimal digit.  This process is iterated until
+|		LEN digits are formed.
+|
+|	A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the
+|		digit formed will be assumed the least significant.  This is
+|		to force the first byte formed to have a 0 in the upper 4 bits.
+|
+|	A2. Beginning of the loop:
+|		Copy the fraction in d2:d3 to d4:d5.
+|
+|	A3. Multiply the fraction in d2:d3 by 8 using bit-field
+|		extracts and shifts.  The three msbs from d2 will go into
+|		d1.
+|
+|	A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb
+|		will be collected by the carry.
+|
+|	A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5
+|		into d2:d3.  D1 will contain the bcd digit formed.
+|
+|	A6. Test d7.  If zero, the digit formed is the ms digit.  If non-
+|		zero, it is the ls digit.  Put the digit in its place in the
+|		upper word of d7.  If it is the ls digit, write the byte
+|		from d7 to memory.
+|
+|	A7. Decrement d0 (LEN counter) and repeat the loop until zero.
+|
+|	Implementation Notes:
+|
+|	The registers are used as follows:
+|
+|		d0: LEN counter
+|		d1: temp used to form the digit
+|		d2: upper 32-bits of fraction for mul by 8
+|		d3: lower 32-bits of fraction for mul by 8
+|		d4: upper 32-bits of fraction for mul by 2
+|		d5: lower 32-bits of fraction for mul by 2
+|		d6: temp for bit-field extracts
+|		d7: byte digit formation word;digit count {0,1}
+|		a0: pointer into memory for packed bcd string formation
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|BINSTR    idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	.global	binstr
+binstr:
+	moveml	%d0-%d7,-(%a7)		|save d0-d7 (a0 is not saved)
+|
+| A1: Init d7
+|
+	moveql	#1,%d7			|init d7 so first digit is taken as an ls digit
+	subql	#1,%d0			|dbf makes count+1 passes, so use LEN-1
+|
+| A2. Copy d2:d3 to d4:d5.  Start loop.
+|
+loop:
+	movel	%d2,%d4			|copy the fraction before muls
+	movel	%d3,%d5			|to d4:d5
+|
+| A3. Multiply d2:d3 by 8; extract msbs into d1.
+|
+	bfextu	%d2{#0:#3},%d1		|copy 3 msbs of d2 into d1
+	asll	#3,%d2			|shift d2 left by 3 places
+	bfextu	%d3{#0:#3},%d6		|copy 3 msbs of d3 into d6
+	asll	#3,%d3			|shift d3 left by 3 places
+	orl	%d6,%d2			|or in msbs from d3 into d2
+|
+| A4. Multiply d4:d5 by 2; add carry out to d1.
+|
+	asll	#1,%d5			|mul d5 by 2
+	roxll	#1,%d4			|mul d4 by 2, shifting in extend from d5
+	swap	%d6			|put 0 in d6 lower word
+	addxw	%d6,%d1			|add in extend from mul by 2
+|
+| A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
+|
+	addl	%d5,%d3			|add lower 32 bits
+	nop				|ERRATA ; FIX #13 (Rev. 1.2 6/6/90)
+	addxl	%d4,%d2			|add with extend upper 32 bits
+	nop				|ERRATA ; FIX #13 (Rev. 1.2 6/6/90)
+	addxw	%d6,%d1			|add in extend from add to d1
+	swap	%d6			|with d6 = 0; put 0 in upper word
+|
+| A6. Test d7 and branch.
+|
+	tstw	%d7			|if zero, store digit & to loop
+	beqs	first_d			|if non-zero, form byte & write
+sec_d:
+	swap	%d7			|bring first digit to word d7b
+	aslw	#4,%d7			|first digit in upper 4 bits d7b
+	addw	%d1,%d7			|add in ls digit to d7b
+	moveb	%d7,(%a0)+		|store d7b byte in memory
+	swap	%d7			|put digit-count flag back in word d7a
+	clrw	%d7			|set d7a to signal no digits done
+	dbf	%d0,loop		|do loop some more!
+	bras	end_bstr		|finished, so exit
+first_d:
+	swap	%d7			|put digit word in d7b
+	movew	%d1,%d7			|put new digit in d7b
+	swap	%d7			|put digit-count flag back in word d7a
+	addqw	#1,%d7			|set d7a to signal first digit done
+	dbf	%d0,loop		|do loop some more!
+	swap	%d7			|count ran out on an ms digit: flush it
+	lslw	#4,%d7			|move it to upper 4 bits
+	moveb	%d7,(%a0)+		|store it in memory string
+|
+| Clean up and return; the bcd digits were stored through (a0)+.
+|
+end_bstr:
+	moveml	(%a7)+,%d0-%d7
+	rts
+	|end
diff --git a/arch/m68k/fpsp040/bugfix.S b/arch/m68k/fpsp040/bugfix.S
new file mode 100644
index 0000000..942c4f6
--- /dev/null
+++ b/arch/m68k/fpsp040/bugfix.S
@@ -0,0 +1,496 @@
+|
+|	bugfix.sa 3.2 1/31/91
+|
+|
+|	This file contains workarounds for bugs in the 040
+|	relating to the Floating-Point Software Package (FPSP)
+|
+|	Fixes for bugs: 1238
+|
+|	Bug: 1238
+|
+|
+|    /* The following dirty_bit clear should be left in
+|     * the handler permanently to improve throughput.
+|     * The dirty_bits are located at bits [23:16] in
+|     * longword $08 in the busy frame $4x60.  Bit 16
+|     * corresponds to FP0, bit 17 corresponds to FP1,
+|     * and so on.
+|     */
+|    if  (E3_exception_just_serviced)   {
+|         dirty_bit[cmdreg3b[9:7]] = 0;
+|         }
+|
+|    if  (fsave_format_version != $40)  {goto NOFIX}
+|
+|    if !(E3_exception_just_serviced)   {goto NOFIX}
+|    if  (cupc == 0000000)              {goto NOFIX}
+|    if  ((cmdreg1b[15:13] != 000) &&
+|         (cmdreg1b[15:10] != 010001))  {goto NOFIX}
+|    if (((cmdreg1b[15:13] != 000) || ((cmdreg1b[12:10] != cmdreg2b[9:7]) &&
+|				      (cmdreg1b[12:10] != cmdreg3b[9:7]))  ) &&
+|	 ((cmdreg1b[ 9: 7] != cmdreg2b[9:7]) &&
+|	  (cmdreg1b[ 9: 7] != cmdreg3b[9:7])) )  {goto NOFIX}
+|
+|    /* Note: for 6d43b or 8d43b, you may want to add the following code
+|     * to get better coverage.  (If you do not insert this code, the part
+|     * won't lock up; it will simply get the wrong answer.)
+|     * Do NOT insert this code for 10d43b or later parts.
+|     *
+|     *  if (fpiarcu == integer stack return address) {
+|     *       cupc = 0000000;
+|     *       goto NOFIX;
+|     *       }
+|     */
+|
+|    if (cmdreg1b[15:13] != 000)   {goto FIX_OPCLASS2}
+|    FIX_OPCLASS0:
+|    if (((cmdreg1b[12:10] == cmdreg2b[9:7]) ||
+|	 (cmdreg1b[ 9: 7] == cmdreg2b[9:7])) &&
+|	(cmdreg1b[12:10] != cmdreg3b[9:7]) &&
+|	(cmdreg1b[ 9: 7] != cmdreg3b[9:7]))  {  /* xu conflict only */
+|	/* We execute the following code if there is an
+|	   xu conflict and NOT an nu conflict */
+|
+|	/* first save some values on the fsave frame */
+|	stag_temp     = STAG[fsave_frame];
+|	cmdreg1b_temp = CMDREG1B[fsave_frame];
+|	dtag_temp     = DTAG[fsave_frame];
+|	ete15_temp    = ETE15[fsave_frame];
+|
+|	CUPC[fsave_frame] = 0000000;
+|	FRESTORE
+|	FSAVE
+|
+|	/* If the xu instruction is exceptional, we punt.
+|	 * Otherwise, we would have to include OVFL/UNFL handler
+|	 * code here to get the correct answer.
+|	 */
+|	if (fsave_frame_format == $4060) {goto KILL_PROCESS}
+|
+|	fsave_frame = /* build a long frame of all zeros */
+|	fsave_frame_format = $4060;  /* label it as long frame */
+|
+|	/* load it with the temps we saved */
+|	STAG[fsave_frame]     =  stag_temp;
+|	CMDREG1B[fsave_frame] =  cmdreg1b_temp;
+|	DTAG[fsave_frame]     =  dtag_temp;
+|	ETE15[fsave_frame]    =  ete15_temp;
+|
+|	/* Make sure that the cmdreg3b dest reg is not going to
+|	 * be destroyed by a FMOVEM at the end of all this code.
+|	 * If it is, you should move the current value of the reg
+|	 * onto the stack so that the reg will be loaded with that value.
+|	 */
+|
+|	/* All done.  Proceed with the code below */
+|    }
+|
+|    etemp  = FP_reg_[cmdreg1b[12:10]];
+|    ete15  = ~ete14;
+|    cmdreg1b[15:10] = 010010;
+|    clear(bug_flag_procIDxxxx);
+|    FRESTORE and return;
+|
+|
+|    FIX_OPCLASS2:
+|    if ((cmdreg1b[9:7] == cmdreg2b[9:7]) &&
+|	(cmdreg1b[9:7] != cmdreg3b[9:7]))  {  /* xu conflict only */
+|	/* We execute the following code if there is an
+|	   xu conflict and NOT an nu conflict */
+|
+|	/* first save some values on the fsave frame */
+|	stag_temp     = STAG[fsave_frame];
+|	cmdreg1b_temp = CMDREG1B[fsave_frame];
+|	dtag_temp     = DTAG[fsave_frame];
+|	ete15_temp    = ETE15[fsave_frame];
+|	etemp_temp    = ETEMP[fsave_frame];
+|
+|	CUPC[fsave_frame] = 0000000;
+|	FRESTORE
+|	FSAVE
+|
+|
+|	/* If the xu instruction is exceptional, we punt.
+|	 * Otherwise, we would have to include OVFL/UNFL handler
+|	 * code here to get the correct answer.
+|	 */
+|	if (fsave_frame_format == $4060) {goto KILL_PROCESS}
+|
+|	fsave_frame = /* build a long frame of all zeros */
+|	fsave_frame_format = $4060;  /* label it as long frame */
+|
+|	/* load it with the temps we saved */
+|	STAG[fsave_frame]     =  stag_temp;
+|	CMDREG1B[fsave_frame] =  cmdreg1b_temp;
+|	DTAG[fsave_frame]     =  dtag_temp;
+|	ETE15[fsave_frame]    =  ete15_temp;
+|	ETEMP[fsave_frame]    =  etemp_temp;
+|
+|	/* Make sure that the cmdreg3b dest reg is not going to
+|	 * be destroyed by a FMOVEM at the end of all this code.
+|	 * If it is, you should move the current value of the reg
+|	 * onto the stack so that the reg will be loaded with that value.
+|	 */
+|
+|	/* All done.  Proceed with the code below */
+|    }
+|
+|    if (etemp_exponent == min_sgl)   etemp_exponent = min_dbl;
+|    if (etemp_exponent == max_sgl)   etemp_exponent = max_dbl;
+|    cmdreg1b[15:10] = 010101;
+|    clear(bug_flag_procIDxxxx);
+|    FRESTORE and return;
+|
+|
+|    NOFIX:
+|    clear(bug_flag_procIDxxxx);
+|    FRESTORE and return;
+|
+
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|BUGFIX    idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	fpsp_fmt_error
+
+	.global	b1238_fix
+b1238_fix:
+|
+| This code is entered only on completion of the handling of an
+| nu-generated ovfl, unfl, or inex exception.  If the version
+| number of the fsave is not $40, this handler is not necessary.
+| Simply branch to fix_done and exit normally.
+|
+	cmpib	#VER_40,4(%a7)		|frame sits above the bsr return address
+	bne	fix_done
+|
+| Test for cu_savepc equal to zero.  If so, this is not a bug
+| #1238 case.
+|
+	moveb	CU_SAVEPC(%a6),%d0
+	andib	#0xFE,%d0		|bit 0 is ignored in the test
+	beq	fix_done	|if zero, this is not bug #1238
+
+|
+| Test the register conflict aspect.  If opclass0, check for
+| cu src or cu dest equal to nu dest.  If so, go to op0.
+| Otherwise check for cu dest or cu src equal to xu dest.  If
+| so, go to op0_xu.  Else exit, it is not the bug case.
+| Opclass 2 is checked separately at op2sgl.
+|
+| Check for opclass 0.  If not, go and check for opclass 2 and sgl.
+|
+	movew	CMDREG1B(%a6),%d0
+	andiw	#0xE000,%d0		|strip all but opclass
+	bne	op2sgl			|not opclass 0, check op2
+|
+| Check for cu and nu register conflict.  If one exists, this takes
+| priority over a cu and xu conflict.
+|
+	bfextu	CMDREG1B(%a6){#3:#3},%d0	|get 1st src
+	bfextu	CMDREG3B(%a6){#6:#3},%d1	|get 3rd dest
+	cmpb	%d0,%d1			|cmp 1st src with 3rd dest
+	beqs	op0			|if equal, continue bugfix
+|
+| Check for cu dest equal to nu dest.  If so, go and fix the
+| bug condition.  Otherwise, check for an xu conflict.
+|
+	bfextu	CMDREG1B(%a6){#6:#3},%d0	|get 1st dest
+	cmpb	%d0,%d1			|cmp 1st dest with 3rd dest
+	beqs	op0			|if equal, continue bugfix
+|
+| Check for cu and xu register conflict.
+|
+	bfextu	CMDREG2B(%a6){#6:#3},%d1	|get 2nd dest
+	cmpb	%d0,%d1			|cmp 1st dest with 2nd dest
+	beqs	op0_xu			|if equal, continue bugfix
+	bfextu	CMDREG1B(%a6){#3:#3},%d0	|get 1st src
+	cmpb	%d0,%d1			|cmp 1st src with 2nd dest
+	beq	op0_xu			|if equal, continue bugfix
+	bne	fix_done		|if the reg checks fail, exit
+|
+| We have the opclass 0 situation.
+|
+op0:
+	bfextu	CMDREG1B(%a6){#3:#3},%d0	|get source register no
+	movel	#7,%d1
+	subl	%d0,%d1			|d1 = 7 - source register no
+	clrl	%d0
+	bsetl	%d1,%d0			|d0 = one-bit fmovem register mask
+	fmovemx %d0,ETEMP(%a6)		|load source to ETEMP
+
+	moveb	#0x12,%d0		|0x12 = 010010 binary
+	bfins	%d0,CMDREG1B(%a6){#0:#6}	|opclass 2, extended
+|
+|	Set ETEMP exponent bit 15 as the opposite of ete14
+|
+	btst	#6,ETEMP_EX(%a6)		|check etemp exponent bit 14
+	beq	setete15		|ete14 clear, so set ete15
+	bclr	#etemp15_bit,STAG(%a6)	|ete14 set, so clear ete15
+	bra	finish
+setete15:
+	bset	#etemp15_bit,STAG(%a6)
+	bra	finish
+
+|
+| We have the case in which a conflict exists between the cu src or
+| dest and the dest of the xu.  We must clear the instruction in
+| the cu and restore the state, allowing the instruction in the
+| xu to complete.  Remember, the instruction in the nu
+| was exceptional, and was completed by the appropriate handler.
+| If the result of the xu instruction is not exceptional, we can
+| restore the instruction from the cu to the frame and continue
+| processing the original exception.  If the result is also
+| exceptional, we choose to kill the process.
+|
+|	Items saved from the stack:
+|
+|		$3c stag     - L_SCR1
+|		$40 cmdreg1b - L_SCR2
+|		$44 dtag     - L_SCR3
+|
+| The cu savepc is set to zero, and the frame is restored to the
+| fpu.
+|
+op0_xu:
+	movel	STAG(%a6),L_SCR1(%a6)	|save stag lword
+	movel	CMDREG1B(%a6),L_SCR2(%a6)	|save cmdreg1b lword
+	movel	DTAG(%a6),L_SCR3(%a6)	|save dtag lword
+	andil	#0xe0000000,L_SCR3(%a6)	|keep only the top 3 bits of the copy
+	moveb	#0,CU_SAVEPC(%a6)	|clear cu savepc
+	movel	(%a7)+,%d1		|save return address from bsr
+	frestore (%a7)+			|restore the frame to the fpu
+	fsave	-(%a7)			|and save the resulting state again
+|
+| Check if the instruction which just completed was exceptional.
+|
+	cmpw	#0x4060,(%a7)		|$4060 means a busy frame came back
+	beq	op0_xb
+|
+| It is necessary to isolate the result of the instruction in the
+| xu if it is to fp0 - fp3 and write that value to the USER_FPn
+| locations on the stack.  The correct destination register is in
+| cmdreg2b.
+|
+	bfextu	CMDREG2B(%a6){#6:#3},%d0	|get dest register no
+	cmpil	#3,%d0
+	bgts	op0_xi			|fp4-fp7: no USER_FPn copy to update
+	beqs	op0_fp3
+	cmpil	#1,%d0
+	blts	op0_fp0
+	beqs	op0_fp1
+op0_fp2:
+	fmovemx %fp2-%fp2,USER_FP2(%a6)
+	bras	op0_xi
+op0_fp1:
+	fmovemx %fp1-%fp1,USER_FP1(%a6)
+	bras	op0_xi
+op0_fp0:
+	fmovemx %fp0-%fp0,USER_FP0(%a6)
+	bras	op0_xi
+op0_fp3:
+	fmovemx %fp3-%fp3,USER_FP3(%a6)
+|
+| The frame returned is idle.  We must build a busy frame to hold
+| the cu state information and setup etemp.
+|
+op0_xi:
+	movel	#22,%d0		|clear 23 lwords
+	clrl	(%a7)		|zero the lword already on the stack
+op0_loop:
+	clrl	-(%a7)
+	dbf	%d0,op0_loop
+	movel	#0x40600000,-(%a7)	|frame header: format $4060
+	movel	L_SCR1(%a6),STAG(%a6)	|reload the saved stag,
+	movel	L_SCR2(%a6),CMDREG1B(%a6)	|cmdreg1b,
+	movel	L_SCR3(%a6),DTAG(%a6)	|and masked dtag
+	moveb	#0x6,CU_SAVEPC(%a6)	|make cu savepc non-zero again
+	movel	%d1,-(%a7)		|restore bsr return address
+	bfextu	CMDREG1B(%a6){#3:#3},%d0	|get source register no
+	movel	#7,%d1
+	subl	%d0,%d1			|d1 = 7 - source register no
+	clrl	%d0
+	bsetl	%d1,%d0			|d0 = one-bit fmovem register mask
+	fmovemx %d0,ETEMP(%a6)		|load source to ETEMP
+
+	moveb	#0x12,%d0		|0x12 = 010010 binary
+	bfins	%d0,CMDREG1B(%a6){#0:#6}	|opclass 2, extended
+|
+|	Set ETEMP exponent bit 15 as the opposite of ete14
+|
+	btst	#6,ETEMP_EX(%a6)		|check etemp exponent bit 14
+	beq	op0_sete15		|ete14 clear, so set ete15
+	bclr	#etemp15_bit,STAG(%a6)	|ete14 set, so clear ete15
+	bra	finish
+op0_sete15:
+	bset	#etemp15_bit,STAG(%a6)
+	bra	finish
+
+|
+| The frame returned is busy.  It is not possible to reconstruct
+| the code sequence to allow completion.  We will jump to
+| fpsp_fmt_error and allow the kernel to kill the process.
+|
+op0_xb:
+	jmp	fpsp_fmt_error		|busy frame: give up on the fix
+
+|
+| Check for opclass 2 and single size.  If not both, exit.
+|
+op2sgl:
+	movew	CMDREG1B(%a6),%d0
+	andiw	#0xFC00,%d0		|strip all but opclass and size
+	cmpiw	#0x4400,%d0		|test for opclass 2 and size=sgl
+	bne	fix_done		|if not, it is not bug 1238
+|
+| Check for cu dest equal to nu dest or equal to xu dest, with
+| a cu and nu conflict taking priority over an xu conflict.  If
+| either, go and fix the bug condition.  Otherwise, exit.
+|
+	bfextu	CMDREG1B(%a6){#6:#3},%d0	|get 1st dest
+	bfextu	CMDREG3B(%a6){#6:#3},%d1	|get 3rd dest
+	cmpb	%d0,%d1			|cmp 1st dest with 3rd dest
+	beq	op2_com			|if equal, continue bugfix
+	bfextu	CMDREG2B(%a6){#6:#3},%d1	|get 2nd dest
+	cmpb	%d0,%d1			|cmp 1st dest with 2nd dest
+	bne	fix_done		|if the reg checks fail, exit
+|
+| We have the case in which a conflict exists between the cu src or
+| dest and the dest of the xu.  We must clear the instruction in
+| the cu and restore the state, allowing the instruction in the
+| xu to complete.  Remember, the instruction in the nu
+| was exceptional, and was completed by the appropriate handler.
+| If the result of the xu instruction is not exceptional, we can
+| restore the instruction from the cu to the frame and continue
+| processing the original exception.  If the result is also
+| exceptional, we choose to kill the process.
+|
+|	Items saved from the stack:
+|
+|		$3c stag     - L_SCR1
+|		$40 cmdreg1b - L_SCR2
+|		$44 dtag     - L_SCR3
+|		etemp        - FP_SCR2
+|
+| The cu savepc is set to zero, and the frame is restored to the
+| fpu.
+|
+op2_xu:
+	movel	STAG(%a6),L_SCR1(%a6)	|save stag lword
+	movel	CMDREG1B(%a6),L_SCR2(%a6)	|save cmdreg1b lword
+	movel	DTAG(%a6),L_SCR3(%a6)	|save dtag lword
+	andil	#0xe0000000,L_SCR3(%a6)	|keep only the top 3 bits of the copy
+	moveb	#0,CU_SAVEPC(%a6)	|clear cu savepc
+	movel	ETEMP(%a6),FP_SCR2(%a6)	|save all three etemp lwords
+	movel	ETEMP_HI(%a6),FP_SCR2+4(%a6)
+	movel	ETEMP_LO(%a6),FP_SCR2+8(%a6)
+	movel	(%a7)+,%d1		|save return address from bsr
+	frestore (%a7)+			|restore the frame to the fpu
+	fsave	-(%a7)			|and save the resulting state again
+|
+| Check if the instruction which just completed was exceptional.
+|
+	cmpw	#0x4060,(%a7)		|$4060 means a busy frame came back
+	beq	op2_xb
+|
+| It is necessary to isolate the result of the instruction in the
+| xu if it is to fp0 - fp3 and write that value to the USER_FPn
+| locations on the stack.  The correct destination register is in
+| cmdreg2b.
+|
+	bfextu	CMDREG2B(%a6){#6:#3},%d0	|get dest register no
+	cmpil	#3,%d0
+	bgts	op2_xi			|fp4-fp7: no USER_FPn copy to update
+	beqs	op2_fp3
+	cmpil	#1,%d0
+	blts	op2_fp0
+	beqs	op2_fp1
+op2_fp2:
+	fmovemx %fp2-%fp2,USER_FP2(%a6)
+	bras	op2_xi
+op2_fp1:
+	fmovemx %fp1-%fp1,USER_FP1(%a6)
+	bras	op2_xi
+op2_fp0:
+	fmovemx %fp0-%fp0,USER_FP0(%a6)
+	bras	op2_xi
+op2_fp3:
+	fmovemx %fp3-%fp3,USER_FP3(%a6)
+|
+| The frame returned is idle.  We must build a busy frame to hold
+| the cu state information and fix up etemp.
+|
+op2_xi:
+	movel	#22,%d0		|clear 23 lwords
+	clrl	(%a7)		|zero the lword already on the stack
+op2_loop:
+	clrl	-(%a7)
+	dbf	%d0,op2_loop
+	movel	#0x40600000,-(%a7)	|frame header: format $4060
+	movel	L_SCR1(%a6),STAG(%a6)	|reload the saved stag,
+	movel	L_SCR2(%a6),CMDREG1B(%a6)	|cmdreg1b,
+	movel	L_SCR3(%a6),DTAG(%a6)	|and masked dtag
+	moveb	#0x6,CU_SAVEPC(%a6)	|make cu savepc non-zero again
+	movel	FP_SCR2(%a6),ETEMP(%a6)	|reload the saved etemp
+	movel	FP_SCR2+4(%a6),ETEMP_HI(%a6)
+	movel	FP_SCR2+8(%a6),ETEMP_LO(%a6)
+	movel	%d1,-(%a7)		|restore bsr return address
+	bra	op2_com			|op2_com is the next instruction
+
+|
+| We have the opclass 2 single source situation.
+|
+op2_com:
+	moveb	#0x15,%d0		|0x15 = 010101 binary
+	bfins	%d0,CMDREG1B(%a6){#0:#6}	|opclass 2, double
+
+	cmpw	#0x407F,ETEMP_EX(%a6)	|single +max
+	bnes	case2
+	movew	#0x43FF,ETEMP_EX(%a6)	|to double +max
+	bra	finish
+case2:
+	cmpw	#0xC07F,ETEMP_EX(%a6)	|single -max
+	bnes	case3
+	movew	#0xC3FF,ETEMP_EX(%a6)	|to double -max
+	bra	finish
+case3:
+	cmpw	#0x3F80,ETEMP_EX(%a6)	|single +min
+	bnes	case4
+	movew	#0x3C00,ETEMP_EX(%a6)	|to double +min
+	bra	finish
+case4:
+	cmpw	#0xBF80,ETEMP_EX(%a6)	|single -min
+	bne	fix_done		|none of the four: exponent left as is
+	movew	#0xBC00,ETEMP_EX(%a6)	|to double -min
+	bra	finish
+|
+| The frame returned is busy.  It is not possible to reconstruct
+| the code sequence to allow completion.  fpsp_fmt_error causes
+| an fline illegal instruction to be executed.
+|
+| You should replace the jump to fpsp_fmt_error with a jump
+| to the entry point used to kill a process.
+|
+op2_xb:
+	jmp	fpsp_fmt_error		|busy frame: give up on the fix
+
+|
+| Enter here if the case is not of the situations affected by
+| bug #1238, or if the fix is completed, and exit.
+|
+finish:
+fix_done:
+	rts				|shared exit for the fixed and no-fix paths
+
+	|end
diff --git a/arch/m68k/fpsp040/decbin.S b/arch/m68k/fpsp040/decbin.S
new file mode 100644
index 0000000..2160609
--- /dev/null
+++ b/arch/m68k/fpsp040/decbin.S
@@ -0,0 +1,506 @@
+|
+|	decbin.sa 3.3 12/19/90
+|
+|	Description: Converts normalized packed bcd value pointed to by
+|	register A6 to extended-precision value in FP0.
+|
+|	Input: Normalized packed bcd value in ETEMP(a6).
+|
+|	Output:	Exact floating-point representation of the packed bcd value.
+|
+|	Saves and Modifies: D2-D5
+|
+|	Speed: The program decbin takes ??? cycles to execute.
+|
+|	Object Size:
+|
+|	External Reference(s): None.
+|
+|	Algorithm:
+|	Expected is a normal bcd (i.e. non-exceptional; all inf, zero,
+|	and NaN operands are dispatched without entering this routine)
+|	value in 68881/882 format at location ETEMP(A6).
+|
+|	A1.	Convert the bcd exponent to binary by successive adds and muls.
+|	Set the sign according to SE. Subtract 16 to compensate
+|	for the mantissa which is to be interpreted as 17 integer
+|	digits, rather than 1 integer and 16 fraction digits.
+|	Note: this operation can never overflow.
+|
+|	A2. Convert the bcd mantissa to binary by successive
+|	adds and muls in FP0. Set the sign according to SM.
+|	The mantissa digits will be converted with the decimal point
+|	assumed following the least-significant digit.
+|	Note: this operation can never overflow.
+|
+|	A3. Count the number of leading/trailing zeros in the
+|	bcd string.  If SE is positive, count the leading zeros;
+|	if negative, count the trailing zeros.  Set the adjusted
+|	exponent equal to the exponent from A1 and the zero count
+|	added if SE = 1 and subtracted if SE = 0.  Scale the
+|	mantissa the equivalent of forcing in the bcd value:
+|
+|	SE = 0	a non-zero digit in the integer position
+|	SE = 1	a non-zero digit in Mant0, lsd of the fraction
+|
+|	this will ensure that any value, regardless of its
+|	representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted
+|	consistently.
+|
+|	A4. Calculate the factor 10^exp in FP1 using a table of
+|	10^(2^n) values.  To reduce the error in forming factors
+|	greater than 10^27, a directed rounding scheme is used with
+|	tables rounded to RN, RM, and RP, according to the table
+|	in the comments of the pwrten section.
+|
+|	A5. Form the final binary number by scaling the mantissa by
+|	the exponent factor.  This is done by multiplying the
+|	mantissa in FP0 by the factor in FP1 if the adjusted
+|	exponent sign is positive, and dividing FP0 by FP1 if
+|	it is negative.
+|
+|	Clean up and return.  Check if the final mul or div resulted
+|	in an inex2 exception.  If so, set inex1 in the fpsr and
+|	check if the inex1 exception is enabled.  If so, set d7 upper
+|	word to $0100.  This will signal unimp.sa that an enabled inex1
+|	exception occurred.  Unimp will fix the stack.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|DECBIN    idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+|
+|	PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
+|	to nearest, minus, and plus, respectively.  The tables include
+|	10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
+|	is required until the power is greater than 27, however, all
+|	tables include the first 5 for ease of indexing.
+|
+	|xref	PTENRN
+	|xref	PTENRM
+	|xref	PTENRP
+
+RTABLE:	.byte	0,0,0,0
+	.byte	2,3,2,3
+	.byte	2,3,3,2
+	.byte	3,2,2,3
+
+	.global	decbin
+	.global	calc_e
+	.global	pwrten
+	.global	calc_m
+	.global	norm
+	.global	ap_st_z
+	.global	ap_st_n
+|
+	.set	FNIBS,7
+	.set	FSTRT,0
+|
+	.set	ESTRT,4
+	.set	EDIGITS,2	|
+|
+| Constants in single precision
+FZERO:	.long	0x00000000
+FONE:	.long	0x3F800000
+FTEN:	.long	0x41200000
+
+	.set	TEN,10
+
+|
+decbin:
+	| fmovel	#0,FPCR		;clr real fpcr
+	moveml	%d2-%d5,-(%a7)
+|
+| Calculate exponent:
+|  1. Copy bcd value in memory for use as a working copy.
+|  2. Calculate absolute value of exponent in d1 by mul and add.
+|  3. Correct for exponent sign.
+|  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
+|     (i.e., all digits assumed left of the decimal point.)
+|
+| Register usage:
+|
+|  calc_e:
+|	(*)  d0: temp digit storage
+|	(*)  d1: accumulator for binary exponent
+|	(*)  d2: digit count
+|	(*)  d3: offset pointer
+|	( )  d4: first word of bcd
+|	( )  a0: pointer to working bcd value
+|	( )  a6: pointer to original bcd value
+|	(*)  FP_SCR1: working copy of original bcd value
+|	(*)  L_SCR1: absolute value of the adjusted binary exponent
+|
+calc_e:
+	movel	#EDIGITS,%d2	|dbf count for the 3 exponent digits
+	moveql	#ESTRT,%d3	|counter to pick up digits
+	leal	FP_SCR1(%a6),%a0	|load tmp bcd storage address
+	movel	ETEMP(%a6),(%a0)	|save input bcd value
+	movel	ETEMP_HI(%a6),4(%a0) |save words 2 and 3
+	movel	ETEMP_LO(%a6),8(%a0) |and work with these
+	movel	(%a0),%d4	|get first word of bcd
+	clrl	%d1		|zero d1 for accumulator
+e_gd:
+	mulul	#TEN,%d1	|mul partial product by one digit place
+	bfextu	%d4{%d3:#4},%d0	|get the digit and zero extend into d0
+	addl	%d0,%d1		|d1 = d1 + d0
+	addqb	#4,%d3		|advance d3 to the next digit
+	dbf	%d2,e_gd	|if we have used all 3 digits, exit loop
+	btst	#30,%d4		|get SE
+	beqs	e_pos		|don't negate if pos
+	negl	%d1		|negate before subtracting
+e_pos:
+	subl	#16,%d1		|sub to compensate for shift of mant
+	bges	e_save		|if still pos, do not neg
+	negl	%d1		|now negative, make pos and set SE
+	orl	#0x40000000,%d4	|set SE in d4,
+	orl	#0x40000000,(%a0)	|and in working bcd
+e_save:
+	movel	%d1,L_SCR1(%a6)	|save abs(exp) in memory
+|
+|
+| Calculate mantissa:
+|  1. Calculate absolute value of mantissa in fp0 by mul and add.
+|  2. Correct for mantissa sign.
+|     (i.e., all digits assumed left of the decimal point.)
+|
+| Register usage:
+|
+|  calc_m:
+|	(*)  d0: temp digit storage
+|	(*)  d1: lword counter
+|	(*)  d2: digit count
+|	(*)  d3: offset pointer
+|	( )  d4: words 2 and 3 of bcd
+|	( )  a0: pointer to working bcd value
+|	( )  a6: pointer to original bcd value
+|	(*) fp0: mantissa accumulator
+|	( )  FP_SCR1: working copy of original bcd value
+|	( )  L_SCR1: copy of original exponent word
+|
+calc_m:
+	moveql	#1,%d1		|lword counter, init to 1
+	fmoves	FZERO,%fp0	|accumulator
+|
+|
+|  Since the packed number has a long word between the first & second parts,
+|  get the integer digit then skip down & get the rest of the
+|  mantissa.  We will unroll the loop once.
+|
+	bfextu	(%a0){#28:#4},%d0	|integer part is ls digit in long word
+	faddb	%d0,%fp0		|add digit to sum in fp0
+|
+|
+|  Get the rest of the mantissa.
+|
+loadlw:
+	movel	(%a0,%d1.L*4),%d4	|load mantissa longword into d4
+	moveql	#FSTRT,%d3	|counter to pick up digits
+	moveql	#FNIBS,%d2	|reset number of digits per a0 ptr
+md2b:
+	fmuls	FTEN,%fp0	|fp0 = fp0 * 10
+	bfextu	%d4{%d3:#4},%d0	|get the digit and zero extend
+	faddb	%d0,%fp0	|fp0 = fp0 + digit
+|
+|
+|  If all the digits (8) in that long word have been converted (dbf on
+|  d2 has expired), then inc d1 (=2) to point to the next long word;
+|  loadlw resets d3 to 0 for the digit offset and d2 to 7 for the
+|  digit count.  Else continue with this long word.
+|
+	addqb	#4,%d3		|advance d3 to the next digit
+	dbf	%d2,md2b		|check for last digit in this lw
+nextlw:
+	addql	#1,%d1		|inc lw pointer in mantissa
+	cmpl	#2,%d1		|test for last lw
+	ble	loadlw		|if not, get last one
+
+|
+|  Check the sign of the mant and make the value in fp0 the same sign.
+|
+m_sign:
+	btst	#31,(%a0)	|test sign of the mantissa
+	beq	ap_st_z		|if clear, go to append/strip zeros
+	fnegx	%fp0		|if set, negate fp0
+
+|
+| Append/strip zeros:
+|
+|  For adjusted exponents which have an absolute value greater than 27*,
+|  this routine calculates the amount needed to normalize the mantissa
+|  for the adjusted exponent.  That number is subtracted from the exp
+|  if the exp was positive, and added if it was negative.  The purpose
+|  of this is to reduce the value of the exponent and the possibility
+|  of error in calculation of pwrten.
+|
+|  1. Branch on the sign of the adjusted exponent.
+|  2p.(positive exp)
+|   2. Check M16 and the digits in lwords 2 and 3 in descending order.
+|   3. Add one for each zero encountered until a non-zero digit.
+|   4. Subtract the count from the exp.
+|   5. Check if the exp has crossed zero in #3 above; make the exp abs
+|	   and set SE.
+|	6. Multiply the mantissa by 10**count.
+|  2n.(negative exp)
+|   2. Check the digits in lwords 3 and 2 in descending order.
+|   3. Add one for each zero encountered until a non-zero digit.
+|   4. Add the count to the exp.
+|   5. Check if the exp has crossed zero in #3 above; clear SE.
+|   6. Divide the mantissa by 10**count.
+|
+|  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
+|   any adjustment due to append/strip zeros will drive the resultant
+|   exponent towards zero.  Since all pwrten constants with a power
+|   of 27 or less are exact, there is no need to use this routine to
+|   attempt to lessen the resultant exponent.
+|
+| Register usage:
+|
+|  ap_st_z:
+|	(*)  d0: temp digit storage
+|	(*)  d1: zero count
+|	(*)  d2: digit count
+|	(*)  d3: offset pointer
+|	( )  d4: first word of bcd
+|	(*)  d5: lword counter
+|	( )  a0: pointer to working bcd value
+|	( )  FP_SCR1: working copy of original bcd value
+|	( )  L_SCR1: copy of original exponent word
+|
+|
+| First check the absolute value of the exponent to see if this
+| routine is necessary.  If so, then check the sign of the exponent
+| and do append (+) or strip (-) zeros accordingly.
+| This section handles a positive adjusted exponent.
+|
+ap_st_z:
+	movel	L_SCR1(%a6),%d1	|load expA for range test
+	cmpl	#27,%d1		|test is with 27
+	ble	pwrten		|if abs(expA) <28, skip ap/st zeros
+	btst	#30,(%a0)	|check sign of exp
+	bne	ap_st_n		|if neg, go to neg side
+	clrl	%d1		|zero count reg
+	movel	(%a0),%d4		|load lword 1 to d4
+	bfextu	%d4{#28:#4},%d0	|get M16 in d0
+	bnes	ap_p_fx		|if M16 is non-zero, go fix exp
+	addql	#1,%d1		|inc zero count
+	moveql	#1,%d5		|init lword counter
+	movel	(%a0,%d5.L*4),%d4	|get lword 2 to d4
+	bnes	ap_p_cl		|if lw 2 is zero, skip it
+	addql	#8,%d1		|and inc count by 8
+	addql	#1,%d5		|inc lword counter
+	movel	(%a0,%d5.L*4),%d4	|get lword 3 to d4
+ap_p_cl:
+	clrl	%d3		|init offset reg
+	moveql	#7,%d2		|init digit counter
+ap_p_gd:
+	bfextu	%d4{%d3:#4},%d0	|get digit
+	bnes	ap_p_fx		|if non-zero, go to fix exp
+	addql	#4,%d3		|point to next digit
+	addql	#1,%d1		|inc zero count
+	dbf	%d2,ap_p_gd	|get next digit
+ap_p_fx:
+	movel	%d1,%d0		|copy counter to d0
+	movel	L_SCR1(%a6),%d1	|get adjusted exp from memory
+	subl	%d0,%d1		|subtract count from exp
+	bges	ap_p_fm		|if still pos, go fix mantissa
+	negl	%d1		|now its neg; get abs
+	movel	(%a0),%d4		|load lword 1 to d4
+	orl	#0x40000000,%d4	| and set SE in d4
+	orl	#0x40000000,(%a0)	| and in memory
+|
+| Calculate the mantissa multiplier to compensate for the stripping of
+| zeros from the mantissa.
+|
+ap_p_fm:
+	movel	#PTENRN,%a1	|get address of power-of-ten table
+	clrl	%d3		|init table index
+	fmoves	FONE,%fp1	|init fp1 to 1
+	moveql	#3,%d2		|d2 = 3 (not read by the loop below)
+ap_p_el:
+	asrl	#1,%d0		|shift lsb into carry
+	bccs	ap_p_en		|if bit was 0, skip the mul
+	fmulx	(%a1,%d3),%fp1	|mul by 10**(d3_bit_no)
+ap_p_en:
+	addl	#12,%d3		|inc d3 to next power-of-ten entry
+	tstl	%d0		|check if d0 is zero
+	bnes	ap_p_el		|if not, get next bit
+	fmulx	%fp1,%fp0		|mul mantissa by 10**(no_bits_shifted)
+	bra	pwrten		|go calc pwrten
+|
+| This section handles a negative adjusted exponent.
+|
+ap_st_n:
+	clrl	%d1		|clr counter
+	moveql	#2,%d5		|set up d5 to point to lword 3
+	movel	(%a0,%d5.L*4),%d4	|get lword 3
+	bnes	ap_n_cl		|if not zero, check digits
+	subl	#1,%d5		|dec d5 to point to lword 2
+	addql	#8,%d1		|inc counter by 8
+	movel	(%a0,%d5.L*4),%d4	|get lword 2
+ap_n_cl:
+	movel	#28,%d3		|point to last digit
+	moveql	#7,%d2		|init digit counter
+ap_n_gd:
+	bfextu	%d4{%d3:#4},%d0	|get digit
+	bnes	ap_n_fx		|if non-zero, go to exp fix
+	subql	#4,%d3		|point to previous digit
+	addql	#1,%d1		|inc zero count
+	dbf	%d2,ap_n_gd	|get next digit
+ap_n_fx:
+	movel	%d1,%d0		|copy counter to d0
+	movel	L_SCR1(%a6),%d1	|get adjusted exp (abs value) from memory
+	subl	%d0,%d1		|subtract count from abs(exp)
+	bgts	ap_n_fm		|if still pos, go fix mantissa
+	negl	%d1		|take abs of exp and clr SE
+	movel	(%a0),%d4		|load lword 1 to d4
+	andl	#0xbfffffff,%d4	| and clr SE in d4
+	andl	#0xbfffffff,(%a0)	| and in memory
+|
+| Calculate the mantissa multiplier to compensate for the appending of
+| zeros to the mantissa.
+|
+ap_n_fm:
+	movel	#PTENRN,%a1	|get address of power-of-ten table
+	clrl	%d3		|init table index
+	fmoves	FONE,%fp1	|init fp1 to 1
+	moveql	#3,%d2		|d2 = 3 (not read by the loop below)
+ap_n_el:
+	asrl	#1,%d0		|shift lsb into carry
+	bccs	ap_n_en		|if bit was 0, skip the mul
+	fmulx	(%a1,%d3),%fp1	|mul by 10**(d3_bit_no)
+ap_n_en:
+	addl	#12,%d3		|inc d3 to next power-of-ten entry
+	tstl	%d0		|check if d0 is zero
+	bnes	ap_n_el		|if not, get next bit
+	fdivx	%fp1,%fp0		|div mantissa by 10**(no_bits_shifted)
+|
+|
+| Calculate power-of-ten factor from adjusted and shifted exponent.
+|
+| Register usage:
+|
+|  pwrten:
+|	(*)  d0: temp
+|	( )  d1: exponent
+|	(*)  d2: {FPCR[5:4],SM,SE} as index in RTABLE; temp
+|	(*)  d3: FPCR work copy
+|	( )  d4: first word of bcd
+|	(*)  a1: RTABLE pointer
+|  calc_p:
+|	(*)  d0: temp
+|	( )  d1: exponent
+|	(*)  d3: PWRTxx table index
+|	( )  a0: pointer to working copy of bcd
+|	(*)  a1: PWRTxx pointer
+|	(*) fp1: power-of-ten accumulator
+|
+| Pwrten calculates the exponent factor in the selected rounding mode
+| according to the following table:
+|
+|	Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
+|
+|	ANY	  ANY	RN	RN
+|
+|	 +	   +	RP	RP
+|	 -	   +	RP	RM
+|	 +	   -	RP	RM
+|	 -	   -	RP	RP
+|
+|	 +	   +	RM	RM
+|	 -	   +	RM	RP
+|	 +	   -	RM	RP
+|	 -	   -	RM	RM
+|
+|	 +	   +	RZ	RM
+|	 -	   +	RZ	RM
+|	 +	   -	RZ	RP
+|	 -	   -	RZ	RP
+|
+|
+pwrten:
+	movel	USER_FPCR(%a6),%d3 |get user's FPCR
+	bfextu	%d3{#26:#2},%d2	|isolate rounding mode bits
+	movel	(%a0),%d4		|reload 1st bcd word to d4
+	asll	#2,%d2		|format d2 to be
+	bfextu	%d4{#0:#2},%d0	| {FPCR[5],FPCR[4],SM,SE}
+	addl	%d0,%d2		|in d2 as index into RTABLE
+	leal	RTABLE,%a1	|load rtable base
+	moveb	(%a1,%d2),%d0	|load new rounding bits from table
+	clrl	%d3			|clear d3 to force no exc and extended
+	bfins	%d0,%d3{#26:#2}	|stuff new rounding bits in FPCR
+	fmovel	%d3,%FPCR		|write new FPCR
+	asrl	#1,%d0		|bit 0 of rounding bits into carry
+	bccs	not_rp		|bit 0 clear, so not RP
+	leal	PTENRP,%a1	|it is RP
+	bras	calc_p		|go to init section
+not_rp:
+	asrl	#1,%d0		|keep checking
+	bccs	not_rm		|bit 1 clear, so not RM
+	leal	PTENRM,%a1	|it is RM
+	bras	calc_p		|go to init section
+not_rm:
+	leal	PTENRN,%a1	|it is RN
+calc_p:
+	movel	%d1,%d0		|copy exp to d0;use d0
+	bpls	no_neg		|if exp is negative,
+	negl	%d0		|invert it
+	orl	#0x40000000,(%a0)	|and set SE bit
+no_neg:
+	clrl	%d3		|table index
+	fmoves	FONE,%fp1	|init fp1 to 1
+e_loop:
+	asrl	#1,%d0		|shift next bit into carry
+	bccs	e_next		|if zero, skip the mul
+	fmulx	(%a1,%d3),%fp1	|mul by 10**(d3_bit_no)
+e_next:
+	addl	#12,%d3		|inc d3 to next PTENxx table entry
+	tstl	%d0		|check if d0 is zero
+	bnes	e_loop		|not zero, continue shifting
+|
+|
+|  Check the sign of the adjusted exp and make the value in fp0 the
+|  same sign. If the exp was pos then multiply fp1*fp0;
+|  else divide fp0/fp1.
+|
+| Register Usage:
+|  norm:
+|	( )  a0: pointer to working bcd value
+|	(*) fp0: mantissa accumulator
+|	( ) fp1: scaling factor - 10**(abs(exp))
+|
+norm:
+	btst	#30,(%a0)	|test the sign of the exponent
+	beqs	mul		|if clear, go to multiply
+div:
+	fdivx	%fp1,%fp0		|exp is negative, so divide mant by 10**abs(exp)
+	bras	end_dec
+mul:
+	fmulx	%fp1,%fp0		|exp is positive, so multiply by 10**exp
+|
+|
+| Clean up and return with result in fp0.
+|
+| If the final mul/div in decbin incurred an inex exception,
+| it will be inex2, but will be reported as inex1 by get_op.
+|
+end_dec:
+	fmovel	%FPSR,%d0		|get status register
+	bclrl	#inex2_bit+8,%d0	|test for inex2 and clear it
+	fmovel	%d0,%FPSR		|return status reg w/o inex2
+	beqs	no_exc		|inex2 was clear (Z from bclr): no exc
+	orl	#inx1a_mask,USER_FPSR(%a6) |set inex1/ainex
+no_exc:
+	moveml	(%a7)+,%d2-%d5
+	rts
+	|end
diff --git a/arch/m68k/fpsp040/do_func.S b/arch/m68k/fpsp040/do_func.S
new file mode 100644
index 0000000..81f6a98
--- /dev/null
+++ b/arch/m68k/fpsp040/do_func.S
@@ -0,0 +1,559 @@
+|
+|	do_func.sa 3.4 2/18/91
+|
+| Do_func performs the unimplemented operation.  The operation
+| to be performed is determined from the lower 7 bits of the
+| extension word (except in the case of fmovecr and fsincos).
+| The opcode and tag bits form an index into a jump table in
+| tbldo.sa.  Cases of zero, infinity and NaN are handled in
+| do_func by forcing the default result.  Normalized and
+| denormalized (there are no unnormalized numbers at this
+| point) are passed onto the emulation code.
+|
+| CMDREG1B and STAG are extracted from the fsave frame
+| and combined to form the table index.  The function called
+| will start with a0 pointing to the ETEMP operand.  Dyadic
+| functions can find FPTEMP at -12(a0).
+|
+| Called functions return their result in fp0.  Sincos returns
+| sin(x) in fp0 and cos(x) in fp1.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+DO_FUNC:	|idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	t_dz2
+	|xref	t_operr
+	|xref	t_inx2
+	|xref	t_resdnrm
+	|xref	dst_nan
+	|xref	src_nan
+	|xref	nrm_set
+	|xref	sto_cos
+
+	|xref	tblpre
+	|xref	slognp1,slogn,slog10,slog2
+	|xref	slognd,slog10d,slog2d
+	|xref	smod,srem
+	|xref	sscale
+	|xref	smovcr
+
+PONE:	.long	0x3fff0000,0x80000000,0x00000000	|+1 (12 bytes: sign/exponent word, mantissa hi, mantissa lo)
+MONE:	.long	0xbfff0000,0x80000000,0x00000000	|-1
+PZERO:	.long	0x00000000,0x00000000,0x00000000	|+0
+MZERO:	.long	0x80000000,0x00000000,0x00000000	|-0
+PINF:	.long	0x7fff0000,0x00000000,0x00000000	|+inf
+MINF:	.long	0xffff0000,0x00000000,0x00000000	|-inf
+QNAN:	.long	0x7fff0000,0xffffffff,0xffffffff	|non-signaling nan (all mantissa bits set)
+PPIBY2:  .long	0x3FFF0000,0xC90FDAA2,0x2168C235	|+PI/2 (loaded by ld_ppi2)
+MPIBY2:  .long	0xbFFF0000,0xC90FDAA2,0x2168C235	|-PI/2 (loaded by ld_mpi2)
+
+	.global	do_func
+do_func:
+	clrb	CU_ONLY(%a6)	|clear the cu-only flag byte
+|
+| Check for fmovecr.  It does not follow the format of fp gen
+| unimplemented instructions.  The test is on the upper 6 bits;
+| if they are $17, the inst is fmovecr.  Call entry smovcr
+| directly.
+|
+	bfextu	CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields
+	cmpil	#0x17,%d0		|if op class and size fields are $17,
+|				;it is FMOVECR; if not, continue
+	bnes	not_fmovecr
+	jmp	smovcr		|fmovecr; jmp directly to emulation
+
+not_fmovecr:
+	movew	CMDREG1B(%a6),%d0	|d0.w = command word from the fsave frame
+	andl	#0x7F,%d0		|keep only the low 7 bits (the operation)
+	cmpil	#0x38,%d0		|if the extension is >= $38,
+	bge	serror		|it is illegal
+	bfextu	STAG(%a6){#0:#3},%d1	|d1 = upper 3 bits of STAG (source tag)
+	lsll	#3,%d0		|make room for STAG
+	addl	%d1,%d0		|combine for final index into table
+	leal	tblpre,%a1	|start of monster jump table
+	movel	(%a1,%d0.w*4),%a1	|real target address
+	leal	ETEMP(%a6),%a0	|a0 is pointer to src op
+	movel	USER_FPCR(%a6),%d1	|d1 = saved user FPCR
+	andl	#0xFF,%d1		| discard all but rounding mode/prec
+	fmovel	#0,%fpcr		|zero the live FPCR before dispatching
+	jmp	(%a1)		|enter handler with a0 -> ETEMP, d1 = mode byte
+|
+|	ERROR -- do_func branches here when the operation code is >= $38
+|
+	.global	serror
+serror:
+	st	STORE_FLG(%a6)	|set the STORE_FLG byte to all ones
+	rts			|return without loading a result into fp0
+|
+| These routines load forced values into fp0.  They are called
+| by index into tbldo.
+|
+| Load a signed zero to fp0 and set inex2/ainex
+| (a0 -> source operand; the zero takes the sign of the source)
+	.global	snzrinx
+snzrinx:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|get sign of source operand
+	bnes	ld_mzinx	|if negative, branch
+	bsr	ld_pzero	|bsr so we can return and set inx
+	bra	t_inx2		|now, set the inx for the next inst
+ld_mzinx:
+	bsr	ld_mzero	|if neg, load neg zero, return here
+	bra	t_inx2		|now, set the inx for the next inst
+|
+| Load a signed zero to fp0; do not set inex2/ainex
+| (tail-branches to ld_mzero/ld_pzero, whose rts returns to our caller)
+	.global	szero
+szero:
+	btstb	#sign_bit,LOCAL_EX(%a0) |get sign of source operand
+	bne	ld_mzero	|if neg, load neg zero
+	bra	ld_pzero	|load positive zero
+|
+| Load a signed infinity to fp0; do not set inex2/ainex
+| (a0 -> source operand; the infinity takes the sign of the source)
+	.global	sinf
+sinf:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|get sign of source operand
+	bne	ld_minf			|if negative branch
+	bra	ld_pinf		|positive source: load +inf
+|
+| Load a signed one to fp0; do not set inex2/ainex
+| (a0 -> source operand; the one takes the sign of the source)
+	.global	sone
+sone:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|check sign of source
+	bne	ld_mone		|negative source: load -1
+	bra	ld_pone		|positive source: load +1
+|
+| Load a signed pi/2 to fp0.  inex2 IS set on this path: both
+| ld_ppi2 and ld_mpi2 finish by branching to t_inx2.
+	.global	spi_2
+spi_2:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|check sign of source
+	bne	ld_mpi2		|negative source: load -pi/2
+	bra	ld_ppi2		|positive source: load +pi/2
+|
+| Load +inf for a positive operand or +0 for a negative operand
+| (the result is never negative)
+	.global	szr_inf
+szr_inf:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|check sign of source
+	bne	ld_pzero	|negative source: load +0
+	bra	ld_pinf		|positive source: load +inf
+|
+| Result is +inf for a positive operand or an operr for a negative one
+| [Used by slogn, slognp1, slog10, and slog2]
+|
+	.global	sopr_inf
+sopr_inf:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|check sign of source
+	bne	t_operr		|negative source: operand error
+	bra	ld_pinf		|positive source: load +inf
+|
+|	FLOGNP1 -- the source is compared against -1 before slognp1 is used
+|
+	.global	sslognp1
+sslognp1:
+	fmovemx (%a0),%fp0-%fp0	|fp0 = source operand
+	fcmpb	#-1,%fp0		|compare the source with -1
+	fbgt	slognp1		|source > -1: continue in slognp1
+	fbeq	t_dz2		|if = -1, divide by zero exception
+	fmovel	#0,%FPSR		|clr N flag
+	bra	t_operr		|take care of operands < -1
+|
+|	FETOXM1 -- negative source gives -1, positive source gives +inf
+|
+	.global	setoxm1i
+setoxm1i:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|check sign of source
+	bne	ld_mone		|negative source: result is -1
+	bra	ld_pinf		|positive source: result is +inf
+|
+|	FLOGN
+|
+| Test for 1.0 as an input argument, returning +zero.  Also check
+| the sign and return operr if negative.
+| NOTE(review): the 1.0 path does not OR z_mask into USER_FPSR as ld_pzero does -- confirm.
+	.global	sslogn
+sslogn:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|check sign of source
+	bne	t_operr		|take care of operands < 0
+	cmpiw	#0x3fff,LOCAL_EX(%a0) |test for 1.0 input
+	bne	slogn		|exponent word is not $3fff: use slogn
+	cmpil	#0x80000000,LOCAL_HI(%a0)	|upper mantissa long must be $80000000
+	bne	slogn		|not 1.0: use slogn
+	tstl	LOCAL_LO(%a0)	|lower mantissa long must be zero
+	bne	slogn		|not 1.0: use slogn
+	fmovex	PZERO,%fp0	|source is exactly +1.0: result is +0
+	rts
+
+	.global	sslognd
+sslognd:	|sign check in front of slognd
+	btstb	#sign_bit,LOCAL_EX(%a0)	|check sign of source
+	beq	slognd		|sign clear: continue in slognd
+	bra	t_operr		|take care of operands < 0
+
+|
+|	FLOG10 -- operr for a negative source, +0 for exactly 1.0, else slog10
+|
+	.global	sslog10
+sslog10:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|check sign of source
+	bne	t_operr		|take care of operands < 0
+	cmpiw	#0x3fff,LOCAL_EX(%a0) |test for 1.0 input
+	bne	slog10		|exponent word is not $3fff: use slog10
+	cmpil	#0x80000000,LOCAL_HI(%a0)	|upper mantissa long must be $80000000
+	bne	slog10		|not 1.0: use slog10
+	tstl	LOCAL_LO(%a0)	|lower mantissa long must be zero
+	bne	slog10		|not 1.0: use slog10
+	fmovex	PZERO,%fp0	|source is exactly +1.0: result is +0
+	rts
+
+	.global	sslog10d
+sslog10d:	|sign check in front of slog10d
+	btstb	#sign_bit,LOCAL_EX(%a0)	|check sign of source
+	beq	slog10d		|sign clear: continue in slog10d
+	bra	t_operr		|take care of operands < 0
+
+|
+|	FLOG2 -- operr for a negative source, +0 for exactly 1.0, else slog2
+|
+	.global	sslog2
+sslog2:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|check sign of source
+	bne	t_operr		|take care of operands < 0
+	cmpiw	#0x3fff,LOCAL_EX(%a0) |test for 1.0 input
+	bne	slog2		|exponent word is not $3fff: use slog2
+	cmpil	#0x80000000,LOCAL_HI(%a0)	|upper mantissa long must be $80000000
+	bne	slog2		|not 1.0: use slog2
+	tstl	LOCAL_LO(%a0)	|lower mantissa long must be zero
+	bne	slog2		|not 1.0: use slog2
+	fmovex	PZERO,%fp0	|source is exactly +1.0: result is +0
+	rts
+
+	.global	sslog2d
+sslog2d:	|sign check in front of slog2d
+	btstb	#sign_bit,LOCAL_EX(%a0)	|check sign of source
+	beq	slog2d		|sign clear: continue in slog2d
+	bra	t_operr		|take care of operands < 0
+
+|
+|	FMOD
+|
+pmodt:
+|				;$21 fmod
+|				;dtag,stag
+	.long	smod		|  00,00  norm,norm = normal
+	.long	smod_oper	|  00,01  norm,zero = nan with operr
+	.long	smod_fpn	|  00,10  norm,inf  = fpn
+	.long	smod_snan	|  00,11  norm,nan  = nan
+	.long	smod_zro	|  01,00  zero,norm = +-zero
+	.long	smod_oper	|  01,01  zero,zero = nan with operr
+	.long	smod_zro	|  01,10  zero,inf  = +-zero
+	.long	smod_snan	|  01,11  zero,nan  = nan
+	.long	smod_oper	|  10,00  inf,norm  = nan with operr
+	.long	smod_oper	|  10,01  inf,zero  = nan with operr
+	.long	smod_oper	|  10,10  inf,inf   = nan with operr
+	.long	smod_snan	|  10,11  inf,nan   = nan
+	.long	smod_dnan	|  11,00  nan,norm  = nan
+	.long	smod_dnan	|  11,01  nan,zero  = nan
+	.long	smod_dnan	|  11,10  nan,inf   = nan
+	.long	smod_dnan	|  11,11  nan,nan   = nan
+
+	.global	pmod
+pmod:
+	clrb	FPSR_QBYTE(%a6) | clear quotient field
+	bfextu	STAG(%a6){#0:#3},%d0 |stag = d0
+	bfextu	DTAG(%a6){#0:#3},%d1 |dtag = d1
+
+|
+| Alias extended denorms to norms for the jump table.
+|
+	bclrl	#2,%d0		|stag: denorm (100) -> norm (000)
+	bclrl	#2,%d1		|dtag: denorm (100) -> norm (000)
+
+	lslb	#2,%d1		|move dtag up to bits 3:2
+	orb	%d0,%d1		|d1{3:2} = dtag, d1{1:0} = stag
+|				;Tag values:
+|				;00 = norm or denorm
+|				;01 = zero
+|				;10 = inf
+|				;11 = nan
+	lea	pmodt,%a1	|a1 = base of the fmod dispatch table
+	movel	(%a1,%d1.w*4),%a1	|fetch the handler for this tag pair
+	jmp	(%a1)		|and go there
+
+smod_snan:	|pmodt rows with stag = nan (dtag not nan)
+	bra	src_nan
+smod_dnan:	|pmodt rows with dtag = nan
+	bra	dst_nan
+smod_oper:	|pmodt rows marked nan with operr
+	bra	t_operr
+smod_zro:	|dst is zero: return a zero with the sign of dst
+	moveb	ETEMP(%a6),%d1	|get sign of src op
+	moveb	FPTEMP(%a6),%d0	|get sign of dst op
+	eorb	%d0,%d1		|get exor of sign bits
+	btstl	#7,%d1		|test for sign
+	beqs	smod_zsn	|if clr, do not set sign bit
+	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
+smod_zsn:
+	btstl	#7,%d0		|test if + or -
+	beq	ld_pzero	|if pos then load +0
+	bra	ld_mzero	|else neg load -0
+
+smod_fpn:	|src is inf: the result is the dst operand itself
+	moveb	ETEMP(%a6),%d1	|get sign of src op
+	moveb	FPTEMP(%a6),%d0	|get sign of dst op
+	eorb	%d0,%d1		|get exor of sign bits
+	btstl	#7,%d1		|test for sign
+	beqs	smod_fsn	|if clr, do not set sign bit
+	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
+smod_fsn:
+	tstb	DTAG(%a6)	|filter out denormal destination case
+	bpls	smod_nrm	|bit 7 of the tag byte clear: dst is not a denorm
+	leal	FPTEMP(%a6),%a0	|a0<- addr(FPTEMP)
+	bra	t_resdnrm	|force UNFL(but exact) result
+smod_nrm:
+	fmovel USER_FPCR(%a6),%fpcr |use user's rmode and precision
+	fmovex FPTEMP(%a6),%fp0	|return dest to fp0
+	rts
+
+|
+|	FREM
+|
+premt:
+|				;$25 frem
+|				;dtag,stag
+	.long	srem		|  00,00  norm,norm = normal
+	.long	srem_oper	|  00,01  norm,zero = nan with operr
+	.long	srem_fpn	|  00,10  norm,inf  = fpn
+	.long	srem_snan	|  00,11  norm,nan  = nan
+	.long	srem_zro	|  01,00  zero,norm = +-zero
+	.long	srem_oper	|  01,01  zero,zero = nan with operr
+	.long	srem_zro	|  01,10  zero,inf  = +-zero
+	.long	srem_snan	|  01,11  zero,nan  = nan
+	.long	srem_oper	|  10,00  inf,norm  = nan with operr
+	.long	srem_oper	|  10,01  inf,zero  = nan with operr
+	.long	srem_oper	|  10,10  inf,inf   = nan with operr
+	.long	srem_snan	|  10,11  inf,nan   = nan
+	.long	srem_dnan	|  11,00  nan,norm  = nan
+	.long	srem_dnan	|  11,01  nan,zero  = nan
+	.long	srem_dnan	|  11,10  nan,inf   = nan
+	.long	srem_dnan	|  11,11  nan,nan   = nan
+
+	.global	prem
+prem:
+	clrb	FPSR_QBYTE(%a6)   |clear quotient field
+	bfextu	STAG(%a6){#0:#3},%d0 |stag = d0
+	bfextu	DTAG(%a6){#0:#3},%d1 |dtag = d1
+|
+| Alias extended denorms to norms for the jump table.
+| (bclrl matches the spelling used by pmod and pscale.)
+	bclrl	#2,%d0		|stag: denorm (100) -> norm (000)
+	bclrl	#2,%d1		|dtag: denorm (100) -> norm (000)
+
+	lslb	#2,%d1		|move dtag up to bits 3:2
+	orb	%d0,%d1		|d1{3:2} = dtag, d1{1:0} = stag
+|				;Tag values:
+|				;00 = norm or denorm
+|				;01 = zero
+|				;10 = inf
+|				;11 = nan
+	lea	premt,%a1	|a1 = base of the frem dispatch table
+	movel	(%a1,%d1.w*4),%a1	|fetch the handler for this tag pair
+	jmp	(%a1)		|and go there
+
+srem_snan:	|premt rows with stag = nan (dtag not nan)
+	bra	src_nan
+srem_dnan:	|premt rows with dtag = nan
+	bra	dst_nan
+srem_oper:	|premt rows marked nan with operr
+	bra	t_operr
+srem_zro:	|dst is zero: return a zero with the sign of dst
+	moveb	ETEMP(%a6),%d1	|get sign of src op
+	moveb	FPTEMP(%a6),%d0	|get sign of dst op
+	eorb	%d0,%d1		|get exor of sign bits
+	btstl	#7,%d1		|test for sign
+	beqs	srem_zsn	|if clr, do not set sign bit
+	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
+srem_zsn:
+	btstl	#7,%d0		|test if + or -
+	beq	ld_pzero	|if pos then load +0
+	bra	ld_mzero	|else neg load -0
+
+srem_fpn:	|src is inf: the result is the dst operand itself
+	moveb	ETEMP(%a6),%d1	|get sign of src op
+	moveb	FPTEMP(%a6),%d0	|get sign of dst op
+	eorb	%d0,%d1		|get exor of sign bits
+	btstl	#7,%d1		|test for sign
+	beqs	srem_fsn	|if clr, do not set sign bit
+	bsetb	#q_sn_bit,FPSR_QBYTE(%a6) |set q-byte sign bit
+srem_fsn:
+	tstb	DTAG(%a6)	|filter out denormal destination case
+	bpls	srem_nrm	|bit 7 of the tag byte clear: dst is not a denorm
+	leal	FPTEMP(%a6),%a0	|a0<- addr(FPTEMP)
+	bra	t_resdnrm	|force UNFL(but exact) result
+srem_nrm:
+	fmovel USER_FPCR(%a6),%fpcr |use user's rmode and precision
+	fmovex FPTEMP(%a6),%fp0	|return dest to fp0
+	rts
+|
+|	FSCALE
+|
+pscalet:
+|				;$26 fscale
+|				;dtag,stag
+	.long	sscale		|  00,00  norm,norm = result
+	.long	sscale		|  00,01  norm,zero = fpn
+	.long	scl_opr		|  00,10  norm,inf  = nan with operr
+	.long	scl_snan	|  00,11  norm,nan  = nan
+	.long	scl_zro		|  01,00  zero,norm = +-zero
+	.long	scl_zro		|  01,01  zero,zero = +-zero
+	.long	scl_opr		|  01,10  zero,inf  = nan with operr
+	.long	scl_snan	|  01,11  zero,nan  = nan
+	.long	scl_inf		|  10,00  inf,norm  = +-inf
+	.long	scl_inf		|  10,01  inf,zero  = +-inf
+	.long	scl_opr		|  10,10  inf,inf   = nan with operr
+	.long	scl_snan	|  10,11  inf,nan   = nan
+	.long	scl_dnan	|  11,00  nan,norm  = nan
+	.long	scl_dnan	|  11,01  nan,zero  = nan
+	.long	scl_dnan	|  11,10  nan,inf   = nan
+	.long	scl_dnan	|  11,11  nan,nan   = nan
+
+	.global	pscale
+pscale:
+	bfextu	STAG(%a6){#0:#3},%d0 |stag in d0
+	bfextu	DTAG(%a6){#0:#3},%d1 |dtag in d1
+	bclrl	#2,%d0		|alias  denorm into norm
+	bclrl	#2,%d1		|alias  denorm into norm
+	lslb	#2,%d1		|move dtag up to bits 3:2
+	orb	%d0,%d1		|d1{3:2} = dtag, d1{1:0} = stag
+|				;tag values after the aliasing above
+|				;(same encoding for dtag and stag):
+|				;00 = norm or denorm
+|				;01 = zero
+|				;10 = inf
+|				;11 = nan
+|
+|
+	leal	pscalet,%a1	|load start of jump table
+	movel	(%a1,%d1.w*4),%a1	|load a1 with label depending on tag
+	jmp	(%a1)		|go to the routine
+
+scl_opr:	|pscalet rows with stag = inf: nan with operr
+	bra	t_operr
+
+scl_dnan:	|pscalet rows with dtag = nan
+	bra	dst_nan
+
+scl_zro:	|dst is zero: return a zero with the sign of dst
+	btstb	#sign_bit,FPTEMP_EX(%a6)	|test if + or -
+	beq	ld_pzero		|if pos then load +0
+	bra	ld_mzero		|if neg then load -0
+scl_inf:	|dst is inf: return an inf with the sign of dst
+	btstb	#sign_bit,FPTEMP_EX(%a6)	|test if + or -
+	beq	ld_pinf			|if pos then load +inf
+	bra	ld_minf			|else neg load -inf
+scl_snan:	|pscalet rows with stag = nan (dtag not nan)
+	bra	src_nan
+|
+|	FSINCOS -- ssincosz: fp0 = zero with the source sign, fp1 = +1
+|
+	.global	ssincosz
+ssincosz:
+	btstb	#sign_bit,ETEMP(%a6)	|get sign
+	beqs	sincosp		|sign clear: load +0
+	fmovex	MZERO,%fp0	|negative source: fp0 = -0
+	bras	sincoscom
+sincosp:
+	fmovex PZERO,%fp0	|positive source: fp0 = +0
+sincoscom:
+	fmovemx PONE,%fp1-%fp1	|do not allow FPSR to be affected
+	bra	sto_cos		|store cosine result
+
+	.global	ssincosi
+ssincosi:	|both results are forced to QNAN, then operr is taken
+	fmovex QNAN,%fp1	|load NAN
+	bsr	sto_cos		|store cosine result
+	fmovex QNAN,%fp0	|load NAN
+	bra	t_operr		|finish through the operr handler
+
+	.global	ssincosnan
+ssincosnan:	|nan source: store a copy of it as the cosine, then src_nan
+	movel	ETEMP_EX(%a6),FP_SCR1(%a6)	|copy the source operand
+	movel	ETEMP_HI(%a6),FP_SCR1+4(%a6)	|into the FP_SCR1 scratch
+	movel	ETEMP_LO(%a6),FP_SCR1+8(%a6)	|area (three longs)
+	bsetb	#signan_bit,FP_SCR1+4(%a6)	|set bit 6 of the top mantissa byte in the copy
+	fmovemx FP_SCR1(%a6),%fp1-%fp1	|fp1 = the modified copy
+	bsr	sto_cos		|store cosine result
+	bra	src_nan		|finish through the source-nan handler
+|
+| This code forces default values for the zero, inf, and nan cases
+| in the transcendentals code.  The CC bits must be set in the
+| stacked FPSR to be correctly reported.
+|
+|**Returns +PI/2 in fp0; leaves through t_inx2 rather than rts
+	.global	ld_ppi2
+ld_ppi2:
+	fmovex PPIBY2,%fp0		|load +pi/2
+	bra	t_inx2			|set inex2 exc
+
+|**Returns -PI/2 in fp0, N set in USER_FPSR; leaves through t_inx2
+	.global	ld_mpi2
+ld_mpi2:
+	fmovex MPIBY2,%fp0		|load -pi/2
+	orl	#neg_mask,USER_FPSR(%a6)	|set N bit
+	bra	t_inx2			|set inex2 exc
+
+|**Returns +inf in fp0 and ORs the I bit into the stacked USER_FPSR
+	.global	ld_pinf
+ld_pinf:
+	fmovex PINF,%fp0		|load +inf
+	orl	#inf_mask,USER_FPSR(%a6)	|set I bit
+	rts
+
+|**Returns -inf in fp0 and ORs the N and I bits into the stacked USER_FPSR
+	.global	ld_minf
+ld_minf:
+	fmovex MINF,%fp0		|load -inf
+	orl	#neg_mask+inf_mask,USER_FPSR(%a6)	|set N and I bits
+	rts
+
+|**Returns +1 in fp0; no condition-code bits are ORed into USER_FPSR
+	.global	ld_pone
+ld_pone:
+	fmovex PONE,%fp0		|load +1
+	rts
+
+|**Returns -1 in fp0 and ORs the N bit into the stacked USER_FPSR
+	.global	ld_mone
+ld_mone:
+	fmovex MONE,%fp0		|load -1
+	orl	#neg_mask,USER_FPSR(%a6)	|set N bit
+	rts
+
+|**Returns +0 in fp0 and ORs the Z bit into the stacked USER_FPSR
+	.global	ld_pzero
+ld_pzero:
+	fmovex PZERO,%fp0		|load +0
+	orl	#z_mask,USER_FPSR(%a6)	|set Z bit
+	rts
+
+|**Returns -0 in fp0 and ORs the N and Z bits into the stacked USER_FPSR
+	.global	ld_mzero
+ld_mzero:
+	fmovex MZERO,%fp0		|load -0
+	orl	#neg_mask+z_mask,USER_FPSR(%a6)	|set N and Z bits
+	rts
+
+	|end
diff --git a/arch/m68k/fpsp040/fpsp.h b/arch/m68k/fpsp040/fpsp.h
new file mode 100644
index 0000000..984a4eb
--- /dev/null
+++ b/arch/m68k/fpsp040/fpsp.h
@@ -0,0 +1,348 @@
+|
+|	fpsp.h 3.3 3.3
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|	fpsp.h --- stack frame offsets during FPSP exception handling
+|
+|	These equates are used to access the exception frame, the fsave
+|	frame and any local variables needed by the FPSP package.
+|
+|	All FPSP handlers begin by executing:
+|
+|		link	a6,#-LOCAL_SIZE
+|		fsave	-(a7)
+|		movem.l	d0-d1/a0-a1,USER_DA(a6)
+|		fmovem.x fp0-fp3,USER_FP0(a6)
+|		fmovem.l fpcr/fpsr/fpiar,USER_FPCR(a6)
+|
+|	After initialization, the stack looks like this:
+|
+|	A7 --->	+-------------------------------+
+|		|				|
+|		|	FPU fsave area		|
+|		|				|
+|		+-------------------------------+
+|		|				|
+|		|	FPSP Local Variables	|
+|		|	     including		|
+|		|	  saved registers	|
+|		|				|
+|		+-------------------------------+
+|	A6 --->	|	Saved A6		|
+|		+-------------------------------+
+|		|				|
+|		|	Exception Frame		|
+|		|				|
+|		|				|
+|
+|	Positive offsets from A6 refer to the exception frame.  Negative
+|	offsets refer to the Local Variable area and the fsave area.
+|	The fsave frame is also accessible from the top via A7.
+|
+|	On exit, the handlers execute:
+|
+|		movem.l	USER_DA(a6),d0-d1/a0-a1
+|		fmovem.x USER_FP0(a6),fp0-fp3
+|		fmovem.l USER_FPCR(a6),fpcr/fpsr/fpiar
+|		frestore (a7)+
+|		unlk	a6
+|
+|	and then either "bra fpsp_done" if the exception was completely
+|	handled	by the package, or "bra real_xxxx" which is an external
+|	label to a routine that will process a real exception of the
+|	type that was generated.  Some handlers may omit the "frestore"
+|	if the FPU state after the exception is idle.
+|
+|	Sometimes the exception handler will transform the fsave area
+|	because it needs to report an exception back to the user.  This
+|	can happen if the package is entered for an unimplemented float
+|	instruction that generates (say) an underflow.  Alternatively,
+|	a second fsave frame can be pushed onto the stack and the
+|	handler	exit code will reload the new frame and discard the old.
+|
+|	The registers d0, d1, a0, a1 and fp0-fp3 are always saved and
+|	restored from the "local variable" area and can be used as
+|	temporaries.  If a routine needs to change the value the user
+|	sees in any of these registers, it should modify the saved copy
+|	and let the handler exit code restore the value.
+|
+|----------------------------------------------------------------------
+|
+|	Local Variables on the stack
+|
+	.set	LOCAL_SIZE,192		| bytes needed for local variables
+	.set	LV,-LOCAL_SIZE	| convenient base value
+|
+	.set	USER_DA,LV+0		| save space for D0-D1,A0-A1
+	.set	USER_D0,LV+0		| saved user D0
+	.set	USER_D1,LV+4		| saved user D1
+	.set	USER_A0,LV+8		| saved user A0
+	.set	USER_A1,LV+12		| saved user A1
+	.set	USER_FP0,LV+16		| saved user FP0
+	.set	USER_FP1,LV+28		| saved user FP1
+	.set	USER_FP2,LV+40		| saved user FP2
+	.set	USER_FP3,LV+52		| saved user FP3
+	.set	USER_FPCR,LV+64		| saved user FPCR
+	.set	FPCR_ENABLE,USER_FPCR+2	|	FPCR exception enable
+	.set	FPCR_MODE,USER_FPCR+3	|	FPCR rounding mode control
+	.set	USER_FPSR,LV+68		| saved user FPSR
+	.set	FPSR_CC,USER_FPSR+0	|	FPSR condition code
+	.set	FPSR_QBYTE,USER_FPSR+1	|	FPSR quotient
+	.set	FPSR_EXCEPT,USER_FPSR+2	|	FPSR exception
+	.set	FPSR_AEXCEPT,USER_FPSR+3	|	FPSR accrued exception
+	.set	USER_FPIAR,LV+72		| saved user FPIAR
+	.set	FP_SCR1,LV+76		| room for a temporary float value
+	.set	FP_SCR2,LV+92		| room for a temporary float value
+	.set	L_SCR1,LV+108		| room for a temporary long value
+	.set	L_SCR2,LV+112		| room for a temporary long value
+	.set	STORE_FLG,LV+116
+	.set	BINDEC_FLG,LV+117		| used in bindec
+	.set	DNRM_FLG,LV+118		| used in res_func
+	.set	RES_FLG,LV+119		| used in res_func
+	.set	DY_MO_FLG,LV+120		| dyadic/monadic flag
+	.set	UFLG_TMP,LV+121		| temporary for uflag errata
+	.set	CU_ONLY,LV+122		| cu-only flag
+	.set	VER_TMP,LV+123		| temp holding for version number
+	.set	L_SCR3,LV+124		| room for a temporary long value
+	.set	FP_SCR3,LV+128		| room for a temporary float value
+	.set	FP_SCR4,LV+144		| room for a temporary float value
+	.set	FP_SCR5,LV+160		| room for a temporary float value
+	.set	FP_SCR6,LV+176
+|
+|NEXT		equ	LV+192		;need to increase LOCAL_SIZE
+|
+|--------------------------------------------------------------------------
+|
+|	fsave offsets and bit definitions
+|
+|	Offsets are defined from the end of an fsave because the last 10
+|	words of a busy frame are the same as the unimplemented frame.
+|
+	.set	CU_SAVEPC,LV-92		| micro-pc for CU (1 byte)
+	.set	FPR_DIRTY_BITS,LV-91		| fpr dirty bits
+|
+	.set	WBTEMP,LV-76		| write back temp (12 bytes)
+	.set	WBTEMP_EX,WBTEMP		| wbtemp sign and exponent (2 bytes)
+	.set	WBTEMP_HI,WBTEMP+4	| wbtemp mantissa [63:32] (4 bytes)
+	.set	WBTEMP_LO,WBTEMP+8	| wbtemp mantissa [31:00] (4 bytes)
+|
+	.set	WBTEMP_SGN,WBTEMP+2	| used to store sign
+|
+	.set	FPSR_SHADOW,LV-64		| fpsr shadow reg
+|
+	.set	FPIARCU,LV-60		| Instr. addr. reg. for CU (4 bytes)
+|
+	.set	CMDREG2B,LV-52		| cmd reg for machine 2
+	.set	CMDREG3B,LV-48		| cmd reg for E3 exceptions (2 bytes)
+|
+	.set	NMNEXC,LV-44		| NMNEXC (unsup,snan bits only)
+	.set	nmn_unsup_bit,1	|
+	.set	nmn_snan_bit,0	|
+|
+	.set	NMCEXC,LV-43		| NMNEXC & NMCEXC
+	.set	nmn_operr_bit,7
+	.set	nmn_ovfl_bit,6
+	.set	nmn_unfl_bit,5
+	.set	nmc_unsup_bit,4
+	.set	nmc_snan_bit,3
+	.set	nmc_operr_bit,2
+	.set	nmc_ovfl_bit,1
+	.set	nmc_unfl_bit,0
+|
+	.set	STAG,LV-40		| source tag (1 byte)
+	.set	WBTEMP_GRS,LV-40		| alias wbtemp guard, round, sticky
+	.set	guard_bit,1		| guard bit is bit number 1
+	.set	round_bit,0		| round bit is bit number 0
+	.set	stag_mask,0xE0		| upper 3 bits are source tag type
+	.set	denorm_bit,7		| bit determines if denorm or unnorm
+	.set	etemp15_bit,4		| etemp exponent bit #15
+	.set	wbtemp66_bit,2		| wbtemp mantissa bit #66
+	.set	wbtemp1_bit,1		| wbtemp mantissa bit #1
+	.set	wbtemp0_bit,0		| wbtemp mantissa bit #0
+|
+	.set	STICKY,LV-39		| holds sticky bit
+	.set	sticky_bit,7
+|
+	.set	CMDREG1B,LV-36		| cmd reg for E1 exceptions (2 bytes)
+	.set	kfact_bit,12		| distinguishes static/dynamic k-factor
+|					;on packed move outs.  NOTE: this
+|					;equate only works when CMDREG1B is in
+|					;a register.
+|
+	.set	CMDWORD,LV-35		| command word in cmd1b
+	.set	direction_bit,5		| bit 0 in opclass
+	.set	size_bit2,12		| bit 2 in size field
+|
+	.set	DTAG,LV-32		| dest tag (1 byte)
+	.set	dtag_mask,0xE0		| upper 3 bits are dest type tag
+	.set	fptemp15_bit,4		| fptemp exponent bit #15
+|
+	.set	WB_BYTE,LV-31		| holds WBTE15 bit (1 byte)
+	.set	wbtemp15_bit,4		| wbtemp exponent bit #15
+|
+	.set	E_BYTE,LV-28		| holds E1 and E3 bits (1 byte)
+	.set	E1,2		| which bit is E1 flag
+	.set	E3,1		| which bit is E3 flag
+	.set	SFLAG,0		| which bit is S flag
+|
+	.set	T_BYTE,LV-27		| holds T and U bits (1 byte)
+	.set	XFLAG,7		| which bit is X flag
+	.set	UFLAG,5		| which bit is U flag
+	.set	TFLAG,4		| which bit is T flag
+|
+	.set	FPTEMP,LV-24		| fptemp (12 bytes)
+	.set	FPTEMP_EX,FPTEMP		| fptemp sign and exponent (2 bytes)
+	.set	FPTEMP_HI,FPTEMP+4	| fptemp mantissa [63:32] (4 bytes)
+	.set	FPTEMP_LO,FPTEMP+8	| fptemp mantissa [31:00] (4 bytes)
+|
+	.set	FPTEMP_SGN,FPTEMP+2	| used to store sign
+|
+	.set	ETEMP,LV-12		| etemp (12 bytes)
+	.set	ETEMP_EX,ETEMP		| etemp sign and exponent (2 bytes)
+	.set	ETEMP_HI,ETEMP+4		| etemp mantissa [63:32] (4 bytes)
+	.set	ETEMP_LO,ETEMP+8		| etemp mantissa [31:00] (4 bytes)
+|
+	.set	ETEMP_SGN,ETEMP+2		| used to store sign
+|
+	.set	EXC_SR,4		| exception frame status register
+	.set	EXC_PC,6		| exception frame program counter
+	.set	EXC_VEC,10		| exception frame vector (format+vector#)
+	.set	EXC_EA,12		| exception frame effective address
+|
+|--------------------------------------------------------------------------
+|
+|	FPSR/FPCR bits
+|
+	.set	neg_bit,3	|  negative result
+	.set	z_bit,2	|  zero result
+	.set	inf_bit,1	|  infinity result
+	.set	nan_bit,0	|  not-a-number result
+|
+	.set	q_sn_bit,7	|  sign bit of quotient byte
+|
+	.set	bsun_bit,7	|  branch on unordered
+	.set	snan_bit,6	|  signalling nan
+	.set	operr_bit,5	|  operand error
+	.set	ovfl_bit,4	|  overflow
+	.set	unfl_bit,3	|  underflow
+	.set	dz_bit,2	|  divide by zero
+	.set	inex2_bit,1	|  inexact result 2
+	.set	inex1_bit,0	|  inexact result 1
+|
+	.set	aiop_bit,7	|  accrued illegal operation
+	.set	aovfl_bit,6	|  accrued overflow
+	.set	aunfl_bit,5	|  accrued underflow
+	.set	adz_bit,4	|  accrued divide by zero
+	.set	ainex_bit,3	|  accrued inexact
+|
+|	FPSR individual bit masks
+|
+	.set	neg_mask,0x08000000
+	.set	z_mask,0x04000000
+	.set	inf_mask,0x02000000
+	.set	nan_mask,0x01000000
+|
+	.set	bsun_mask,0x00008000	|
+	.set	snan_mask,0x00004000
+	.set	operr_mask,0x00002000
+	.set	ovfl_mask,0x00001000
+	.set	unfl_mask,0x00000800
+	.set	dz_mask,0x00000400
+	.set	inex2_mask,0x00000200
+	.set	inex1_mask,0x00000100
+|
+	.set	aiop_mask,0x00000080	|  accrued illegal operation
+	.set	aovfl_mask,0x00000040	|  accrued overflow
+	.set	aunfl_mask,0x00000020	|  accrued underflow
+	.set	adz_mask,0x00000010	|  accrued divide by zero
+	.set	ainex_mask,0x00000008	|  accrued inexact
+|
+|	FPSR combinations used in the FPSP
+|
+	.set	dzinf_mask,inf_mask+dz_mask+adz_mask
+	.set	opnan_mask,nan_mask+operr_mask+aiop_mask
+	.set	nzi_mask,0x01ffffff	|  clears N, Z, and I
+	.set	unfinx_mask,unfl_mask+inex2_mask+aunfl_mask+ainex_mask
+	.set	unf2inx_mask,unfl_mask+inex2_mask+ainex_mask
+	.set	ovfinx_mask,ovfl_mask+inex2_mask+aovfl_mask+ainex_mask
+	.set	inx1a_mask,inex1_mask+ainex_mask
+	.set	inx2a_mask,inex2_mask+ainex_mask
+	.set	snaniop_mask,nan_mask+snan_mask+aiop_mask
+	.set	naniop_mask,nan_mask+aiop_mask
+	.set	neginf_mask,neg_mask+inf_mask
+	.set	infaiop_mask,inf_mask+aiop_mask
+	.set	negz_mask,neg_mask+z_mask
+	.set	opaop_mask,operr_mask+aiop_mask
+	.set	unfl_inx_mask,unfl_mask+aunfl_mask+ainex_mask
+	.set	ovfl_inx_mask,ovfl_mask+aovfl_mask+ainex_mask
+|
+|--------------------------------------------------------------------------
+|
+|	FPCR rounding precisions and rounding modes
+|
+	.set	x_mode,0x00	|  round to extended
+	.set	s_mode,0x40	|  round to single
+	.set	d_mode,0x80	|  round to double
+|
+	.set	rn_mode,0x00	|  round nearest
+	.set	rz_mode,0x10	|  round to zero
+	.set	rm_mode,0x20	|  round to minus infinity
+	.set	rp_mode,0x30	|  round to plus infinity
+|
+|--------------------------------------------------------------------------
+|
+|	Miscellaneous equates
+|
+	.set	signan_bit,6	|  signalling nan bit in mantissa
+	.set	sign_bit,7
+|
+	.set	rnd_stky_bit,29	|  round/sticky bit of mantissa
+|				this can only be used if in a data register
+	.set	sx_mask,0x01800000 |  set s and x bits in word $48
+|
+	.set	LOCAL_EX,0
+	.set	LOCAL_SGN,2
+	.set	LOCAL_HI,4
+	.set	LOCAL_LO,8
+	.set	LOCAL_GRS,12	|  valid ONLY for FP_SCR1, FP_SCR2
+|
+|
+	.set	norm_tag,0x00	|  tag bits in {7:5} position
+	.set	zero_tag,0x20
+	.set	inf_tag,0x40
+	.set	nan_tag,0x60
+	.set	dnrm_tag,0x80
+|
+|	fsave sizes and formats
+|
+	.set	VER_4,0x40		|  fpsp compatible version numbers
+|					are in the $40s {$40-$4f}
+	.set	VER_40,0x40		|  original version number
+	.set	VER_41,0x41		|  revision version number
+|
+	.set	BUSY_SIZE,100		|  size of busy frame
+	.set	BUSY_FRAME,LV-BUSY_SIZE	|  start of busy frame
+|
+	.set	UNIMP_40_SIZE,44		|  size of orig unimp frame
+	.set	UNIMP_41_SIZE,52		|  size of rev unimp frame
+|
+	.set	IDLE_SIZE,4		|  size of idle frame
+	.set	IDLE_FRAME,LV-IDLE_SIZE	|  start of idle frame
+|
+|	exception vectors
+|
+	.set	TRACE_VEC,0x2024		|  trace trap
+	.set	FLINE_VEC,0x002C		|  real F-line
+	.set	UNIMP_VEC,0x202C		|  unimplemented
+	.set	INEX_VEC,0x00C4
+|
+	.set	dbl_thresh,0x3C01
+	.set	sgl_thresh,0x3F81
+|
diff --git a/arch/m68k/fpsp040/gen_except.S b/arch/m68k/fpsp040/gen_except.S
new file mode 100644
index 0000000..401d06f
--- /dev/null
+++ b/arch/m68k/fpsp040/gen_except.S
@@ -0,0 +1,468 @@
+|
+|	gen_except.sa 3.7 1/16/92
+|
+|	gen_except --- FPSP routine to detect reportable exceptions
+|
+|	This routine compares the exception enable byte of the
+|	user_fpcr on the stack with the exception status byte
+|	of the user_fpsr.
+|
+|	Any routine which may report an exception must load
+|	the stack frame in memory with the exceptional operand(s).
+|
+|	Priority for exceptions is:
+|
+|	Highest:	bsun
+|			snan
+|			operr
+|			ovfl
+|			unfl
+|			dz
+|			inex2
+|	Lowest:		inex1
+|
+|	Note: The IEEE standard specifies that inex2 is to be
+|	reported if ovfl occurs and the ovfl enable bit is not
+|	set but the inex2 enable bit is.
+|
+|
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+GEN_EXCEPT:    |idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section 8
+
+#include "fpsp.h"
+
+	|xref	real_trace
+	|xref	fpsp_done
+	|xref	fpsp_fmt_error
+
+exc_tbl:			| jump table indexed by frame_com (d1 = 0-8)
+	.long	bsun_exc	| 0: bsun (highest priority)
+	.long	commonE1	| 1: snan
+	.long	commonE1	| 2: operr
+	.long	ovfl_unfl	| 3: ovfl
+	.long	ovfl_unfl	| 4: unfl
+	.long	commonE1	| 5: dz
+	.long	commonE3	| 6: inex2
+	.long	commonE3	| 7: inex1 (lowest priority)
+	.long	no_match	| 8: no enabled exception bit is set
+
+	.global	gen_except
+gen_except:			|dispatch on the size byte of the frame at a7
+	cmpib	#IDLE_SIZE-4,1(%a7)	|test for idle frame
+	beq	do_check		|go handle idle frame
+	cmpib	#UNIMP_40_SIZE-4,1(%a7)	|test for orig unimp frame
+	beqs	unimp_x			|go handle unimp frame
+	cmpib	#UNIMP_41_SIZE-4,1(%a7)	|test for rev unimp frame
+	beqs	unimp_x			|go handle unimp frame
+	cmpib	#BUSY_SIZE-4,1(%a7)	|if size <> $60, fmt error
+	bnel	fpsp_fmt_error		|not idle, unimp or busy
+	leal	BUSY_SIZE+LOCAL_SIZE(%a7),%a1 |init a1 so fpsp.h
+|					;equates will work
+| Fix up the new busy frame with entries from the unimp frame
+|
+	movel	ETEMP_EX(%a6),ETEMP_EX(%a1) |copy etemp from unimp
+	movel	ETEMP_HI(%a6),ETEMP_HI(%a1) |frame to busy frame
+	movel	ETEMP_LO(%a6),ETEMP_LO(%a1)
+	movel	CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp
+	movel	CMDREG1B(%a6),%d0		|fix cmd1b to make it
+	andl	#0x03c30000,%d0		|work for cmd3b
+	bfextu	CMDREG1B(%a6){#13:#1},%d1	|extract bit 2
+	lsll	#5,%d1
+	swap	%d1
+	orl	%d1,%d0			|put it in the right place
+	bfextu	CMDREG1B(%a6){#10:#3},%d1	|extract bit 3,4,5
+	lsll	#2,%d1
+	swap	%d1
+	orl	%d1,%d0			|put them in the right place
+	movel	%d0,CMDREG3B(%a1)		|in the busy frame
+|
+| Or in the FPSR from the emulation with the USER_FPSR on the stack.
+|
+	fmovel	%FPSR,%d0
+	orl	%d0,USER_FPSR(%a6)
+	movel	USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits
+	orl	#sx_mask,E_BYTE(%a1)	|or sx_mask into the new frame
+	bra	do_clean
+
+|
+| Frame is an unimp frame possibly resulting from an fmove <ea>,fp0
+| that caused an exception
+|
+| a1 is modified to point into the new frame allowing fpsp equates
+| to be valid.
+|
+unimp_x:
+	cmpib	#UNIMP_40_SIZE-4,1(%a7)	|test for orig unimp frame
+	bnes	test_rev		|if not, try the rev frame size
+	leal	UNIMP_40_SIZE+LOCAL_SIZE(%a7),%a1
+	bras	unimp_con
+test_rev:
+	cmpib	#UNIMP_41_SIZE-4,1(%a7)	|test for rev unimp frame
+	bnel	fpsp_fmt_error		|if not $28 or $30
+	leal	UNIMP_41_SIZE+LOCAL_SIZE(%a7),%a1
+
+unimp_con:
+|
+| Fix up the new unimp frame with entries from the old unimp frame
+|
+	movel	CMDREG1B(%a6),CMDREG1B(%a1) |set inst in frame to unimp
+|
+| Or in the FPSR from the emulation with the USER_FPSR on the stack.
+|
+	fmovel	%FPSR,%d0
+	orl	%d0,USER_FPSR(%a6)
+	bra	do_clean
+
+|
+| Frame is idle, so check for exceptions reported through
+| USER_FPSR and set the unimp frame accordingly.
+| A7 must be incremented past the idle fsave frame so that it
+| points back at the unimp frame.
+|
+
+do_check:
+	addl	#4,%a7			|point A7 back to unimp frame
+|
+| Or in the FPSR from the emulation with the USER_FPSR on the stack.
+|
+	fmovel	%FPSR,%d0
+	orl	%d0,USER_FPSR(%a6)
+|
+| On a busy frame, we must clear the nmnexc bits.
+|
+	cmpib	#BUSY_SIZE-4,1(%a7)	|check frame type
+	bnes	check_fr		|if busy, clr nmnexc
+	clrw	NMNEXC(%a6)		|clr nmnexc & nmcexc
+	btstb	#5,CMDREG1B(%a6)		|test for fmove out
+	bnes	frame_com		|if fmove out, skip the shadow update
+	movel	USER_FPSR(%a6),FPSR_SHADOW(%a6) |set exc bits
+	orl	#sx_mask,E_BYTE(%a6)
+	bras	frame_com
+check_fr:
+	cmpb	#UNIMP_40_SIZE-4,1(%a7)	|orig unimp frame?
+	beqs	frame_com		|if so, leave nmnexc untouched
+	clrw	NMNEXC(%a6)		|else clr nmnexc & nmcexc
+frame_com:
+	moveb	FPCR_ENABLE(%a6),%d0	|get fpcr enable byte
+	andb	FPSR_EXCEPT(%a6),%d0	|and in the fpsr exc byte
+	bfffo	%d0{#24:#8},%d1		|test for first set bit
+	leal	exc_tbl,%a0		|load jmp table address
+	subib	#24,%d1			|normalize bit offset to 0-8
+	movel	(%a0,%d1.w*4),%a0		|load routine address
+|					;based on first enabled exc
+	jmp	(%a0)			|jump to routine
+|
+| Bsun is not possible in unimp or unsupp
+|
+bsun_exc:
+	bra	do_clean		|nothing to set up, go clean and exit
+|
+| The typical work to be done to the unimp frame to report an
+| exception is to set the E1/E3 byte and clr the U flag.
+| commonE1 does this for E1 exceptions, which are snan,
+| operr, and dz.  commonE3 does this for E3 exceptions, which
+| are inex2 and inex1, and also clears the E1 exception bit
+| left over from the unimp exception.
+|
+commonE1:
+	bsetb	#E1,E_BYTE(%a6)		|set E1 flag
+	bra	commonE			|go clean and exit
+
+commonE3:
+	tstb	UFLG_TMP(%a6)		|test flag for unsup/unimp state
+	bnes	unsE3			|nonzero means unsupp
+uniE3:
+	bsetb	#E3,E_BYTE(%a6)		|set E3 flag
+	bclrb	#E1,E_BYTE(%a6)		|clr E1 from unimp
+	bra	commonE
+
+unsE3:
+	tstb	RES_FLG(%a6)		|RES_FLG set?
+	bnes	unsE3_0			|if so, do not set E3
+unsE3_1:
+	bsetb	#E3,E_BYTE(%a6)		|set E3 flag
+unsE3_0:
+	bclrb	#E1,E_BYTE(%a6)		|clr E1 flag
+	movel	CMDREG1B(%a6),%d0		|fix cmd1b to make it
+	andl	#0x03c30000,%d0		|work for cmd3b
+	bfextu	CMDREG1B(%a6){#13:#1},%d1	|extract bit 2
+	lsll	#5,%d1
+	swap	%d1
+	orl	%d1,%d0			|put it in the right place
+	bfextu	CMDREG1B(%a6){#10:#3},%d1	|extract bit 3,4,5
+	lsll	#2,%d1
+	swap	%d1
+	orl	%d1,%d0			|put them in the right place
+	movel	%d0,CMDREG3B(%a6)		|in the busy frame
+
+commonE:
+	bclrb	#UFLAG,T_BYTE(%a6)	|clr U flag from unimp
+	bra	do_clean		|go clean and exit
+|
+| No bits in the enable byte match existing exceptions.  Check for
+| the case of the ovfl exc without the ovfl enabled, but with
+| inex2 enabled.
+|
+no_match:
+	btstb	#inex2_bit,FPCR_ENABLE(%a6) |check for ovfl/inex2 case
+	beqs	no_exc			|if clear, exit
+	btstb	#ovfl_bit,FPSR_EXCEPT(%a6) |now check ovfl
+	beqs	no_exc			|if clear, exit
+	bras	ovfl_unfl		|go to ovfl_unfl to determine if
+|					;it is an unsupp or unimp exc
+
+| No exceptions are to be reported.  If the instruction was
+| unimplemented, no FPU restore is necessary.  If it was
+| unsupported, we must perform the restore.
+no_exc:
+	tstb	UFLG_TMP(%a6)	|test flag for unsupp/unimp state
+	beqs	uni_no_exc
+uns_no_exc:
+	tstb	RES_FLG(%a6)	|check if frestore is needed
+	bne	do_clean	|if set, frestore via do_clean; if clear, none
+uni_no_exc:
+	moveml	USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx USER_FP0(%a6),%fp0-%fp3
+	fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	unlk	%a6
+	bra	finish_up
+|
+| Unsupported Data Type Handler:
+| Ovfl:
+|   An fmoveout that results in an overflow is reported this way.
+| Unfl:
+|   An fmoveout that results in an underflow is reported this way.
+|
+| Unimplemented Instruction Handler:
+| Ovfl:
+|   Only scosh, setox, ssinh, stwotox, and scale can set overflow in
+|   this manner.
+| Unfl:
+|   Stwotox, setox, and scale can set underflow in this manner.
+|   Any of the other Library Routines such that f(x)=x in which
+|   x is an extended denorm can report an underflow exception.
+|   It is the responsibility of the exception-causing routine
+|   to make sure that WBTEMP is correct.
+|
+|   The exceptional operand is in FP_SCR1.
+|
+ovfl_unfl:
+	tstb	UFLG_TMP(%a6)	|test flag for unsupp/unimp state
+	beqs	ofuf_con
+|
+| The caller was from an unsupported data type trap.  Test if the
+| caller set CU_ONLY.  If so, the exceptional operand is expected in
+| FPTEMP, rather than WBTEMP.
+|
+	tstb	CU_ONLY(%a6)		|test if inst is cu-only
+	beq	unsE3			|if not cu-only, report through unsE3
+|	move.w	#$fe,CU_SAVEPC(%a6)
+	clrb	CU_SAVEPC(%a6)
+	bsetb	#E1,E_BYTE(%a6)		|set E1 exception flag
+	movew	ETEMP_EX(%a6),FPTEMP_EX(%a6)
+	movel	ETEMP_HI(%a6),FPTEMP_HI(%a6)
+	movel	ETEMP_LO(%a6),FPTEMP_LO(%a6)
+	bsetb	#fptemp15_bit,DTAG(%a6)	|set fpte15
+	bclrb	#UFLAG,T_BYTE(%a6)	|clr U flag from unimp
+	bra	do_clean		|go clean and exit
+
+ofuf_con:
+	moveb	(%a7),VER_TMP(%a6)	|save version number
+	cmpib	#BUSY_SIZE-4,1(%a7)	|check for busy frame
+	beqs	busy_fr			|if busy, use as is; else grow unimp
+	cmpib	#VER_40,(%a7)		|test for orig unimp frame
+	bnes	try_41			|if not, test for rev frame
+	moveql	#13,%d0			|need to zero 14 lwords
+	bras	ofuf_fin
+try_41:
+	cmpib	#VER_41,(%a7)		|test for rev unimp frame
+	bnel	fpsp_fmt_error		|if neither, exit with error
+	moveql	#11,%d0			|need to zero 12 lwords
+
+ofuf_fin:
+	clrl	(%a7)			|clear the old frame header lword
+loop1:
+	clrl	-(%a7)			|clear and dec a7
+	dbra	%d0,loop1
+	moveb	VER_TMP(%a6),(%a7)
+	moveb	#BUSY_SIZE-4,1(%a7)		|write busy fmt word.
+busy_fr:
+	movel	FP_SCR1(%a6),WBTEMP_EX(%a6)	|write
+	movel	FP_SCR1+4(%a6),WBTEMP_HI(%a6)	|exceptional op to
+	movel	FP_SCR1+8(%a6),WBTEMP_LO(%a6)	|wbtemp
+	bsetb	#E3,E_BYTE(%a6)			|set E3 flag
+	bclrb	#E1,E_BYTE(%a6)			|make sure E1 is clear
+	bclrb	#UFLAG,T_BYTE(%a6)		|clr U flag
+	movel	USER_FPSR(%a6),FPSR_SHADOW(%a6)
+	orl	#sx_mask,E_BYTE(%a6)
+	movel	CMDREG1B(%a6),%d0		|fix cmd1b to make it
+	andl	#0x03c30000,%d0		|work for cmd3b
+	bfextu	CMDREG1B(%a6){#13:#1},%d1	|extract bit 2
+	lsll	#5,%d1
+	swap	%d1
+	orl	%d1,%d0			|put it in the right place
+	bfextu	CMDREG1B(%a6){#10:#3},%d1	|extract bit 3,4,5
+	lsll	#2,%d1
+	swap	%d1
+	orl	%d1,%d0			|put them in the right place
+	movel	%d0,CMDREG3B(%a6)		|in the busy frame
+
+|
+| Check if the frame to be restored is busy or unimp.
+|** NOTE *** Bug fix for errata (0d43b #3)
+| If the frame is unimp, we must create a busy frame to
+| fix the bug with the nmnexc bits in cases in which they
+| are set by a previous instruction and not cleared by
+| the save. The frame will be unimp only if the final
+| instruction in an emulation routine caused the exception
+| by doing an fmove <ea>,fp0.  The exception operand, in
+| internal format, is in fptemp.
+|
+do_clean:
+	cmpib	#UNIMP_40_SIZE-4,1(%a7)	|orig unimp frame?
+	bnes	do_con
+	moveql	#13,%d0			|in orig, need to zero 14 lwords
+	bras	do_build
+do_con:
+	cmpib	#UNIMP_41_SIZE-4,1(%a7)	|rev unimp frame?
+	bnes	do_restore		|frame must be busy
+	moveql	#11,%d0			|in rev, need to zero 12 lwords
+
+do_build:
+	moveb	(%a7),VER_TMP(%a6)	|save version number
+	clrl	(%a7)			|clear the old frame header lword
+loop2:
+	clrl	-(%a7)			|clear and dec a7
+	dbra	%d0,loop2
+|
+| Use a1 as pointer into new frame.  a6 is not correct if an unimp or
+| busy frame was created as the result of an exception on the final
+| instruction of an emulation routine.
+|
+| We need to set the nmcexc bits if the exception is E1. Otherwise,
+| the exc taken will be inex2.
+|
+	leal	BUSY_SIZE+LOCAL_SIZE(%a7),%a1	|init a1 for new frame
+	moveb	VER_TMP(%a6),(%a7)	|write busy fmt word
+	moveb	#BUSY_SIZE-4,1(%a7)
+	movel	FP_SCR1(%a6),WBTEMP_EX(%a1)	|write
+	movel	FP_SCR1+4(%a6),WBTEMP_HI(%a1)	|exceptional op to
+	movel	FP_SCR1+8(%a6),WBTEMP_LO(%a1)	|wbtemp
+|	btst.b	#E1,E_BYTE(%a1)
+|	beq.b	do_restore
+	bfextu	USER_FPSR(%a6){#17:#4},%d0	|get snan/operr/ovfl/unfl bits
+	bfins	%d0,NMCEXC(%a1){#4:#4}	|and insert them in nmcexc
+	movel	USER_FPSR(%a6),FPSR_SHADOW(%a1) |set exc bits
+	orl	#sx_mask,E_BYTE(%a1)
+
+do_restore:
+	moveml	USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx USER_FP0(%a6),%fp0-%fp3
+	fmoveml USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore (%a7)+
+	tstb	RES_FLG(%a6)	|RES_FLG indicates a "continuation" frame
+	beq	cont		|if clear, no work-around is needed
+	bsr	bug1384		|else apply the bug 1384 work-around
+cont:
+	unlk	%a6
+|
+| If trace mode enabled, then go to trace handler.  This handler
+| cannot have any fp instructions.  If there are fp inst's and an
+| exception has been restored into the machine then the exception
+| will occur upon execution of the fp inst.  This is not desirable
+| in the kernel (supervisor mode).  See MC68040 manual Section 9.3.8.
+|
+finish_up:
+	btstb	#7,(%a7)		|test T1 in SR
+	bnes	g_trace
+	btstb	#6,(%a7)		|test T0 in SR
+	bnes	g_trace
+	bral	fpsp_done
+|
+| Change integer stack to look like trace stack
+| The address of the instruction that caused the
+| exception is already in the integer stack (is
+| the same as the saved fpiar)
+|
+| If the current frame is already a 6-word stack then all
+| that needs to be done is to change the vector# to TRACE.
+| If the frame is only a 4-word stack (meaning we got here
+| on an Unsupported data type exception), then we need to grow
+| the stack an extra 2 words and get the FPIAR from the FPU.
+|
+g_trace:
+	bftst	EXC_VEC-4(%sp){#0:#4}	|frame format nibble zero?
+	bne	g_easy			|nonzero: already a 6-word stack
+
+	subw	#4,%sp		| make room
+	movel	4(%sp),(%sp)	| slide the 4-word frame down
+	movel	8(%sp),4(%sp)	| by two words
+	subw	#BUSY_SIZE,%sp
+	fsave	(%sp)
+	fmovel	%fpiar,BUSY_SIZE+EXC_EA-4(%sp)	| FPIAR into the new slot
+	frestore (%sp)
+	addw	#BUSY_SIZE,%sp
+
+g_easy:
+	movew	#TRACE_VEC,EXC_VEC-4(%a7)
+	bral	real_trace
+|
+|  This is a work-around for hardware bug 1384 (called from do_restore).
+|
+bug1384:
+	link	%a5,#0		| a5 marks the top of the fsave frame
+	fsave	-(%sp)
+	cmpib	#0x41,(%sp)	| check for correct frame
+	beq	frame_41
+	bgt	nofix		| if more advanced mask, do nada
+
+frame_40:
+	tstb	1(%sp)		| check to see if idle
+	bne	notidle
+idle40:
+	clrl	(%sp)		| get rid of old fsave frame
+        movel  %d1,USER_D1(%a6)  | save d1
+	movew	#8,%d1		| place unimp frame instead
+loop40:	clrl	-(%sp)
+	dbra	%d1,loop40
+        movel  USER_D1(%a6),%d1  | restore d1
+	movel	#0x40280000,-(%sp)	| header: version $40, size $28
+	frestore (%sp)+
+	unlk	%a5
+	rts
+
+frame_41:
+	tstb	1(%sp)		| check to see if idle
+	bne	notidle
+idle41:
+	clrl	(%sp)		| get rid of old fsave frame
+        movel  %d1,USER_D1(%a6)  | save d1
+	movew	#10,%d1		| place unimp frame instead
+loop41:	clrl	-(%sp)
+	dbra	%d1,loop41
+        movel  USER_D1(%a6),%d1  | restore d1
+	movel	#0x41300000,-(%sp)	| header: version $41, size $30
+	frestore (%sp)+
+	unlk	%a5
+	rts
+
+notidle:
+	bclrb	#etemp15_bit,-40(%a5)	| NOTE(review): field at -40(a5) not named here
+	frestore (%sp)+
+	unlk	%a5
+	rts
+
+nofix:
+	frestore (%sp)+		| put the saved frame back unchanged
+	unlk	%a5
+	rts
+
+	|end
diff --git a/arch/m68k/fpsp040/get_op.S b/arch/m68k/fpsp040/get_op.S
new file mode 100644
index 0000000..c7c2f37
--- /dev/null
+++ b/arch/m68k/fpsp040/get_op.S
@@ -0,0 +1,676 @@
+|
+|	get_op.sa 3.6 5/19/92
+|
+|	get_op.sa 3.5 4/26/91
+|
+|  Description: This routine is called by the unsupported format/data
+| type exception handler ('unsupp' - vector 55) and the unimplemented
+| instruction exception handler ('unimp' - vector 11).  'get_op'
+| determines the opclass (0, 2, or 3) and branches to the
+| opclass handler routine.  See 68881/2 User's Manual table 4-11
+| for a description of the opclasses.
+|
+| For UNSUPPORTED data/format (exception vector 55) and for
+| UNIMPLEMENTED instructions (exception vector 11) the following
+| applies:
+|
+| - For unnormalized numbers (opclass 0, 2, or 3) the
+| number(s) is normalized and the operand type tag is updated.
+|
+| - For a packed number (opclass 2) the number is unpacked and the
+| operand type tag is updated.
+|
+| - For denormalized numbers (opclass 0 or 2) the number(s) is not
+| changed but passed to the next module.  The next module for
+| unimp is do_func, the next module for unsupp is res_func.
+|
+| For UNSUPPORTED data/format (exception vector 55) only the
+| following applies:
+|
+| - If there is a move out with a packed number (opclass 3) the
+| number is packed and written to user memory.  For the other
+| opclasses the number(s) are written back to the fsave stack
+| and the instruction is then restored back into the '040.  The
+| '040 is then able to complete the instruction.
+|
+| For example:
+| fadd.x fpm,fpn where the fpm contains an unnormalized number.
+| The '040 takes an unsupported data trap and gets to this
+| routine.  The number is normalized, put back on the stack and
+| then an frestore is done to restore the instruction back into
+| the '040.  The '040 then re-executes the fadd.x fpm,fpn with
+| a normalized number in the source and the instruction is
+| successful.
+|
+| Next consider if in the process of normalizing the un-
+| normalized number it becomes a denormalized number.  The
+| routine which converts the unnorm to a norm (called mk_norm)
+| detects this and tags the number as a denorm.  The routine
+| res_func sees the denorm tag and converts the denorm to a
+| norm.  The instruction is then restored back into the '040
+| which re-executes the instruction.
+|
+|
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+GET_OP:    |idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	.global	PIRN,PIRZRM,PIRP
+	.global	SMALRN,SMALRZRM,SMALRP
+	.global	BIGRN,BIGRZRM,BIGRP
+
+PIRN:
+	.long 0x40000000,0xc90fdaa2,0x2168c235    |pi, round to nearest
+PIRZRM:
+	.long 0x40000000,0xc90fdaa2,0x2168c234    |pi, round to zero/minus inf
+PIRP:
+	.long 0x40000000,0xc90fdaa2,0x2168c235    |pi, round to plus inf
+
+|round to nearest
+SMALRN:
+	.long 0x3ffd0000,0x9a209a84,0xfbcff798    |log10(2)
+	.long 0x40000000,0xadf85458,0xa2bb4a9a    |e
+	.long 0x3fff0000,0xb8aa3b29,0x5c17f0bc    |log2(e)
+	.long 0x3ffd0000,0xde5bd8a9,0x37287195    |log10(e)
+	.long 0x00000000,0x00000000,0x00000000    |0.0
+| round to zero;round to negative infinity
+SMALRZRM:
+	.long 0x3ffd0000,0x9a209a84,0xfbcff798    |log10(2)
+	.long 0x40000000,0xadf85458,0xa2bb4a9a    |e
+	.long 0x3fff0000,0xb8aa3b29,0x5c17f0bb    |log2(e)
+	.long 0x3ffd0000,0xde5bd8a9,0x37287195    |log10(e)
+	.long 0x00000000,0x00000000,0x00000000    |0.0
+| round to positive infinity
+SMALRP:
+	.long 0x3ffd0000,0x9a209a84,0xfbcff799    |log10(2)
+	.long 0x40000000,0xadf85458,0xa2bb4a9b    |e
+	.long 0x3fff0000,0xb8aa3b29,0x5c17f0bc    |log2(e)
+	.long 0x3ffd0000,0xde5bd8a9,0x37287195    |log10(e)
+	.long 0x00000000,0x00000000,0x00000000    |0.0
+
+|round to nearest
+BIGRN:
+	.long 0x3ffe0000,0xb17217f7,0xd1cf79ac    |ln(2)
+	.long 0x40000000,0x935d8ddd,0xaaa8ac17    |ln(10)
+	.long 0x3fff0000,0x80000000,0x00000000    |10 ^ 0
+
+	.global	PTENRN
+PTENRN:
+	.long 0x40020000,0xA0000000,0x00000000    |10 ^ 1
+	.long 0x40050000,0xC8000000,0x00000000    |10 ^ 2
+	.long 0x400C0000,0x9C400000,0x00000000    |10 ^ 4
+	.long 0x40190000,0xBEBC2000,0x00000000    |10 ^ 8
+	.long 0x40340000,0x8E1BC9BF,0x04000000    |10 ^ 16
+	.long 0x40690000,0x9DC5ADA8,0x2B70B59E    |10 ^ 32
+	.long 0x40D30000,0xC2781F49,0xFFCFA6D5    |10 ^ 64
+	.long 0x41A80000,0x93BA47C9,0x80E98CE0    |10 ^ 128
+	.long 0x43510000,0xAA7EEBFB,0x9DF9DE8E    |10 ^ 256
+	.long 0x46A30000,0xE319A0AE,0xA60E91C7    |10 ^ 512
+	.long 0x4D480000,0xC9767586,0x81750C17    |10 ^ 1024
+	.long 0x5A920000,0x9E8B3B5D,0xC53D5DE5    |10 ^ 2048
+	.long 0x75250000,0xC4605202,0x8A20979B    |10 ^ 4096
+|round to zero;round to minus infinity
+BIGRZRM:
+	.long 0x3ffe0000,0xb17217f7,0xd1cf79ab    |ln(2)
+	.long 0x40000000,0x935d8ddd,0xaaa8ac16    |ln(10)
+	.long 0x3fff0000,0x80000000,0x00000000    |10 ^ 0
+
+	.global	PTENRM
+PTENRM:
+	.long 0x40020000,0xA0000000,0x00000000    |10 ^ 1
+	.long 0x40050000,0xC8000000,0x00000000    |10 ^ 2
+	.long 0x400C0000,0x9C400000,0x00000000    |10 ^ 4
+	.long 0x40190000,0xBEBC2000,0x00000000    |10 ^ 8
+	.long 0x40340000,0x8E1BC9BF,0x04000000    |10 ^ 16
+	.long 0x40690000,0x9DC5ADA8,0x2B70B59D    |10 ^ 32
+	.long 0x40D30000,0xC2781F49,0xFFCFA6D5    |10 ^ 64
+	.long 0x41A80000,0x93BA47C9,0x80E98CDF    |10 ^ 128
+	.long 0x43510000,0xAA7EEBFB,0x9DF9DE8D    |10 ^ 256
+	.long 0x46A30000,0xE319A0AE,0xA60E91C6    |10 ^ 512
+	.long 0x4D480000,0xC9767586,0x81750C17    |10 ^ 1024
+	.long 0x5A920000,0x9E8B3B5D,0xC53D5DE5    |10 ^ 2048
+	.long 0x75250000,0xC4605202,0x8A20979A    |10 ^ 4096
+|round to positive infinity
+BIGRP:
+	.long 0x3ffe0000,0xb17217f7,0xd1cf79ac    |ln(2)
+	.long 0x40000000,0x935d8ddd,0xaaa8ac17    |ln(10)
+	.long 0x3fff0000,0x80000000,0x00000000    |10 ^ 0
+
+	.global	PTENRP
+PTENRP:
+	.long 0x40020000,0xA0000000,0x00000000    |10 ^ 1
+	.long 0x40050000,0xC8000000,0x00000000    |10 ^ 2
+	.long 0x400C0000,0x9C400000,0x00000000    |10 ^ 4
+	.long 0x40190000,0xBEBC2000,0x00000000    |10 ^ 8
+	.long 0x40340000,0x8E1BC9BF,0x04000000    |10 ^ 16
+	.long 0x40690000,0x9DC5ADA8,0x2B70B59E    |10 ^ 32
+	.long 0x40D30000,0xC2781F49,0xFFCFA6D6    |10 ^ 64
+	.long 0x41A80000,0x93BA47C9,0x80E98CE0    |10 ^ 128
+	.long 0x43510000,0xAA7EEBFB,0x9DF9DE8E    |10 ^ 256
+	.long 0x46A30000,0xE319A0AE,0xA60E91C7    |10 ^ 512
+	.long 0x4D480000,0xC9767586,0x81750C18    |10 ^ 1024
+	.long 0x5A920000,0x9E8B3B5D,0xC53D5DE6    |10 ^ 2048
+	.long 0x75250000,0xC4605202,0x8A20979B    |10 ^ 4096
+
+	|xref	nrm_zero
+	|xref	decbin
+	|xref	round
+
+	.global    get_op
+	.global    uns_getop
+	.global    uni_getop
+get_op:
+	clrb	DY_MO_FLG(%a6)	|start with the flag cleared (monadic)
+	tstb	UFLG_TMP(%a6)	|test flag for unsupp/unimp state
+	beq	uni_getop	|zero means unimp
+
+uns_getop:
+	btstb	#direction_bit,CMDREG1B(%a6)
+	bne	opclass3	|branch if a fmove out (any kind)
+	btstb	#6,CMDREG1B(%a6)
+	beqs	uns_notpacked	|bit clear: skip the packed check
+
+	bfextu	CMDREG1B(%a6){#3:#3},%d0	|get source specifier field
+	cmpb	#3,%d0
+	beq	pack_source	|check for a packed src op, branch if so
+uns_notpacked:
+	bsr	chk_dy_mo	|set the dyadic/monadic flag
+	tstb	DY_MO_FLG(%a6)
+	beqs	src_op_ck	|if monadic, go check src op
+|				;else, check dst op (fall through)
+
+	btstb	#7,DTAG(%a6)
+	beqs	src_op_ck	|if dst op is norm, check src op
+	bras	dst_ex_dnrm	|else, handle destination unnorm/dnrm
+
+uni_getop:
+	bfextu	CMDREG1B(%a6){#0:#6},%d0 |get opclass and src fields
+	cmpil	#0x17,%d0		|if op class and size fields are $17,
+|				;it is FMOVECR; if not, continue
+|
+| If the instruction is fmovecr, exit get_op.  It is handled
+| in do_func and smovecr.sa.
+|
+	bne	not_fmovecr	|handle fmovecr as an unimplemented inst
+	rts
+
+not_fmovecr:
+	btstb	#E1,E_BYTE(%a6)	|if set, there is a packed operand
+	bne	pack_source	|check for packed src op, branch if so
+
+| The following lines are coded to optimize on normalized operands
+	moveb	STAG(%a6),%d0
+	orb	DTAG(%a6),%d0	|check if either of STAG/DTAG msb set
+	bmis	dest_op_ck	|if so, some op needs to be fixed
+	rts
+
+dest_op_ck:
+	btstb	#7,DTAG(%a6)	|check for unsupported data types in
+	beqs	src_op_ck	|the destination, if not, check src op
+	bsr	chk_dy_mo	|set dyadic/monadic flag
+	tstb	DY_MO_FLG(%a6)	|
+	beqs	src_op_ck	|if monadic, check src op
+|
+| At this point, destination has an extended denorm or unnorm.
+|
+dst_ex_dnrm:
+	movew	FPTEMP_EX(%a6),%d0 |get destination exponent
+	andiw	#0x7fff,%d0	|mask sign, check if exp = 0000
+	beqs	src_op_ck	|if denorm then check source op.
+|				;denorms are taken care of in res_func
+|				;(unsupp) or do_func (unimp)
+|				;else unnorm fall through
+	leal	FPTEMP(%a6),%a0	|point a0 to dop - used in mk_norm
+	bsr	mk_norm		|go normalize - mk_norm returns:
+|				;L_SCR1{7:5} = operand tag
+|				;	(000 = norm, 100 = denorm)
+|				;L_SCR1{4} = fpte15 or ete15
+|				;	0 = exp >  $3fff
+|				;	1 = exp <= $3fff
+|				;and puts the normalized num back
+|				;on the fsave stack
+|
+	moveb L_SCR1(%a6),DTAG(%a6) |write the new tag & fpte15
+|				;to the fsave stack and fall
+|				;through to check source operand
+|
+src_op_ck:
+	btstb	#7,STAG(%a6)
+	beq	end_getop	|check for unsupported data types on the
+|				;source operand
+	btstb	#5,STAG(%a6)
+	bnes	src_sd_dnrm	|if bit 5 set, handle sgl/dbl denorms
+|
+| At this point only unnorms or extended denorms are possible.
+|
+src_ex_dnrm:
+	movew	ETEMP_EX(%a6),%d0 |get source exponent
+	andiw	#0x7fff,%d0	|mask sign, check if exp = 0000
+	beq	end_getop	|if denorm then exit, denorms are
+|				;handled in res_func or do_func
+	leal	ETEMP(%a6),%a0	|point a0 to sop - used in mk_norm
+	bsr	mk_norm		|go normalize - mk_norm returns:
+|				;L_SCR1{7:5} = operand tag
+|				;	(000 = norm, 100 = denorm)
+|				;L_SCR1{4} = fpte15 or ete15
+|				;	0 = exp >  $3fff
+|				;	1 = exp <= $3fff
+|				;and puts the normalized num back
+|				;on the fsave stack
+|
+	moveb	L_SCR1(%a6),STAG(%a6) |write the new tag & ete15
+	rts			|end_getop
+
+|
+| At this point, only single or double denorms are possible.
+| If the inst is not fmove, normalize the source.  If it is,
+| do nothing to the input.
+|
+src_sd_dnrm:
+	btstb	#4,CMDREG1B(%a6)	|differentiate between sgl/dbl denorm
+	bnes	is_double
+is_single:
+	movew	#0x3f81,%d1	|write bias for sgl denorm
+	bras	common		|goto the common code
+is_double:
+	movew	#0x3c01,%d1	|write the bias for a dbl denorm
+common:
+	btstb	#sign_bit,ETEMP_EX(%a6) |grab sign bit of mantissa
+	beqs	pos
+	bset	#15,%d1		|set sign bit because it is negative
+pos:
+	movew	%d1,ETEMP_EX(%a6)
+|				;put exponent on stack
+
+	movew	CMDREG1B(%a6),%d1
+	andw	#0xe3ff,%d1	|clear out source specifier
+	orw	#0x0800,%d1	|set source specifier to extended prec
+	movew	%d1,CMDREG1B(%a6)	|write back to the command word in stack
+|				;this is needed to fix unsupp data stack
+	leal	ETEMP(%a6),%a0	|point a0 to sop
+
+	bsr	mk_norm		|convert sgl/dbl denorm to norm
+	moveb	L_SCR1(%a6),STAG(%a6) |put tag & ete15 into the source tag
+	rts			|end_getop
+|
+| At this point, the source is definitely packed, whether
+| instruction is dyadic or monadic is still unknown
+|
+pack_source:
+	movel	FPTEMP_LO(%a6),ETEMP(%a6)	|write ms part of packed
+|				;number to etemp slot
+	bsr	chk_dy_mo	|set dyadic/monadic flag
+	bsr	unpack
+
+	tstb	DY_MO_FLG(%a6)
+	beqs	end_getop	|if monadic, exit
+|				;else, fix FPTEMP
+pack_dya:
+	bfextu	CMDREG1B(%a6){#6:#3},%d0 |extract dest fp reg
+	movel	#7,%d1
+	subl	%d0,%d1
+	clrl	%d0
+	bsetl	%d1,%d0		|set up d0 as a dynamic register mask
+	fmovemx %d0,FPTEMP(%a6)	|write to FPTEMP
+
+	btstb	#7,DTAG(%a6)	|check dest tag for unnorm or denorm
+	bne	dst_ex_dnrm	|if set, handle the unnorm or ext denorm
+|
+| Dest is not denormalized.  Check for norm, and set fpte15
+| accordingly.
+|
+	moveb	DTAG(%a6),%d0
+	andib	#0xf0,%d0		|strip to only dtag:fpte15
+	tstb	%d0		|check for normalized value
+	bnes	end_getop	|if inf/nan/zero leave get_op
+	movew	FPTEMP_EX(%a6),%d0
+	andiw	#0x7fff,%d0
+	cmpiw	#0x3fff,%d0	|check if fpte15 needs setting
+	bges	end_getop	|if >= $3fff, leave fpte15=0
+	orb	#0x10,DTAG(%a6)	|else set fpte15
+	bras	end_getop
+
+|
+| At this point, it is either an fmoveout packed, unnorm or denorm
+|
+opclass3:
+	clrb	DY_MO_FLG(%a6)	|set dyadic/monadic flag to monadic
+	bfextu	CMDREG1B(%a6){#4:#2},%d0	|two-bit field at offset 4
+	cmpib	#3,%d0		|both bits set?
+	bne	src_ex_dnrm	|if not equal, must be unnorm or denorm
+|				;else it is a packed move out
+|				;exit
+end_getop:
+	rts
+
+|
+| chk_dy_mo --- classify the instruction in CMDREG1B as monadic or
+| dyadic.  DY_MO_FLG is set ($ff) if dyadic, cleared if monadic; uses d0.
+|
+chk_dy_mo:
+	movew	CMDREG1B(%a6),%d0	|d0 = command word
+	btstl	#5,%d0		|extension bit 5 clear?
+	beqs	set_mon		|   yes: monadic
+	btstl	#4,%d0		|bit 5 is set; bit 4 clear?
+	beqs	set_dya		|   yes: dyadic
+	andiw	#0x007f,%d0	|isolate extension bits {6:0}
+	cmpiw	#0x0038,%d0	|$38 is fcmp, the remaining dyadic case
+	beqs	set_dya
+set_mon:
+	clrb	DY_MO_FLG(%a6)	|flag the inst as monadic
+	rts
+set_dya:
+	st	DY_MO_FLG(%a6)	|flag the inst as dyadic
+	rts
+|
+|	MK_NORM
+|
+| Normalizes unnormalized numbers, sets tag to norm or denorm, sets unfl
+| exception if denorm.
+|
+| CASE opclass 0x0 unsupp (uns_opx)
+|	normalize via nrm_zero
+|	tag = denorm if integer bit = 0, else norm
+|
+| CASE opclass 0x0 unimp
+|	mk_norm till msb set or exp = 0
+|	if integer bit = 0
+|	   tag = denorm
+|	else
+|	   tag = norm
+|
+| CASE opclass 011 unsupp
+|	mk_norm till msb set or exp = 0
+|	if integer bit = 0
+|	   tag = denorm
+|	   set unfl bit in FPSR_EXCEPT
+|	else
+|	   tag = norm
+|
+| if exp <= $3fff
+|   set ete15 or fpte15 = 1
+| else set ete15 or fpte15 = 0
+
+| input:
+|	a0 = points to operand to be normalized
+| output:
+|	L_SCR1{7:5} = operand tag (000 = norm, 100 = denorm)
+|	L_SCR1{4}   = fpte15 or ete15 (0 = exp > $3fff, 1 = exp <=$3fff)
+|	the normalized operand is placed back on the fsave stack
+mk_norm:
+	clrl	L_SCR1(%a6)	|start with tag = norm, bit 4 clear
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)	|transform into internal extended format
+
+	cmpib	#0x2c,1+EXC_VEC(%a6) |check if unimp
+	bnes	uns_data	|branch if unsupp
+	bsr	uni_inst	|call if unimp (opclass 0x0)
+	bras	reload
+uns_data:
+	btstb	#direction_bit,CMDREG1B(%a6) |check transfer direction
+	bnes	bit_set		|branch if set (opclass 011)
+	bsr	uns_opx		|call if opclass 0x0
+	bras	reload
+bit_set:
+	bsr	uns_op3		|opclass 011
+reload:
+	cmpw	#0x3fff,LOCAL_EX(%a0) |if exp > $3fff
+	bgts	end_mk		|   fpte15/ete15 already set to 0
+	bsetb	#4,L_SCR1(%a6)	|else set fpte15/ete15 to 1
+|				;calling routine actually sets the
+|				;value on the stack (along with the
+|				;tag), since this routine doesn't
+|				;know if it should set ete15 or fpte15
+|				;ie, it doesn't know if this is the
+|				;src op or dest op.
+end_mk:
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|test, then clear, the sign byte
+	beqs	end_mk_pos	|it was zero: operand is positive
+	bsetb	#sign_bit,LOCAL_EX(%a0) |convert back to IEEE format
+end_mk_pos:
+	rts
+|
+|     CASE opclass 011 unsupp: tag = denorm and unfl set if msb stays 0
+|
+uns_op3:
+	bsr	nrm_zero	|normalize till msb = 1 or exp = zero
+	btstb	#7,LOCAL_HI(%a0)	|if msb = 1
+	bnes	no_unfl		|then branch, tag stays norm (0)
+set_unfl:
+	orb	#dnrm_tag,L_SCR1(%a6) |byte op: tag is read from byte 0 of L_SCR1
+	bsetb	#unfl_bit,FPSR_EXCEPT(%a6) |set unfl exception bit
+no_unfl:
+	rts
+|
+|     uns_opx --- opclass 0x0, unsupported data type: normalize and tag
+|
+uns_opx:
+	bsr	nrm_zero	|normalize the operand at a0
+	btstb	#7,LOCAL_HI(%a0)	|integer bit (j-bit) set afterwards?
+	bnes	uns_nrm		|yes: the result is a norm
+uns_den:
+	orb	#dnrm_tag,L_SCR1(%a6) |no: tag it as a denorm
+	rts
+uns_nrm:
+	orb	#norm_tag,L_SCR1(%a6) |tag it as a norm
+	rts
+|
+|     uni_inst --- opclass 0x0, unimplemented inst: normalize and tag
+|
+uni_inst:
+	bsr	nrm_zero	|normalize the operand at a0
+	btstb	#7,LOCAL_HI(%a0)	|integer bit (j-bit) set afterwards?
+	bnes	uni_nrm		|yes: the result is a norm
+uni_den:
+	orb	#dnrm_tag,L_SCR1(%a6) |no: tag it as a denorm
+	rts
+uni_nrm:
+	orb	#norm_tag,L_SCR1(%a6) |tag it as a norm
+	rts
+
+|
+|	Decimal to binary conversion
+|
+| Special cases of inf and NaNs are completed outside of decbin.
+| If the input is an snan, the snan bit is not set.
+|
+| input:
+|	ETEMP(a6)	- points to packed decimal string in memory
+| output:
+|	fp0	- contains packed string converted to extended precision
+|	ETEMP	- same as fp0
+unpack:
+	movew	CMDREG1B(%a6),%d0	|examine command word, looking for fmove's
+	andw	#0x3b,%d0	|mask $3b: bits 2 and 6 are ignored
+	beq	move_unpack	|special handling for fmove: must set FPSR_CC
+
+	movew	ETEMP(%a6),%d0	|get word with inf information
+	bfextu	%d0{#20:#12},%d1	|get exponent into d1
+	cmpiw	#0x0fff,%d1	|test for inf or NaN
+	bnes	try_zero	|if not equal, it is not special
+	bfextu	%d0{#17:#3},%d1	|get SE and y bits into d1
+	cmpiw	#7,%d1		|SE and y bits must be on for special
+	bnes	try_zero	|if not on, it is not special
+|input is of the special cases of inf and NaN
+	tstl	ETEMP_HI(%a6)	|check ms mantissa
+	bnes	fix_nan		|if non-zero, it is a NaN
+	tstl	ETEMP_LO(%a6)	|check ls mantissa
+	bnes	fix_nan		|if non-zero, it is a NaN
+	bra	finish		|special already on stack
+fix_nan:
+	btstb	#signan_bit,ETEMP_HI(%a6) |test for snan
+	bne	finish		|bit set: not an snan, nothing to flag
+	orl	#snaniop_mask,USER_FPSR(%a6) |always set snan if it is so
+	bra	finish
+try_zero:
+	movew	ETEMP_EX+2(%a6),%d0 |get word 4
+	andiw	#0x000f,%d0	|clear all but last ni(y)bble
+	tstw	%d0		|check for zero.
+	bne	not_spec
+	tstl	ETEMP_HI(%a6)	|check words 3 and 2
+	bne	not_spec
+	tstl	ETEMP_LO(%a6)	|check words 1 and 0
+	bne	not_spec
+	tstl	ETEMP(%a6)	|test sign of the zero
+	bges	pos_zero
+	movel	#0x80000000,ETEMP(%a6) |write neg zero to etemp
+	clrl	ETEMP_HI(%a6)
+	clrl	ETEMP_LO(%a6)
+	bra	finish
+pos_zero:
+	clrl	ETEMP(%a6)	|write pos zero to etemp
+	clrl	ETEMP_HI(%a6)
+	clrl	ETEMP_LO(%a6)
+	bra	finish
+
+not_spec:
+	fmovemx %fp0-%fp1,-(%a7)	|save fp0 - decbin returns in it
+	bsr	decbin
+	fmovex %fp0,ETEMP(%a6)	|put the unpacked sop in the fsave stack
+	fmovemx (%a7)+,%fp0-%fp1
+	fmovel	#0,%FPSR		|clr fpsr from decbin
+	bra	finish
+
+|
+| Special handling for packed move in:  Same results as all other
+| packed cases, but we must set the FPSR condition codes properly.
+|
+move_unpack:
+	movew	ETEMP(%a6),%d0	|get word with inf information
+	bfextu	%d0{#20:#12},%d1	|get exponent into d1
+	cmpiw	#0x0fff,%d1	|test for inf or NaN
+	bnes	mtry_zero	|if not equal, it is not special
+	bfextu	%d0{#17:#3},%d1	|get SE and y bits into d1
+	cmpiw	#7,%d1		|SE and y bits must be on for special
+	bnes	mtry_zero	|if not on, it is not special
+|input is of the special cases of inf and NaN
+	tstl	ETEMP_HI(%a6)	|check ms mantissa
+	bnes	mfix_nan		|if non-zero, it is a NaN
+	tstl	ETEMP_LO(%a6)	|check ls mantissa
+	bnes	mfix_nan		|if non-zero, it is a NaN
+|input is inf
+	orl	#inf_mask,USER_FPSR(%a6) |set I bit
+	tstl	ETEMP(%a6)	|check sign
+	bge	finish
+	orl	#neg_mask,USER_FPSR(%a6) |set N bit
+	bra	finish		|special already on stack
+mfix_nan:
+	orl	#nan_mask,USER_FPSR(%a6) |set NaN bit
+	moveb	#nan_tag,STAG(%a6)	|set stag to NaN
+	btstb	#signan_bit,ETEMP_HI(%a6) |test for snan
+	bnes	mn_snan		|bit set: not an snan, skip the snan work
+	orl	#snaniop_mask,USER_FPSR(%a6) |set snan bit
+	btstb	#snan_bit,FPCR_ENABLE(%a6) |test for snan enabled
+	bnes	mn_snan		|if enabled, leave the operand as is
+	bsetb	#signan_bit,ETEMP_HI(%a6) |force snans to qnans
+mn_snan:
+	tstl	ETEMP(%a6)	|check for sign
+	bge	finish		|if clr, go on
+	orl	#neg_mask,USER_FPSR(%a6) |set N bit
+	bra	finish
+
+mtry_zero:
+	movew	ETEMP_EX+2(%a6),%d0 |get word 4
+	andiw	#0x000f,%d0	|clear all but last ni(y)bble
+	tstw	%d0		|check for zero.
+	bnes	mnot_spec
+	tstl	ETEMP_HI(%a6)	|check words 3 and 2
+	bnes	mnot_spec
+	tstl	ETEMP_LO(%a6)	|check words 1 and 0
+	bnes	mnot_spec
+	tstl	ETEMP(%a6)	|test sign of the zero
+	bges	mpos_zero
+	orl	#neg_mask+z_mask,USER_FPSR(%a6) |set N and Z
+	movel	#0x80000000,ETEMP(%a6) |write neg zero to etemp
+	clrl	ETEMP_HI(%a6)
+	clrl	ETEMP_LO(%a6)
+	bras	finish
+mpos_zero:
+	orl	#z_mask,USER_FPSR(%a6) |set Z
+	clrl	ETEMP(%a6)	|write pos zero to etemp
+	clrl	ETEMP_HI(%a6)
+	clrl	ETEMP_LO(%a6)
+	bras	finish
+
+mnot_spec:
+	fmovemx %fp0-%fp1,-(%a7)	|save fp0 ,fp1 - decbin returns in fp0
+	bsr	decbin
+	fmovex %fp0,ETEMP(%a6)
+|				;put the unpacked sop in the fsave stack
+	fmovemx (%a7)+,%fp0-%fp1
+
+finish:
+	movew	CMDREG1B(%a6),%d0	|get the command word
+	andw	#0xfbff,%d0	|change the source specifier field to
+|				;extended (was packed).
+	movew	%d0,CMDREG1B(%a6)	|write command word back to fsave stack
+|				;we need to do this so the 040 will
+|				;re-execute the inst. without taking
+|				;another packed trap.
+
+fix_stag:
+|Converted result is now in etemp on fsave stack, now set the source
+|tag (stag)
+|	if (ete =$7fff) then INF or NAN
+|		if (etemp = $x.0----0) then
+|			stag = INF
+|		else
+|			stag = NAN
+|	else
+|		if (ete = $0000) then
+|			stag = ZERO
+|		else
+|			stag = NORM
+|
+| Note also that the etemp_15 bit (just right of the stag) must
+| be set accordingly.
+|
+	movew		ETEMP_EX(%a6),%d1
+	andiw		#0x7fff,%d1   |strip sign
+	cmpw		#0x7fff,%d1
+	bnes		z_or_nrm
+	movel		ETEMP_HI(%a6),%d1
+	bnes		is_nan
+	movel		ETEMP_LO(%a6),%d1
+	bnes		is_nan
+is_inf:
+	moveb		#0x40,STAG(%a6)	|same value as inf_tag
+	movel		#0x40,%d0	|and leave the tag in d0
+	rts
+is_nan:
+	moveb		#0x60,STAG(%a6)	|same value as nan_tag
+	movel		#0x60,%d0	|and leave the tag in d0
+	rts
+z_or_nrm:
+	tstw		%d1
+	bnes		is_nrm
+is_zro:
+| For a zero, set etemp_15
+	moveb		#0x30,STAG(%a6)	|zero_tag ($20) plus etemp_15 ($10)
+	movel		#0x20,%d0	|d0 gets the tag without etemp_15
+	rts
+is_nrm:
+| For a norm, check if the exp <= $3fff; if so, set etemp_15
+	cmpiw		#0x3fff,%d1
+	bles		set_bit15
+	moveb		#0,STAG(%a6)
+	bras		end_is_nrm
+set_bit15:
+	moveb		#0x10,STAG(%a6)
+end_is_nrm:
+	movel		#0,%d0
+end_fix:
+	rts
+
+end_get:
+	rts			|label not referenced within this file
+	|end
diff --git a/arch/m68k/fpsp040/kernel_ex.S b/arch/m68k/fpsp040/kernel_ex.S
new file mode 100644
index 0000000..476b711
--- /dev/null
+++ b/arch/m68k/fpsp040/kernel_ex.S
@@ -0,0 +1,494 @@
+|
+|	kernel_ex.sa 3.3 12/19/90
+|
+| This file contains routines to force exception status in the
+| fpu for exceptional cases detected or reported within the
+| transcendental functions.  Typically, the t_xx routine will
+| set the appropriate bits in the USER_FPSR word on the stack.
+| The bits are tested in gen_except.sa to determine if an exceptional
+| situation needs to be created on return from the FPSP.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+KERNEL_EX:    |idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section    8
+
+#include "fpsp.h"
+
+mns_inf:  .long 0xffff0000,0x00000000,0x00000000	|-inf: sign set, exponent $7fff, zero mantissa
+pls_inf:  .long 0x7fff0000,0x00000000,0x00000000	|+inf: exponent $7fff, zero mantissa
+nan:      .long 0x7fff0000,0xffffffff,0xffffffff	|default nan loaded by t_operr
+huge:     .long 0x7ffe0000,0xffffffff,0xffffffff	|exponent $7ffe, all-ones mantissa
+
+	|xref	  ovf_r_k
+	|xref	  unf_sub
+	|xref	  nrm_set
+
+	.global	  t_dz
+	.global      t_dz2
+	.global      t_operr
+	.global      t_unfl
+	.global      t_ovfl
+	.global      t_ovfl2
+	.global      t_inx2
+	.global	  t_frcinx
+	.global	  t_extdnrm
+	.global	  t_resdnrm
+	.global	  dst_nan
+	.global	  src_nan
+|
+|	DZ exception
+| All state is reached through a6 (FPSR_CC, FPCR_ENABLE, ETEMP_EX,
+| USER_FPSR, STORE_FLG).  The disabled path returns its result in fp0.
+|	if dz trap disabled
+|		store properly signed inf (use sign of etemp) into fp0
+|		set FPSR exception status dz bit, condition code
+|		inf bit, and accrued dz bit
+|		return
+|		frestore the frame into the machine (done by unimp_hd)
+|
+|	else dz trap enabled
+|		set exception status bit & accrued bits in FPSR
+|		set flag to disable sto_res from corrupting fp register
+|		return
+|		frestore the frame into the machine (done by unimp_hd)
+|
+| t_dz2 is used by monadic functions such as flogn (from do_func).
+| t_dz is used by monadic functions such as satanh (from the
+| transcendental function).
+| t_dz2 always reports a negative result; t_dz uses the sign of etemp.
+t_dz2:
+	bsetb	#neg_bit,FPSR_CC(%a6)	|set neg bit in FPSR
+	fmovel	#0,%FPSR			|clr status bits (Z set)
+	btstb	#dz_bit,FPCR_ENABLE(%a6)	|test FPCR for dz exc enabled
+	bnes	dz_ena_end		|enabled: N already set, skip sign test
+	bras	m_inf			|flogx always returns -inf
+t_dz:
+	fmovel	#0,%FPSR			|clr status bits (Z set)
+	btstb	#dz_bit,FPCR_ENABLE(%a6)	|test FPCR for dz exc enabled
+	bnes	dz_ena
+|
+|	dz disabled
+|
+	btstb	#sign_bit,ETEMP_EX(%a6)	|check sign for neg or pos
+	beqs	p_inf			|branch if pos sign
+
+m_inf:
+	fmovemx mns_inf,%fp0-%fp0		|load -inf
+	bsetb	#neg_bit,FPSR_CC(%a6)	|set neg bit in FPSR
+	bras	set_fpsr
+p_inf:
+	fmovemx pls_inf,%fp0-%fp0		|load +inf
+set_fpsr:
+	orl	#dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ
+	rts
+|
+|	dz enabled
+|
+dz_ena:
+	btstb	#sign_bit,ETEMP_EX(%a6)	|check sign for neg or pos
+	beqs	dz_ena_end
+	bsetb	#neg_bit,FPSR_CC(%a6)	|set neg bit in FPSR
+dz_ena_end:
+	orl	#dzinf_mask,USER_FPSR(%a6) |set I,DZ,ADZ
+	st	STORE_FLG(%a6)		|fp0 is left untouched on this path
+	rts
+|
+|	OPERR exception
+|
+|	if (operr trap disabled)
+|		set FPSR exception status operr bit, condition code
+|		nan bit; Store default NAN into fp0
+|		frestore the frame into the machine (done by unimp_hd)
+|
+|	else (operr trap enabled)
+|		set FPSR exception status operr bit, accrued operr bit
+|		set flag to disable sto_res from corrupting fp register
+|		frestore the frame into the machine (done by unimp_hd)
+|
+t_operr:
+	orl	#opnan_mask,USER_FPSR(%a6) |set NaN, OPERR, AIOP
+
+	btstb	#operr_bit,FPCR_ENABLE(%a6) |test FPCR for operr enabled
+	bnes	op_ena
+
+	fmovemx nan,%fp0-%fp0		|load default nan
+	rts
+op_ena:
+	st	STORE_FLG(%a6)		|do not corrupt destination
+	rts
+
+|
+|	t_unfl --- UNFL exception
+|
+| This entry point is used by all routines requiring unfl, inex2,
+| aunfl, and ainex to be set on exit.
+|
+| On entry, a0 points to the exceptional operand.  The final exceptional
+| operand is built in FP_SCR1 and only the sign from the original operand
+| is used.
+| On exit fp0 holds the result produced by unf_sub; a0 points at FP_SCR1.
+t_unfl:
+	clrl	FP_SCR1(%a6)		|set exceptional operand to zero
+	clrl	FP_SCR1+4(%a6)
+	clrl	FP_SCR1+8(%a6)
+	tstb	(%a0)			|extract sign from caller's exop
+	bpls	unfl_signok
+	bset	#sign_bit,FP_SCR1(%a6)
+unfl_signok:
+	leal	FP_SCR1(%a6),%a0
+	orl	#unfinx_mask,USER_FPSR(%a6)
+|					;set UNFL, INEX2, AUNFL, AINEX
+unfl_con:
+	btstb	#unfl_bit,FPCR_ENABLE(%a6)
+	beqs	unfl_dis
+
+unfl_ena:
+	bfclr	STAG(%a6){#5:#3}		|clear wbtm66,wbtm1,wbtm0
+	bsetb	#wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15
+	bsetb	#sticky_bit,STICKY(%a6)	|set sticky bit
+
+	bclrb	#E1,E_BYTE(%a6)
+
+unfl_dis:
+	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|get round precision
+
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)		|convert to internal ext format
+
+	bsr	unf_sub			|returns IEEE result at a0
+|					;and sets FPSR_CC accordingly
+
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|convert back to IEEE ext format
+	beqs	unfl_fin		|sign byte was clear: positive result
+
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+	bsetb	#sign_bit,FP_SCR1(%a6)	|set sign bit of exc operand
+
+unfl_fin:
+	fmovemx (%a0),%fp0-%fp0		|store result in fp0
+	rts
+
+
+|
+|	t_ovfl2 --- OVFL exception (without inex2 returned)
+|
+| This entry is used by scale to force catastrophic overflow.  The
+| ovfl, aovfl, and ainex bits are set, but not the inex2 bit.
+| ETEMP is copied to FP_SCR1, then control joins t_ovfl at t_work.
+t_ovfl2:
+	orl	#ovfl_inx_mask,USER_FPSR(%a6)
+	movel	ETEMP(%a6),FP_SCR1(%a6)
+	movel	ETEMP_HI(%a6),FP_SCR1+4(%a6)
+	movel	ETEMP_LO(%a6),FP_SCR1+8(%a6)
+|
+| Check for single or double round precision.  If single, check if
+| the lower 40 bits of ETEMP are zero; if not, set inex2.  If double,
+| check if the lower 11 bits are zero; if not, set inex2.
+|
+	moveb	FPCR_MODE(%a6),%d0
+	andib	#0xc0,%d0		|isolate the rounding precision bits
+	beq	t_work		|if extended, finish ovfl processing
+	cmpib	#0x40,%d0		|test for single
+	bnes	t_dbl
+t_sgl:
+	tstl	ETEMP_LO(%a6)		|all 32 low bits count (a byte test saw only 8)
+	bnes	t_setinx2
+	movel	ETEMP_HI(%a6),%d0
+	andil	#0xff,%d0		|look at only lower 8 bits
+	bnes	t_setinx2
+	bra	t_work
+t_dbl:
+	movel	ETEMP_LO(%a6),%d0
+	andil	#0x7ff,%d0	|look at only lower 11 bits
+	beq	t_work
+t_setinx2:
+	orl	#inex2_mask,USER_FPSR(%a6)
+	bras	t_work
+|
+|	t_ovfl --- OVFL exception
+|
+|** Note: the exc operand is returned in ETEMP.
+| The result built by ovf_r_k is loaded into fp0 before returning.
+t_ovfl:
+	orl	#ovfinx_mask,USER_FPSR(%a6)
+t_work:
+	btstb	#ovfl_bit,FPCR_ENABLE(%a6) |test FPCR for ovfl enabled
+	beqs	ovf_dis
+
+ovf_ena:
+	clrl	FP_SCR1(%a6)		|set exceptional operand
+	clrl	FP_SCR1+4(%a6)
+	clrl	FP_SCR1+8(%a6)
+
+	bfclr	STAG(%a6){#5:#3}		|clear wbtm66,wbtm1,wbtm0
+	bclrb	#wbtemp15_bit,WB_BYTE(%a6) |clear wbtemp15
+	bsetb	#sticky_bit,STICKY(%a6)	|set sticky bit
+
+	bclrb	#E1,E_BYTE(%a6)
+|					;fall through to disabled case
+
+| For disabled overflow call 'ovf_r_k'.  This routine loads the
+| correct result based on the rounding precision, destination
+| format, rounding mode and sign.
+|
+ovf_dis:
+	bsr	ovf_r_k			|returns unsigned ETEMP_EX
+|					;and sets FPSR_CC accordingly.
+	bfclr	ETEMP_SGN(%a6){#0:#8}	|fix sign
+	beqs	ovf_pos			|sign byte was clear: positive result
+	bsetb	#sign_bit,ETEMP_EX(%a6)
+	bsetb	#sign_bit,FP_SCR1(%a6)	|set exceptional operand sign
+ovf_pos:
+	fmovemx ETEMP(%a6),%fp0-%fp0		|move the result to fp0
+	rts
+
+
+|
+|	INEX2 exception
+|
+| The inex2 and ainex bits are set.
+| Only USER_FPSR(a6) is modified; no register is written.
+t_inx2:
+	orl	#inx2a_mask,USER_FPSR(%a6) |set INEX2, AINEX
+	rts
+
+|
+|	Force Inex2
+|
+| This routine is called by the transcendental routines to force
+| the inex2 exception bits set in the FPSR.  If the underflow bit
+| is set, but the underflow trap was not taken, the aunfl bit in
+| the FPSR must be set.
+| Only the saved FPSR fields addressed through a6 are modified.
+t_frcinx:
+	orl	#inx2a_mask,USER_FPSR(%a6) |set INEX2, AINEX
+	btstb	#unfl_bit,FPSR_EXCEPT(%a6) |test for unfl bit set
+	beqs	no_uacc1		|if clear, do not set aunfl
+	bsetb	#aunfl_bit,FPSR_AEXCEPT(%a6)
+no_uacc1:
+	rts
+
+|
+|	DST_NAN
+|
+| Determine if the destination nan is signalling or non-signalling,
+| and set the FPSR bits accordingly.  See the MC68040 User's Manual
+| section 3.2.2.5 NOT-A-NUMBERS.
+| d1 is written to the fpcr here; presumably it holds the user FPCR - confirm in callers.
+dst_nan:
+	btstb	#sign_bit,FPTEMP_EX(%a6) |test sign of nan
+	beqs	dst_pos			|if clr, it was positive
+	bsetb	#neg_bit,FPSR_CC(%a6)	|set N bit
+dst_pos:
+	btstb	#signan_bit,FPTEMP_HI(%a6) |check if signalling
+	beqs	dst_snan		|branch if signalling
+
+	fmovel	%d1,%fpcr			|restore user's rmode/prec
+	fmovex FPTEMP(%a6),%fp0		|return the non-signalling nan
+|
+| Check the source nan.  If it is signalling, snan will be reported.
+|
+	moveb	STAG(%a6),%d0
+	andib	#0xe0,%d0		|isolate the source tag bits
+	cmpib	#0x60,%d0		|$60 is the nan tag
+	bnes	no_snan
+	btstb	#signan_bit,ETEMP_HI(%a6) |check if signalling
+	bnes	no_snan			|bit set: source nan is non-signalling
+	orl	#snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
+no_snan:
+	rts
+
+dst_snan:
+	btstb	#snan_bit,FPCR_ENABLE(%a6) |check if trap enabled
+	beqs	dst_dis			|branch if disabled
+
+	orb	#nan_tag,DTAG(%a6)	|set up dtag for nan
+	st	STORE_FLG(%a6)		|do not store a result
+	orl	#snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
+	rts
+
+dst_dis:
+	bsetb	#signan_bit,FPTEMP_HI(%a6) |set SNAN bit in dop
+	fmovel	%d1,%fpcr			|restore user's rmode/prec
+	fmovex FPTEMP(%a6),%fp0		|load non-sign. nan
+	orl	#snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
+	rts
+
+|
+|	SRC_NAN
+|
+| Determine if the source nan is signalling or non-signalling,
+| and set the FPSR bits accordingly.  See the MC68040 User's Manual
+| section 3.2.2.5 NOT-A-NUMBERS.
+| d1 is written to the fpcr here; presumably it holds the user FPCR - confirm in callers.
+src_nan:
+	btstb	#sign_bit,ETEMP_EX(%a6) |test sign of nan
+	beqs	src_pos			|if clr, it was positive
+	bsetb	#neg_bit,FPSR_CC(%a6)	|set N bit
+src_pos:
+	btstb	#signan_bit,ETEMP_HI(%a6) |check if signalling
+	beqs	src_snan		|branch if signalling
+	fmovel	%d1,%fpcr			|restore user's rmode/prec
+	fmovex ETEMP(%a6),%fp0		|return the non-signalling nan
+	rts
+
+src_snan:
+	btstb	#snan_bit,FPCR_ENABLE(%a6) |check if trap enabled
+	beqs	src_dis			|branch if disabled
+	bsetb	#signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop
+	orb	#norm_tag,DTAG(%a6)	|set up dtag for norm
+	orb	#nan_tag,STAG(%a6)	|set up stag for nan
+	st	STORE_FLG(%a6)		|do not store a result
+	orl	#snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
+	rts
+
+src_dis:
+	bsetb	#signan_bit,ETEMP_HI(%a6) |set SNAN bit in sop
+	fmovel	%d1,%fpcr			|restore user's rmode/prec
+	fmovex ETEMP(%a6),%fp0		|load non-sign. nan
+	orl	#snaniop_mask,USER_FPSR(%a6) |set NAN, SNAN, AIOP
+	rts
+
+|
+| For all functions that have a denormalized input and that f(x)=x,
+| this is the entry point
+| a0 points to the operand; the result is returned in fp0.
+t_extdnrm:
+	orl	#unfinx_mask,USER_FPSR(%a6)
+|					;set UNFL, INEX2, AUNFL, AINEX
+	bras	xdnrm_con
+|
+| Entry point for scale with extended denorm.  The function does
+| not set inex2, aunfl, or ainex.
+|
+t_resdnrm:
+	orl	#unfl_mask,USER_FPSR(%a6)
+
+xdnrm_con:
+	btstb	#unfl_bit,FPCR_ENABLE(%a6)
+	beqs	xdnrm_dis
+
+|
+| If exceptions are enabled, the additional task of setting up WBTEMP
+| is needed so that when the underflow exception handler is entered,
+| the user perceives no difference between what the 040 provides vs.
+| what the FPSP provides.
+|
+xdnrm_ena:
+	movel	%a0,-(%a7)		|keep the caller operand pointer
+
+	movel	LOCAL_EX(%a0),FP_SCR1(%a6)	|work on a copy in FP_SCR1
+	movel	LOCAL_HI(%a0),FP_SCR1+4(%a6)
+	movel	LOCAL_LO(%a0),FP_SCR1+8(%a6)
+
+	lea	FP_SCR1(%a6),%a0
+
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)		|convert to internal ext format
+	tstw	LOCAL_EX(%a0)		|check if input is denorm
+	beqs	xdnrm_dn		|if so, skip nrm_set
+	bsr	nrm_set			|normalize the result (exponent
+|					;will be negative
+xdnrm_dn:
+	bclrb	#sign_bit,LOCAL_EX(%a0)	|take off false sign
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
+	beqs	xdep
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+xdep:
+	bfclr	STAG(%a6){#5:#3}		|clear wbtm66,wbtm1,wbtm0
+	bsetb	#wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15
+	bclrb	#sticky_bit,STICKY(%a6)	|clear sticky bit
+	bclrb	#E1,E_BYTE(%a6)
+	movel	(%a7)+,%a0		|back to the caller operand
+xdnrm_dis:
+	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|get round precision
+	bnes	not_ext			|if not round extended, store
+|					;IEEE defaults
+is_ext:
+	btstb	#sign_bit,LOCAL_EX(%a0)
+	beqs	xdnrm_store
+
+	bsetb	#neg_bit,FPSR_CC(%a6)	|set N bit in FPSR_CC
+
+	bras	xdnrm_store
+
+not_ext:
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)		|convert to internal ext format
+	bsr	unf_sub			|returns IEEE result pointed by
+|					;a0; sets FPSR_CC accordingly
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|convert back to IEEE ext format
+	beqs	xdnrm_store
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+xdnrm_store:
+	fmovemx (%a0),%fp0-%fp0		|store result in fp0
+	rts
+
+|
+| This subroutine is used for dyadic operations that use an extended
+| denorm within the kernel. The approach used is to capture the frame,
+| fix/restore.
+| a2 is used as a temporary frame pointer; a0 is clobbered.
+	.global	t_avoid_unsupp
+t_avoid_unsupp:
+	link	%a2,#-LOCAL_SIZE		|so that a2 fpsp.h negative
+|					;offsets may be used
+	fsave	-(%a7)
+	tstb	1(%a7)			|check if idle, exit if so
+	beq	idle_end
+	btstb	#E1,E_BYTE(%a2)		|check for an E1 exception if
+|					;enabled, there is an unsupp
+	beq	end_avun		|else, exit
+	btstb	#7,DTAG(%a2)		|check for denorm destination
+	beqs	src_den			|else, must be a source denorm
+|
+| handle destination denorm
+|
+	lea	FPTEMP(%a2),%a0
+	btstb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)		|convert to internal ext format
+	bclrb	#7,DTAG(%a2)		|set DTAG to norm
+	bsr	nrm_set			|normalize result, exponent
+|					;will become negative
+	bclrb	#sign_bit,LOCAL_EX(%a0)	|get rid of fake sign
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|convert back to IEEE ext format
+	beqs	ck_src_den		|check if source is also denorm
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+ck_src_den:
+	btstb	#7,STAG(%a2)
+	beqs	end_avun
+src_den:
+	lea	ETEMP(%a2),%a0
+	btstb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)		|convert to internal ext format
+	bclrb	#7,STAG(%a2)		|set STAG to norm
+	bsr	nrm_set			|normalize result, exponent
+|					;will become negative
+	bclrb	#sign_bit,LOCAL_EX(%a0)	|get rid of fake sign
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|convert back to IEEE ext format
+	beqs	den_com
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+den_com:
+	moveb	#0xfe,CU_SAVEPC(%a2)	|set continue frame
+	clrw	NMNEXC(%a2)		|clear NMNEXC
+	bclrb	#E1,E_BYTE(%a2)
+|	fmove.l	%FPSR,FPSR_SHADOW(%a2)
+|	bset.b	#SFLAG,E_BYTE(%a2)
+|	bset.b	#XFLAG,T_BYTE(%a2)
+end_avun:
+	frestore (%a7)+			|put the frame back into the fpu
+	unlk	%a2
+	rts
+idle_end:
+	addl	#4,%a7			|drop the 4 saved bytes, no frestore
+	unlk	%a2
+	rts
+	|end
diff --git a/arch/m68k/fpsp040/res_func.S b/arch/m68k/fpsp040/res_func.S
new file mode 100644
index 0000000..8f6b952
--- /dev/null
+++ b/arch/m68k/fpsp040/res_func.S
@@ -0,0 +1,2040 @@
+|
+|	res_func.sa 3.9 7/29/91
+|
+| Normalizes denormalized numbers if necessary and updates the
+| stack frame.  The function is then restored back into the
+| machine and the 040 completes the operation.  This routine
+| is only used by the unsupported data type/format handler.
+| (Exception vector 55).
+|
+| For packed move out (fmove.p fpm,<ea>) the operation is
+| completed here; data is packed and moved to user memory.
+| The stack is restored to the 040 only in the case of a
+| reportable exception in the conversion.
+|
+|
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+RES_FUNC:    |idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+sp_bnds:	.short	0x3f81,0x407e	|$3fff-126 and $3fff+127
+		.short	0x3f6a,0x0000	|$3f81-23; NOTE(review): users not visible here
+dp_bnds:	.short	0x3c01,0x43fe	|$3fff-1022 and $3fff+1023
+		.short	0x3bcd,0x0000	|$3c01-52; NOTE(review): users not visible here
+
+	|xref	mem_write
+	|xref	bindec
+	|xref	get_fline
+	|xref	round
+	|xref	denorm
+	|xref	dest_ext
+	|xref	dest_dbl
+	|xref	dest_sgl
+	|xref	unf_sub
+	|xref	nrm_set
+	|xref	dnrm_lp
+	|xref	ovf_res
+	|xref	reg_dest
+	|xref	t_ovfl
+	|xref	t_unfl
+
+	.global	res_func
+	.global	p_move
+
+res_func:
+	clrb	DNRM_FLG(%a6)		|no denorm operand seen yet
+	clrb	RES_FLG(%a6)		|no restore requested yet
+	clrb	CU_ONLY(%a6)
+	tstb	DY_MO_FLG(%a6)		|zero means monadic
+	beqs	monadic
+dyadic:
+	btstb	#7,DTAG(%a6)	|if dop = norm=000, zero=001,
+|				;inf=010 or nan=011
+	beqs	monadic		|then branch
+|				;else denorm
+| HANDLE DESTINATION DENORM HERE
+|				;set dtag to norm
+|				;write the tag & fpte15 to the fstack
+	leal	FPTEMP(%a6),%a0
+
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)
+
+	bsr	nrm_set		|normalize number (exp will go negative)
+	bclrb	#sign_bit,LOCAL_EX(%a0) |get rid of false sign
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
+	beqs	dpos
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+dpos:
+	bfclr	DTAG(%a6){#0:#4}	|set tag to normalized, FPTE15 = 0
+	bsetb	#4,DTAG(%a6)	|set FPTE15
+	orb	#0x0f,DNRM_FLG(%a6)	|record destination denorm
+monadic:
+	leal	ETEMP(%a6),%a0
+	btstb	#direction_bit,CMDREG1B(%a6)	|check direction
+	bne	opclass3			|it is a mv out
+|
+| At this point, only opclass 0 and 2 possible
+|
+	btstb	#7,STAG(%a6)	|if sop = norm=000, zero=001,
+|				;inf=010 or nan=011
+	bne	mon_dnrm	|else denorm
+	tstb	DY_MO_FLG(%a6)	|all cases of dyadic instructions would
+	bne	normal		|require normalization of denorm
+
+| At this point:
+|	monadic instructions:	fabs  = $18  fneg   = $1a  ftst   = $3a
+|				fmove = $00  fsmove = $40  fdmove = $44
+|				fsqrt = $05* fssqrt = $41  fdsqrt = $45
+|				(*fsqrt reencoded to $05)
+|
+	movew	CMDREG1B(%a6),%d0	|get command register
+	andil	#0x7f,%d0			|strip to only command word
+|
+| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
+| fdsqrt are possible.
+| For cases fabs, fneg, fsmove, fdmove and ftst fall into cu_norm
+| For cases fsqrt, fssqrt, and fdsqrt goto normal
+|
+	btstl	#0,%d0
+	bne	normal			|weed out fsqrt instructions
+|
+| cu_norm handles fmove in instructions with normalized inputs.
+| The routine round is used to correctly round the input for the
+| destination precision and mode.
+| Dispatches on the command bits; ftst is the fall-through case.
+cu_norm:
+	st	CU_ONLY(%a6)		|set cu-only inst flag
+	movew	CMDREG1B(%a6),%d0
+	andib	#0x3b,%d0		|isolate bits to select inst
+	tstb	%d0
+	beql	cu_nmove	|if zero, it is an fmove
+	cmpib	#0x18,%d0
+	beql	cu_nabs		|if $18, it is fabs
+	cmpib	#0x1a,%d0
+	beql	cu_nneg		|if $1a, it is fneg
+|
+| Inst is ftst.  Check the source operand and set the cc's accordingly.
+| No write is done, so simply rts.
+| a0 points to the operand; d0 is scratch.
+cu_ntst:
+	movew	LOCAL_EX(%a0),%d0
+	bclrl	#15,%d0			|strip the sign; Z reflects its old value
+	sne	LOCAL_SGN(%a0)
+	beqs	cu_ntpo
+	orl	#neg_mask,USER_FPSR(%a6) |set N
+cu_ntpo:
+	cmpiw	#0x7fff,%d0	|test for inf/nan
+	bnes	cu_ntcz
+	tstl	LOCAL_HI(%a0)
+	bnes	cu_ntn
+	tstl	LOCAL_LO(%a0)
+	bnes	cu_ntn
+	orl	#inf_mask,USER_FPSR(%a6)
+	rts
+cu_ntn:
+	orl	#nan_mask,USER_FPSR(%a6)
+	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
+|						;snan handler
+
+	rts
+cu_ntcz:
+	tstl	LOCAL_HI(%a0)		|Z is reported only for an all-zero mantissa
+	bnel	cu_ntsx
+	tstl	LOCAL_LO(%a0)
+	bnel	cu_ntsx
+	orl	#z_mask,USER_FPSR(%a6)
+cu_ntsx:
+	rts
+|
+| Inst is fabs.  Execute the absolute value function on the input.
+| Branch to the fmove code.  If the operand is NaN, do nothing.
+| Bit 5 of the tag is set for both the zero ($20) and nan ($60) tags.
+cu_nabs:
+	moveb	STAG(%a6),%d0
+	btstl	#5,%d0			|test for NaN or zero
+	bne	wr_etemp		|if either, simply write it
+	bclrb	#7,LOCAL_EX(%a0)		|do abs
+	bras	cu_nmove		|fmove code will finish
+|
+| Inst is fneg.  Execute the negate value function on the input.
+| Fall though to the fmove code.  If the operand is NaN, do nothing.
+| Bit 5 of the tag is set for both the zero ($20) and nan ($60) tags.
+cu_nneg:
+	moveb	STAG(%a6),%d0
+	btstl	#5,%d0			|test for NaN or zero
+	bne	wr_etemp		|if either, simply write it
+	bchgb	#7,LOCAL_EX(%a0)		|do neg
+|
+| Inst is fmove.  This code also handles all result writes.
+| If bit 2 is set, round is forced to double.  If it is clear,
+| and bit 6 is set, round is forced to single.  If both are clear,
+| the round precision is found in the fpcr.  If the rounding precision
+| is double or single, round the result before the write.
+|
+cu_nmove:
+	moveb	STAG(%a6),%d0
+	andib	#0xe0,%d0			|isolate stag bits
+	bne	wr_etemp		|if not norm, simply write it
+	btstb	#2,CMDREG1B+1(%a6)	|check for rd
+	bne	cu_nmrd
+	btstb	#6,CMDREG1B+1(%a6)	|check for rs
+	bne	cu_nmrs
+|
+| The move or operation is not with forced precision.  Test for
+| nan or inf as the input; if so, simply write it to FPn.  Use the
+| FPCR_MODE byte to get rounding on norms and zeros.
+|
+cu_nmnr:
+	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|d0 = rounding precision field
+	tstb	%d0			|check for extended
+	beq	cu_wrexn		|if so, just write result
+	cmpib	#1,%d0			|check for single
+	beq	cu_nmrs			|fall through to double
+|
+| The move is fdmove or round precision is double.
+| Exponents at or below $3c01 go to cu_nunfl; $43ff or above to cu_novfl.
+cu_nmrd:
+	movel	#2,%d0			|set up the size for denorm
+	movew	LOCAL_EX(%a0),%d1		|compare exponent to double threshold
+	andw	#0x7fff,%d1
+	cmpw	#0x3c01,%d1
+	bls	cu_nunfl
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
+	orl	#0x00020000,%d1		|or in rprec (double)
+	clrl	%d0			|clear g,r,s for round
+	bclrb	#sign_bit,LOCAL_EX(%a0)	|convert to internal format
+	sne	LOCAL_SGN(%a0)
+	bsrl	round
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|back to IEEE format; Z if sign was clear
+	beqs	cu_nmrdc
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+cu_nmrdc:
+	movew	LOCAL_EX(%a0),%d1		|check for overflow
+	andw	#0x7fff,%d1
+	cmpw	#0x43ff,%d1
+	bge	cu_novfl		|take care of overflow case
+	bra	cu_wrexn
+|
+| The move is fsmove or round precision is single.
+| Exponents at or below $3f81 go to cu_nunfl; $407f or above fall into cu_novfl.
+cu_nmrs:
+	movel	#1,%d0			|size passed on for the cu_nunfl path
+	movew	LOCAL_EX(%a0),%d1
+	andw	#0x7fff,%d1
+	cmpw	#0x3f81,%d1
+	bls	cu_nunfl
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
+	orl	#0x00010000,%d1		|or in rprec (single)
+	clrl	%d0			|clear g,r,s for round
+	bclrb	#sign_bit,LOCAL_EX(%a0)	|convert to internal format
+	sne	LOCAL_SGN(%a0)
+	bsrl	round
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|back to IEEE format; Z if sign was clear
+	beqs	cu_nmrsc
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+cu_nmrsc:
+	movew	LOCAL_EX(%a0),%d1		|check for overflow
+	andw	#0x7FFF,%d1
+	cmpw	#0x407f,%d1
+	blt	cu_wrexn
+|
+| The operand is above precision boundaries.  Use t_ovfl to
+| generate the correct value.
+| Reached by branch from cu_nmrd and by fall-through from cu_nmrs.
+cu_novfl:
+	bsr	t_ovfl
+	bra	cu_wrexn
+|
+| The operand is below precision boundaries.  Use denorm to
+| generate the correct value.
+| d0 holds the size code set by the caller (1 = single, 2 = double).
+cu_nunfl:
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)
+	bsr	denorm
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
+	beqs	cu_nucont
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+cu_nucont:
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|d1 low word = rmode
+	btstb	#2,CMDREG1B+1(%a6)	|check for rd
+	bne	inst_d
+	btstb	#6,CMDREG1B+1(%a6)	|check for rs
+	bne	inst_s
+	swap	%d1			|no forced precision: take it from the fpcr
+	moveb	FPCR_MODE(%a6),%d1
+	lsrb	#6,%d1			|keep the two precision bits
+	swap	%d1			|d1 = precision in high word, rmode in low
+	bra	inst_sd
+inst_d:
+	orl	#0x00020000,%d1		|or in rprec (double)
+	bra	inst_sd
+inst_s:
+	orl	#0x00010000,%d1		|or in rprec (single)
+inst_sd:
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)
+	bsrl	round
+	bfclr	LOCAL_SGN(%a0){#0:#8}
+	beqs	cu_nuflp
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+cu_nuflp:
+	btstb	#inex2_bit,FPSR_EXCEPT(%a6)
+	beqs	cu_nuninx
+	orl	#aunfl_mask,USER_FPSR(%a6) |if the round was inex, set AUNFL
+cu_nuninx:
+	tstl	LOCAL_HI(%a0)		|test for zero
+	bnes	cu_nunzro
+	tstl	LOCAL_LO(%a0)
+	bnes	cu_nunzro
+|
+| The mantissa is zero from the denorm loop.  Check sign and rmode
+| to see if rounding should have occurred which would leave the lsb.
+|
+	movel	USER_FPCR(%a6),%d0
+	andil	#0x30,%d0		|isolate rmode
+	cmpil	#0x20,%d0
+	blts	cu_nzro			|below $20: result stays zero
+	bnes	cu_nrp			|above $20: cu_nrp; equal falls into cu_nrm
+cu_nrm:
+	tstw	LOCAL_EX(%a0)	|if positive, result is zero; negative sets lsb
+	bges	cu_nzro
+	btstb	#7,FPCR_MODE(%a6) |check for double
+	beqs	cu_nincs
+	bras	cu_nincd
+cu_nrp:
+	tstw	LOCAL_EX(%a0)	|if positive, set lsb
+	blts	cu_nzro
+	btstb	#7,FPCR_MODE(%a6) |check for double; NOTE(review): ignores forced rd/rs bits
+	beqs	cu_nincs
+cu_nincd:
+	orl	#0x800,LOCAL_LO(%a0) |inc for double
+	bra	cu_nunzro
+cu_nincs:
+	orl	#0x100,LOCAL_HI(%a0) |inc for single
+	bra	cu_nunzro
+cu_nzro:
+	orl	#z_mask,USER_FPSR(%a6)
+	moveb	STAG(%a6),%d0
+	andib	#0xe0,%d0
+	cmpib	#0x40,%d0		|check if input was tagged zero; NOTE(review): fix_stag uses $40 for inf
+	beqs	cu_numv
+cu_nunzro:
+	orl	#unfl_mask,USER_FPSR(%a6) |set unfl
+cu_numv:
+	movel	(%a0),ETEMP(%a6)	|copy the result to etemp
+	movel	4(%a0),ETEMP_HI(%a6)
+	movel	8(%a0),ETEMP_LO(%a6)
+|
+| Write the result to memory, setting the fpsr cc bits.  NaN and Inf
+| bypass cu_wrexn.
+| Z is set when the sign/exponent word is $0000 or $8000.
+cu_wrexn:
+	tstw	LOCAL_EX(%a0)		|test for zero
+	beqs	cu_wrzero
+	cmpw	#0x8000,LOCAL_EX(%a0)	|test for zero with the sign bit set
+	bnes	cu_wreon
+cu_wrzero:
+	orl	#z_mask,USER_FPSR(%a6)	|set Z bit
+cu_wreon:
+	tstw	LOCAL_EX(%a0)
+	bpl	wr_etemp		|sign clear: done
+	orl	#neg_mask,USER_FPSR(%a6)
+	bra	wr_etemp
+
+|
+| HANDLE SOURCE DENORM HERE
+|
+|				;clear denorm stag to norm
+|				;write the new tag & ete15 to the fstack
+mon_dnrm:
+|
+| At this point, check for the cases in which normalizing the
+| denorm produces incorrect results.
+|
+	tstb	DY_MO_FLG(%a6)	|all cases of dyadic instructions would
+	bnes	nrm_src		|require normalization of denorm
+
+| At this point:
+|	monadic instructions:	fabs  = $18  fneg   = $1a  ftst   = $3a
+|				fmove = $00  fsmove = $40  fdmove = $44
+|				fsqrt = $05* fssqrt = $41  fdsqrt = $45
+|				(*fsqrt reencoded to $05)
+|
+	movew	CMDREG1B(%a6),%d0	|get command register
+	andil	#0x7f,%d0			|strip to only command word
+|
+| At this point, fabs, fneg, fsmove, fdmove, ftst, fsqrt, fssqrt, and
+| fdsqrt are possible.
+| For cases fabs, fneg, fsmove, fdmove and ftst goto cu_dnrm (do not normalize)
+| For cases fsqrt, fssqrt, and fdsqrt goto nrm_src (do normalize)
+|
+	btstl	#0,%d0
+	bnes	nrm_src		|weed out fsqrt instructions
+	st	CU_ONLY(%a6)	|set cu-only inst flag
+	bra	cu_dnrm		|fmove, fabs, fneg, ftst
+|				;cases go to cu_dnrm
+nrm_src:
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)
+	bsr	nrm_set		|normalize number (exponent will go
+|				; negative)
+	bclrb	#sign_bit,LOCAL_EX(%a0) |get rid of false sign
+
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|change back to IEEE ext format
+	beqs	spos
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+spos:
+	bfclr	STAG(%a6){#0:#4}	|set tag to normalized, ETE15 = 0
+	bsetb	#4,STAG(%a6)	|set ETE15
+	orb	#0xf0,DNRM_FLG(%a6)	|record source denorm
+normal:
+	tstb	DNRM_FLG(%a6)	|check if any of the ops were denorms
+	bne	ck_wrap		|if so, check if it is a potential
+|				;wrap-around case
+fix_stk:
+	moveb	#0xfe,CU_SAVEPC(%a6)
+	bclrb	#E1,E_BYTE(%a6)
+
+	clrw	NMNEXC(%a6)
+
+	st	RES_FLG(%a6)	|indicate that a restore is needed
+	rts
+
+|
+| cu_dnrm handles all cu-only instructions (fmove, fabs, fneg, and
+| ftst) completely in software without an frestore to the 040.
+| Dispatches on the command bits; ftst is the fall-through case.
+cu_dnrm:
+	st	CU_ONLY(%a6)
+	movew	CMDREG1B(%a6),%d0
+	andib	#0x3b,%d0		|isolate bits to select inst
+	tstb	%d0
+	beql	cu_dmove	|if zero, it is an fmove
+	cmpib	#0x18,%d0
+	beql	cu_dabs		|if $18, it is fabs
+	cmpib	#0x1a,%d0
+	beql	cu_dneg		|if $1a, it is fneg
+|
+| Inst is ftst.  Check the source operand and set the cc's accordingly.
+| No write is done, so simply rts.
+| a0 points to the operand; d0 is scratch.
+cu_dtst:
+	movew	LOCAL_EX(%a0),%d0
+	bclrl	#15,%d0			|strip the sign; Z reflects its old value
+	sne	LOCAL_SGN(%a0)
+	beqs	cu_dtpo
+	orl	#neg_mask,USER_FPSR(%a6) |set N
+cu_dtpo:
+	cmpiw	#0x7fff,%d0	|test for inf/nan
+	bnes	cu_dtcz
+	tstl	LOCAL_HI(%a0)
+	bnes	cu_dtn
+	tstl	LOCAL_LO(%a0)
+	bnes	cu_dtn
+	orl	#inf_mask,USER_FPSR(%a6)
+	rts
+cu_dtn:
+	orl	#nan_mask,USER_FPSR(%a6)
+	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
+|						;snan handler
+	rts
+cu_dtcz:
+	tstl	LOCAL_HI(%a0)		|Z is reported only for an all-zero mantissa
+	bnel	cu_dtsx
+	tstl	LOCAL_LO(%a0)
+	bnel	cu_dtsx
+	orl	#z_mask,USER_FPSR(%a6)
+cu_dtsx:
+	rts
+|
+| Inst is fabs.  Execute the absolute value function on the input.
+| Branch to the fmove code.
+| The sign is cleared in place in the operand at a0.
+cu_dabs:
+	bclrb	#7,LOCAL_EX(%a0)		|do abs
+	bras	cu_dmove		|fmove code will finish
+|
+| Inst is fneg.  Execute the negate value function on the input.
+| Fall though to the fmove code.
+| The sign is toggled in place in the operand at a0.
+cu_dneg:
+	bchgb	#7,LOCAL_EX(%a0)		|do neg
+|
+| Inst is fmove.  This code also handles all result writes.
+| If bit 2 is set, round is forced to double.  If it is clear,
+| and bit 6 is set, round is forced to single.  If both are clear,
+| the round precision is found in the fpcr.  If the rounding precision
+| is double or single, the result is zero, and the mode is checked
+| to determine if the lsb of the result should be set.
+|
+cu_dmove:
+	btstb	#2,CMDREG1B+1(%a6)	|check for rd
+	bne	cu_dmrd
+	btstb	#6,CMDREG1B+1(%a6)	|check for rs
+	bne	cu_dmrs
+|
+| The move or operation is not with forced precision.  Use the
+| FPCR_MODE byte to get rounding.
+|
+cu_dmnr:
+	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|d0 = rounding precision field
+	tstb	%d0			|check for extended
+	beq	cu_wrexd		|if so, just write result
+	cmpib	#1,%d0			|check for single
+	beq	cu_dmrs			|fall through to double
+|
+| The move is fdmove or round precision is double.  Result is zero.
+| Check rmode for rp or rm and set lsb accordingly.
+| rmode 3 (rp) bumps a positive result; rmode 2 (rm) bumps a negative one.
+cu_dmrd:
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
+	tstw	LOCAL_EX(%a0)		|check sign
+	blts	cu_dmdn
+	cmpib	#3,%d1			|check for rp
+	bne	cu_dpd			|load double pos zero
+	bra	cu_dpdr			|load double pos zero w/lsb
+cu_dmdn:
+	cmpib	#2,%d1			|check for rm
+	bne	cu_dnd			|load double neg zero
+	bra	cu_dndr			|load double neg zero w/lsb
+|
+| The move is fsmove or round precision is single.  Result is zero.
+| Check for rp or rm and set lsb accordingly.
+| rmode 3 (rp) bumps a positive result; rmode 2 (rm) bumps a negative one.
+cu_dmrs:
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|get rmode
+	tstw	LOCAL_EX(%a0)		|check sign
+	blts	cu_dmsn
+	cmpib	#3,%d1			|check for rp
+	bne	cu_spd			|load single pos zero
+	bra	cu_spdr			|load single pos zero w/lsb
+cu_dmsn:
+	cmpib	#2,%d1			|check for rm
+	bne	cu_snd			|load single neg zero
+	bra	cu_sndr			|load single neg zero w/lsb
+|
+| The precision is extended, so the result in etemp is correct.
+| Simply set unfl (not inex2 or aunfl) and write the result to
+| the correct fp register.
+cu_wrexd:
+	orl	#unfl_mask,USER_FPSR(%a6)
+	tstw	LOCAL_EX(%a0)		|a zero sign/exponent word counts as positive
+	beq	wr_etemp
+	orl	#neg_mask,USER_FPSR(%a6)	|otherwise report N
+	bra	wr_etemp
+|
+| These routines write +/- zero in double format.  The routines
+| cu_dpdr and cu_dndr set the double lsb.
+| Each one overwrites the operand at a0 and exits through wr_etemp.
+cu_dpd:
+	movel	#0x3c010000,LOCAL_EX(%a0)	|force pos double zero
+	clrl	LOCAL_HI(%a0)
+	clrl	LOCAL_LO(%a0)
+	orl	#z_mask,USER_FPSR(%a6)
+	orl	#unfinx_mask,USER_FPSR(%a6)
+	bra	wr_etemp
+cu_dpdr:
+	movel	#0x3c010000,LOCAL_EX(%a0)	|force pos double zero
+	clrl	LOCAL_HI(%a0)
+	movel	#0x800,LOCAL_LO(%a0)	|with lsb set
+	orl	#unfinx_mask,USER_FPSR(%a6)
+	bra	wr_etemp
+cu_dnd:
+	movel	#0xbc010000,LOCAL_EX(%a0)	|force neg double zero
+	clrl	LOCAL_HI(%a0)
+	clrl	LOCAL_LO(%a0)
+	orl	#z_mask,USER_FPSR(%a6)
+	orl	#neg_mask,USER_FPSR(%a6)
+	orl	#unfinx_mask,USER_FPSR(%a6)
+	bra	wr_etemp
+cu_dndr:
+	movel	#0xbc010000,LOCAL_EX(%a0)	|force neg double zero
+	clrl	LOCAL_HI(%a0)
+	movel	#0x800,LOCAL_LO(%a0)	|with lsb set
+	orl	#neg_mask,USER_FPSR(%a6)
+	orl	#unfinx_mask,USER_FPSR(%a6)
+	bra	wr_etemp
+|
+| These routines write +/- zero in single format.  The routines
+| cu_spdr and cu_sndr set the single lsb.
+| Each one overwrites the operand at a0 and exits through wr_etemp.
+cu_spd:
+	movel	#0x3f810000,LOCAL_EX(%a0)	|force pos single zero
+	clrl	LOCAL_HI(%a0)
+	clrl	LOCAL_LO(%a0)
+	orl	#z_mask,USER_FPSR(%a6)
+	orl	#unfinx_mask,USER_FPSR(%a6)
+	bra	wr_etemp
+cu_spdr:
+	movel	#0x3f810000,LOCAL_EX(%a0)	|force pos single zero
+	movel	#0x100,LOCAL_HI(%a0)	|with lsb set
+	clrl	LOCAL_LO(%a0)
+	orl	#unfinx_mask,USER_FPSR(%a6)
+	bra	wr_etemp
+cu_snd:
+	movel	#0xbf810000,LOCAL_EX(%a0)	|force neg single zero
+	clrl	LOCAL_HI(%a0)
+	clrl	LOCAL_LO(%a0)
+	orl	#z_mask,USER_FPSR(%a6)
+	orl	#neg_mask,USER_FPSR(%a6)
+	orl	#unfinx_mask,USER_FPSR(%a6)
+	bra	wr_etemp
+cu_sndr:
+	movel	#0xbf810000,LOCAL_EX(%a0)	|force neg single zero
+	movel	#0x100,LOCAL_HI(%a0)	|with lsb set
+	clrl	LOCAL_LO(%a0)
+	orl	#neg_mask,USER_FPSR(%a6)
+	orl	#unfinx_mask,USER_FPSR(%a6)
+	bra	wr_etemp
+
+|
+| This code checks for 16-bit overflow conditions on dyadic
+| operations which are not restorable into the floating-point
+| unit and must be completed in software.  Basically, this
+| condition exists with a very large norm and a denorm.  One
+| of the operands must be denormalized to enter this code.
+|
+| Flags used:
+|	DY_MO_FLG contains 0 for monadic op, $ff for dyadic
+|	DNRM_FLG contains $00 for neither op denormalized
+|	                  $0f for the destination op denormalized
+|	                  $f0 for the source op denormalized
+|	                  $ff for both ops denormalized
+|
+| The wrap-around condition occurs for add, sub, div, and cmp
+| when
+|
+|	abs(dest_exp - src_exp) >= $8000
+|
+| and for mul when
+|
+|	(dest_exp + src_exp) < $0
+|
+| we must process the operation here if this case is true.
+|
+| The rts following the frcfpn routine is the exit from res_func
+| for this condition.  The restore flag (RES_FLG) is left clear.
+| No frestore is done unless an exception is to be reported.
+|
+| For fadd:
+|	if(sign_of(dest) != sign_of(src))
+|		replace exponent of src with $3fff (keep sign)
+|		use fpu to perform dest+new_src (user's rmode and X)
+|		clr sticky
+|	else
+|		set sticky
+|	call round with user's precision and mode
+|	move result to fpn and wbtemp
+|
+| For fsub:
+|	if(sign_of(dest) == sign_of(src))
+|		replace exponent of src with $3fff (keep sign)
+|		use fpu to perform dest+new_src (user's rmode and X)
+|		clr sticky
+|	else
+|		set sticky
+|	call round with user's precision and mode
+|	move result to fpn and wbtemp
+|
+| For fdiv/fsgldiv:
+|	if(both operands are denorm)
+|		restore_to_fpu;
+|	if(dest is norm)
+|		force_ovf;
+|	else(dest is denorm)
+|		force_unf;
+|
+| For fcmp:
+|	if(dest is norm)
+|		N = sign_of(dest);
+|	else(dest is denorm)
+|		N = ~sign_of(src);
+|
+| For fmul:
+|	if(both operands are denorm)
+|		force_unf;
+|	if((dest_exp + src_exp) < 0)
+|		force_unf;
+|	else
+|		restore_to_fpu;
+|
+| local equates:
+	.set	addcode,0x22
+	.set	subcode,0x28
+	.set	mulcode,0x23
+	.set	divcode,0x20
+	.set	cmpcode,0x38
+|
+| ck_wrap: entry point for the wrap-around check.
+|
+| A monadic operation (DY_MO_FLG == 0, which at this point is fsqrt)
+| goes straight to fix_stk.  For dyadic operations the command bits of
+| CMDREG1B select the handler: fadd, fsub, fmul and fcmp branch to
+| their wrap_* routines; anything else falls through to wrap_div.
+|
+| The tstb below must be present: without it the beq would test
+| whatever condition codes the caller happened to leave behind.
+|
+| Clobbers: d0, condition codes.
+|
+ck_wrap:
+	tstb	DY_MO_FLG(%a6)	|check for fsqrt
+	beq	fix_stk		|if zero, it is fsqrt
+	movew	CMDREG1B(%a6),%d0
+	andiw	#0x3b,%d0		|strip to command bits
+	cmpiw	#addcode,%d0
+	beq	wrap_add
+	cmpiw	#subcode,%d0
+	beq	wrap_sub
+	cmpiw	#mulcode,%d0
+	beq	wrap_mul
+	cmpiw	#cmpcode,%d0
+	beq	wrap_cmp
+|
+| Inst is fdiv.
+|
+|
+| wrap_div: fdiv with at least one denormalized operand.
+| Both denorm -> fix_stk.  Source denorm -> div_srcd.  Destination
+| denorm (div_destd): if the source is zero/inf/nan, or the exponent
+| difference is below the threshold, go to fix_stk; otherwise set
+| WBTEMP_SGN to the XOR of the operand signs and force an underflow.
+|
+wrap_div:
+	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
+	beq	fix_stk		 |restore to fpu
+|
+| One of the ops is denormalized.  Test for wrap condition
+| and force the result.
+|
+	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
+	bnes	div_srcd
+div_destd:
+	bsrl	ckinf_ns		|d0 != 0 if src is zero, inf or nan
+	bne	fix_stk
+	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
+	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
+	subl	%d1,%d0			|subtract dest from src
+|					;NOTE(review): threshold here is $7fff
+|					;while div_srcd and the add/sub/cmp
+|					;paths use $8000 - confirm intended
+	cmpl	#0x7fff,%d0
+	blt	fix_stk			|if less, not wrap case
+	clrb	WBTEMP_SGN(%a6)
+	movew	ETEMP_EX(%a6),%d0		|find the sign of the result
+	movew	FPTEMP_EX(%a6),%d1
+	eorw	%d1,%d0
+	andiw	#0x8000,%d0
+	beq	force_unf
+	st	WBTEMP_SGN(%a6)		|signs differ: result is negative
+	bra	force_unf		|(force_unf recomputes WBTEMP_SGN)
+
+|
+| ckinf_ns / ckinf_nd: classify the source tag (STAG) or destination
+| tag (DTAG).  Only tag bits $60 are examined.
+|
+| Output: d0 = -1 (Z clear) if the masked tag is $20, $40 or $60
+|	      (zero, inf or nan per the checks below);
+|	 d0 = 0  (Z set) otherwise.
+| Callers test the result with bne immediately after the bsr.
+|
+ckinf_ns:
+	moveb	STAG(%a6),%d0		|check source tag for inf or nan
+	bra	ck_in_com
+ckinf_nd:
+	moveb	DTAG(%a6),%d0		|check destination tag for inf or nan
+ck_in_com:
+	andib	#0x60,%d0			|isolate tag bits
+	cmpb	#0x40,%d0			|is it inf?
+	beq	nan_or_inf		|not wrap case
+	cmpb	#0x60,%d0			|is it nan?
+	beq	nan_or_inf		|yes, not wrap case
+	cmpb	#0x20,%d0			|is it a zero?
+	beq	nan_or_inf		|yes, not wrap case
+	clrl	%d0
+	rts				|masked tag is 0 (not zero/inf/nan);
+|					;caller goes on to check wrap case
+nan_or_inf:
+	moveql	#-1,%d0			|also reached for a zero operand
+	rts
+
+
+
+|
+| div_srcd: fdiv with a denormalized source.  If the destination is
+| zero/inf/nan, or the exponent difference is below $8000, go to
+| fix_stk.  Otherwise set WBTEMP_SGN to the XOR of the operand signs
+| and fall through (or branch) into force_ovf.
+|
+div_srcd:
+	bsrl	ckinf_nd		|d0 != 0 if dest is zero, inf or nan
+	bne	fix_stk
+	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
+	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
+	subl	%d1,%d0			|subtract src from dest
+	cmpl	#0x8000,%d0
+	blt	fix_stk			|if less, not wrap case
+	clrb	WBTEMP_SGN(%a6)
+	movew	ETEMP_EX(%a6),%d0		|find the sign of the result
+	movew	FPTEMP_EX(%a6),%d1
+	eorw	%d1,%d0
+	andiw	#0x8000,%d0
+	beqs	force_ovf
+	st	WBTEMP_SGN(%a6)		|signs differ; fall into force_ovf
+|
+| This code handles the case of the instruction resulting in
+| an overflow condition.
+|
+|
+| force_ovf: produce an overflow result in WBTEMP.
+| Expects WBTEMP_SGN to already hold the result sign (set by div_srcd).
+| Clears E1 and NMNEXC, ORs ovfl_inx_mask into USER_FPSR, selects the
+| rounding precision (1 = forced single, 2 = forced double, else the
+| FPCR precision field) in d0, calls ovf_res with a0 -> WBTEMP, folds
+| WBTEMP_SGN back into the sign bit of WBTEMP_EX and exits via frcfpn.
+|
+force_ovf:
+	bclrb	#E1,E_BYTE(%a6)
+	orl	#ovfl_inx_mask,USER_FPSR(%a6)
+	clrw	NMNEXC(%a6)
+	leal	WBTEMP(%a6),%a0		|point a0 to memory location
+	movew	CMDREG1B(%a6),%d0
+	btstl	#6,%d0			|test for forced precision
+	beqs	frcovf_fpcr
+	btstl	#2,%d0			|check for double
+	bnes	frcovf_dbl
+	movel	#0x1,%d0			|inst is forced single
+	bras	frcovf_rnd
+frcovf_dbl:
+	movel	#0x2,%d0			|inst is forced double
+	bras	frcovf_rnd
+frcovf_fpcr:
+	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|inst not forced - use fpcr prec
+frcovf_rnd:
+
+| The 881/882 does not set inex2 for the following case, so the
+| line is commented out to be compatible with 881/882
+|	tst.b	%d0
+|	beq.b	frcovf_x
+|	or.l	#inex2_mask,USER_FPSR(%a6) ;if prec is s or d, set inex2
+
+|frcovf_x:
+	bsrl	ovf_res			|get correct result based on
+|					;round precision/mode.  This
+|					;sets FPSR_CC correctly
+|					;returns in external format
+	bfclr	WBTEMP_SGN(%a6){#0:#8}	|Z reflects the old sign byte
+	beq	frcfpn			|positive: write as is
+	bsetb	#sign_bit,WBTEMP_EX(%a6)	|negative: restore the sign bit
+	bra	frcfpn
+|
+| Inst is fadd.
+|
+|
+| wrap_add: fadd with at least one denormalized operand.
+| Both denorm -> fix_stk.  Otherwise check that the other operand is
+| not zero/inf/nan and that the exponent difference is at least $8000;
+| if not, go to fix_stk.  On a wrap, the dest-denorm path branches to
+| add_wrap and the src-denorm path falls through to it.
+|
+wrap_add:
+	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
+	beq	fix_stk		 |restore to fpu
+|
+| One of the ops is denormalized.  Test for wrap condition
+| and complete the instruction.
+|
+	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
+	bnes	add_srcd
+add_destd:
+	bsrl	ckinf_ns		|d0 != 0 if src is zero, inf or nan
+	bne	fix_stk
+	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
+	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
+	subl	%d1,%d0			|subtract dest from src
+	cmpl	#0x8000,%d0
+	blt	fix_stk			|if less, not wrap case
+	bra	add_wrap
+add_srcd:
+	bsrl	ckinf_nd		|d0 != 0 if dest is zero, inf or nan
+	bne	fix_stk
+	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
+	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
+	subl	%d1,%d0			|subtract src from dest
+	cmpl	#0x8000,%d0
+	blt	fix_stk			|if less, not wrap case
+|
+| Check the signs of the operands.  If they are unlike, the fpu
+| can be used to add the norm and 1.0 with the sign of the
+| denorm and it will correctly generate the result in extended
+| precision.  We can then call round with no sticky and the result
+| will be correct for the user's rounding mode and precision.  If
+| the signs are the same, we call round with the sticky bit set
+| and the result will be correct for the user's rounding mode and
+| precision.
+|
+|
+| add_wrap: complete a wrapped fadd.
+| Like signs -> add_same.  Unlike signs: the denorm's exponent is
+| replaced with $3fff (sign kept), the fpu adds the operands under the
+| user's rounding mode, the FPSR is merged into USER_FPSR, and the
+| result is stored to WBTEMP, rounded with no sticky bit, converted
+| back to external format and written via frcfpnr.
+| The dest-denorm path and add_u_srcd differ only in which operand's
+| exponent is replaced.
+|
+add_wrap:
+	movew	ETEMP_EX(%a6),%d0
+	movew	FPTEMP_EX(%a6),%d1
+	eorw	%d1,%d0
+	andiw	#0x8000,%d0		|nonzero if the signs differ
+	beq	add_same
+|
+| The signs are unlike.
+|
+	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
+	bnes	add_u_srcd
+	movew	FPTEMP_EX(%a6),%d0
+	andiw	#0x8000,%d0
+	orw	#0x3fff,%d0	|force the exponent to +/- 1
+	movew	%d0,FPTEMP_EX(%a6) |in the denorm
+	movel	USER_FPCR(%a6),%d0
+	andil	#0x30,%d0
+	fmovel	%d0,%fpcr		|set up users rmode and X
+	fmovex	ETEMP(%a6),%fp0
+	faddx	FPTEMP(%a6),%fp0
+	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
+	fmovel	%fpsr,%d1
+	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fadd
+	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
+	lsrl	#4,%d0		|put rmode in lower 2 bits
+	movel	USER_FPCR(%a6),%d1
+	andil	#0xc0,%d1
+	lsrl	#6,%d1		|put precision in upper word
+	swap	%d1
+	orl	%d0,%d1		|set up for round call
+	clrl	%d0		|force sticky to zero
+	bclrb	#sign_bit,WBTEMP_EX(%a6)
+	sne	WBTEMP_SGN(%a6)	|use internal format for round
+	bsrl	round		|round result to users rmode & prec
+	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
+	beq	frcfpnr
+	bsetb	#sign_bit,WBTEMP_EX(%a6)
+	bra	frcfpnr
+add_u_srcd:
+	movew	ETEMP_EX(%a6),%d0
+	andiw	#0x8000,%d0
+	orw	#0x3fff,%d0	|force the exponent to +/- 1
+	movew	%d0,ETEMP_EX(%a6) |in the denorm
+	movel	USER_FPCR(%a6),%d0
+	andil	#0x30,%d0
+	fmovel	%d0,%fpcr		|set up users rmode and X
+	fmovex	ETEMP(%a6),%fp0
+	faddx	FPTEMP(%a6),%fp0
+	fmovel	%fpsr,%d1
+	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fadd
+	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
+	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
+	lsrl	#4,%d0		|put rmode in lower 2 bits
+	movel	USER_FPCR(%a6),%d1
+	andil	#0xc0,%d1
+	lsrl	#6,%d1		|put precision in upper word
+	swap	%d1
+	orl	%d0,%d1		|set up for round call
+	clrl	%d0		|force sticky to zero
+	bclrb	#sign_bit,WBTEMP_EX(%a6)
+	sne	WBTEMP_SGN(%a6)	|use internal format for round
+	bsrl	round		|round result to users rmode & prec
+	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
+	beq	frcfpnr
+	bsetb	#sign_bit,WBTEMP_EX(%a6)
+	bra	frcfpnr
+|
+| Signs are alike:
+|
+|
+| add_same: wrapped fadd where both operands have the same sign.
+| The norm operand (ETEMP if dest is the denorm, FPTEMP if src is) is
+| rounded in place with the sticky bit set, converted back to external
+| format, copied to WBTEMP, and N is ORed into USER_FPSR when the
+| sign/exponent word is not greater than zero.  add_ckovf then checks
+| for an exponent of $7fff before exiting through frcfpnr.
+|
+add_same:
+	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
+	bnes	add_s_srcd
+add_s_destd:
+	leal	ETEMP(%a6),%a0	|round operates on the norm (src)
+	movel	USER_FPCR(%a6),%d0
+	andil	#0x30,%d0
+	lsrl	#4,%d0		|put rmode in lower 2 bits
+	movel	USER_FPCR(%a6),%d1
+	andil	#0xc0,%d1
+	lsrl	#6,%d1		|put precision in upper word
+	swap	%d1
+	orl	%d0,%d1		|set up for round call
+	movel	#0x20000000,%d0	|set sticky for round
+	bclrb	#sign_bit,ETEMP_EX(%a6)
+	sne	ETEMP_SGN(%a6)	|use internal format for round
+	bsrl	round		|round result to users rmode & prec
+	bfclr	ETEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
+	beqs	add_s_dclr
+	bsetb	#sign_bit,ETEMP_EX(%a6)
+add_s_dclr:
+	leal	WBTEMP(%a6),%a0
+	movel	ETEMP(%a6),(%a0)	|write result to wbtemp
+	movel	ETEMP_HI(%a6),4(%a0)
+	movel	ETEMP_LO(%a6),8(%a0)
+	tstw	ETEMP_EX(%a6)
+	bgt	add_ckovf	|positive: leave N alone
+	orl	#neg_mask,USER_FPSR(%a6)
+	bra	add_ckovf
+add_s_srcd:
+	leal	FPTEMP(%a6),%a0	|round operates on the norm (dest)
+	movel	USER_FPCR(%a6),%d0
+	andil	#0x30,%d0
+	lsrl	#4,%d0		|put rmode in lower 2 bits
+	movel	USER_FPCR(%a6),%d1
+	andil	#0xc0,%d1
+	lsrl	#6,%d1		|put precision in upper word
+	swap	%d1
+	orl	%d0,%d1		|set up for round call
+	movel	#0x20000000,%d0	|set sticky for round
+	bclrb	#sign_bit,FPTEMP_EX(%a6)
+	sne	FPTEMP_SGN(%a6)	|use internal format for round
+	bsrl	round		|round result to users rmode & prec
+	bfclr	FPTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
+	beqs	add_s_sclr
+	bsetb	#sign_bit,FPTEMP_EX(%a6)
+add_s_sclr:
+	leal	WBTEMP(%a6),%a0
+	movel	FPTEMP(%a6),(%a0)	|write result to wbtemp
+	movel	FPTEMP_HI(%a6),4(%a0)
+	movel	FPTEMP_LO(%a6),8(%a0)
+	tstw	FPTEMP_EX(%a6)
+	bgt	add_ckovf	|positive: leave N alone
+	orl	#neg_mask,USER_FPSR(%a6)
+add_ckovf:
+	movew	WBTEMP_EX(%a6),%d0
+	andiw	#0x7fff,%d0	|drop the sign, keep the exponent
+	cmpiw	#0x7fff,%d0
+	bne	frcfpnr
+|
+| The result has overflowed to $7fff exponent.  Set I, ovfl,
+| and aovfl, and clr the mantissa (incorrectly set by the
+| round routine.)
+|
+	orl	#inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
+	clrl	4(%a0)		|a0 -> WBTEMP; clears the high mantissa long
+	bra	frcfpnr
+|
+| Inst is fsub.
+|
+|
+| wrap_sub: fsub with at least one denormalized operand.
+| Both denorm -> fix_stk.  Otherwise check that the other operand is
+| not zero/inf/nan and that the exponent difference is at least $8000;
+| if not, go to fix_stk.  On a wrap, the dest-denorm path branches to
+| sub_wrap and the src-denorm path falls through to it.
+|
+wrap_sub:
+	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
+	beq	fix_stk		 |restore to fpu
+|
+| One of the ops is denormalized.  Test for wrap condition
+| and complete the instruction.
+|
+	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
+	bnes	sub_srcd
+sub_destd:
+	bsrl	ckinf_ns		|d0 != 0 if src is zero, inf or nan
+	bne	fix_stk
+	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
+	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
+	subl	%d1,%d0			|subtract dest from src
+	cmpl	#0x8000,%d0
+	blt	fix_stk			|if less, not wrap case
+	bra	sub_wrap
+sub_srcd:
+	bsrl	ckinf_nd		|d0 != 0 if dest is zero, inf or nan
+	bne	fix_stk
+	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
+	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
+	subl	%d1,%d0			|subtract src from dest
+	cmpl	#0x8000,%d0
+	blt	fix_stk			|if less, not wrap case
+|
+| Check the signs of the operands.  If they are alike, the fpu
+| can be used to subtract from the norm 1.0 with the sign of the
+| denorm and it will correctly generate the result in extended
+| precision.  We can then call round with no sticky and the result
+| will be correct for the user's rounding mode and precision.  If
+| the signs are unlike, we call round with the sticky bit set
+| and the result will be correct for the user's rounding mode and
+| precision.
+|
+|
+| sub_wrap: complete a wrapped fsub.
+| Unlike signs -> sub_diff.  Like signs: the denorm's exponent is
+| replaced with $3fff (sign kept), the fpu computes FPTEMP - ETEMP
+| under the user's rounding mode, the FPSR is merged into USER_FPSR,
+| and the result is stored to WBTEMP, rounded with no sticky bit,
+| converted back to external format and written via frcfpnr.
+| The dest-denorm path and sub_u_srcd differ only in which operand's
+| exponent is replaced.
+|
+sub_wrap:
+	movew	ETEMP_EX(%a6),%d0
+	movew	FPTEMP_EX(%a6),%d1
+	eorw	%d1,%d0
+	andiw	#0x8000,%d0		|nonzero if the signs differ
+	bne	sub_diff
+|
+| The signs are alike.
+|
+	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
+	bnes	sub_u_srcd
+	movew	FPTEMP_EX(%a6),%d0
+	andiw	#0x8000,%d0
+	orw	#0x3fff,%d0	|force the exponent to +/- 1
+	movew	%d0,FPTEMP_EX(%a6) |in the denorm
+	movel	USER_FPCR(%a6),%d0
+	andil	#0x30,%d0
+	fmovel	%d0,%fpcr		|set up users rmode and X
+	fmovex	FPTEMP(%a6),%fp0
+	fsubx	ETEMP(%a6),%fp0
+	fmovel	%fpsr,%d1
+	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fsub
+	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
+	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
+	lsrl	#4,%d0		|put rmode in lower 2 bits
+	movel	USER_FPCR(%a6),%d1
+	andil	#0xc0,%d1
+	lsrl	#6,%d1		|put precision in upper word
+	swap	%d1
+	orl	%d0,%d1		|set up for round call
+	clrl	%d0		|force sticky to zero
+	bclrb	#sign_bit,WBTEMP_EX(%a6)
+	sne	WBTEMP_SGN(%a6)	|use internal format for round
+	bsrl	round		|round result to users rmode & prec
+	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
+	beq	frcfpnr
+	bsetb	#sign_bit,WBTEMP_EX(%a6)
+	bra	frcfpnr
+sub_u_srcd:
+	movew	ETEMP_EX(%a6),%d0
+	andiw	#0x8000,%d0
+	orw	#0x3fff,%d0	|force the exponent to +/- 1
+	movew	%d0,ETEMP_EX(%a6) |in the denorm
+	movel	USER_FPCR(%a6),%d0
+	andil	#0x30,%d0
+	fmovel	%d0,%fpcr		|set up users rmode and X
+	fmovex	FPTEMP(%a6),%fp0
+	fsubx	ETEMP(%a6),%fp0
+	fmovel	%fpsr,%d1
+	orl	%d1,USER_FPSR(%a6) |capture cc's and inex from fsub
+	leal	WBTEMP(%a6),%a0	|point a0 to wbtemp in frame
+	fmovex	%fp0,WBTEMP(%a6)	|write result to memory
+	lsrl	#4,%d0		|put rmode in lower 2 bits
+	movel	USER_FPCR(%a6),%d1
+	andil	#0xc0,%d1
+	lsrl	#6,%d1		|put precision in upper word
+	swap	%d1
+	orl	%d0,%d1		|set up for round call
+	clrl	%d0		|force sticky to zero
+	bclrb	#sign_bit,WBTEMP_EX(%a6)
+	sne	WBTEMP_SGN(%a6)	|use internal format for round
+	bsrl	round		|round result to users rmode & prec
+	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
+	beq	frcfpnr
+	bsetb	#sign_bit,WBTEMP_EX(%a6)
+	bra	frcfpnr
+|
+| Signs are unlike:
+|
+|
+| sub_diff: wrapped fsub where the operands have different signs.
+| The norm operand is rounded in place with the sticky bit set,
+| converted back to external format and copied to WBTEMP.  When the
+| dest is the denorm, the sign of the norm (src) is flipped first.
+| N is ORed into USER_FPSR when the result's sign/exponent word is
+| not greater than zero.  sub_ckovf then checks for an exponent of
+| $7fff before exiting through frcfpnr.
+|
+sub_diff:
+	cmpb	#0x0f,DNRM_FLG(%a6) |is dest the denorm?
+	bnes	sub_s_srcd
+sub_s_destd:
+	leal	ETEMP(%a6),%a0	|round operates on the norm (src)
+	movel	USER_FPCR(%a6),%d0
+	andil	#0x30,%d0
+	lsrl	#4,%d0		|put rmode in lower 2 bits
+	movel	USER_FPCR(%a6),%d1
+	andil	#0xc0,%d1
+	lsrl	#6,%d1		|put precision in upper word
+	swap	%d1
+	orl	%d0,%d1		|set up for round call
+	movel	#0x20000000,%d0	|set sticky for round
+|
+| Since the dest is the denorm, the sign is the opposite of the
+| norm sign.
+|
+	eoriw	#0x8000,ETEMP_EX(%a6)	|flip sign on result
+	tstw	ETEMP_EX(%a6)
+	bgts	sub_s_dwr	|positive: leave N alone
+	orl	#neg_mask,USER_FPSR(%a6)
+sub_s_dwr:
+	bclrb	#sign_bit,ETEMP_EX(%a6)
+	sne	ETEMP_SGN(%a6)	|use internal format for round
+	bsrl	round		|round result to users rmode & prec
+	bfclr	ETEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
+	beqs	sub_s_dclr
+	bsetb	#sign_bit,ETEMP_EX(%a6)
+sub_s_dclr:
+	leal	WBTEMP(%a6),%a0
+	movel	ETEMP(%a6),(%a0)	|write result to wbtemp
+	movel	ETEMP_HI(%a6),4(%a0)
+	movel	ETEMP_LO(%a6),8(%a0)
+	bra	sub_ckovf
+sub_s_srcd:
+	leal	FPTEMP(%a6),%a0	|round operates on the norm (dest)
+	movel	USER_FPCR(%a6),%d0
+	andil	#0x30,%d0
+	lsrl	#4,%d0		|put rmode in lower 2 bits
+	movel	USER_FPCR(%a6),%d1
+	andil	#0xc0,%d1
+	lsrl	#6,%d1		|put precision in upper word
+	swap	%d1
+	orl	%d0,%d1		|set up for round call
+	movel	#0x20000000,%d0	|set sticky for round
+	bclrb	#sign_bit,FPTEMP_EX(%a6)
+	sne	FPTEMP_SGN(%a6)	|use internal format for round
+	bsrl	round		|round result to users rmode & prec
+	bfclr	FPTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
+	beqs	sub_s_sclr
+	bsetb	#sign_bit,FPTEMP_EX(%a6)
+sub_s_sclr:
+	leal	WBTEMP(%a6),%a0
+	movel	FPTEMP(%a6),(%a0)	|write result to wbtemp
+	movel	FPTEMP_HI(%a6),4(%a0)
+	movel	FPTEMP_LO(%a6),8(%a0)
+	tstw	FPTEMP_EX(%a6)
+	bgt	sub_ckovf	|positive: leave N alone
+	orl	#neg_mask,USER_FPSR(%a6)
+sub_ckovf:
+	movew	WBTEMP_EX(%a6),%d0
+	andiw	#0x7fff,%d0	|drop the sign, keep the exponent
+	cmpiw	#0x7fff,%d0
+	bne	frcfpnr
+|
+| The result has overflowed to $7fff exponent.  Set I, ovfl,
+| and aovfl, and clr the mantissa (incorrectly set by the
+| round routine.)
+|
+	orl	#inf_mask+ovfl_inx_mask,USER_FPSR(%a6)
+	clrl	4(%a0)		|a0 -> WBTEMP; clears the high mantissa long
+	bra	frcfpnr
+|
+| Inst is fcmp.
+|
+|
+| wrap_cmp: fcmp with at least one denormalized operand.
+| Both denorm -> fix_stk.  Otherwise, if the other operand is
+| zero/inf/nan or the exponent difference is below $8000, go to
+| fix_stk.  On a wrap the only effect is on the N bit of USER_FPSR:
+|	dest denorm: N is set when the source is not negative;
+|	src denorm:  N is set when the destination is negative.
+| No other condition-code bit is touched here, and the routine
+| returns with rts.
+|
+wrap_cmp:
+	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
+	beq	fix_stk		 |restore to fpu
+|
+| One of the ops is denormalized.  Test for wrap condition
+| and complete the instruction.
+|
+	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
+	bnes	cmp_srcd
+cmp_destd:
+	bsrl	ckinf_ns		|d0 != 0 if src is zero, inf or nan
+	bne	fix_stk
+	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
+	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
+	subl	%d1,%d0			|subtract dest from src
+	cmpl	#0x8000,%d0
+	blt	fix_stk			|if less, not wrap case
+	tstw	ETEMP_EX(%a6)		|set N to ~sign_of(src)
+	bge	cmp_setn
+	rts
+cmp_srcd:
+	bsrl	ckinf_nd		|d0 != 0 if dest is zero, inf or nan
+	bne	fix_stk
+	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
+	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
+	subl	%d1,%d0			|subtract src from dest
+	cmpl	#0x8000,%d0
+	blt	fix_stk			|if less, not wrap case
+	tstw	FPTEMP_EX(%a6)		|set N to sign_of(dest)
+	blt	cmp_setn
+	rts
+cmp_setn:
+	orl	#neg_mask,USER_FPSR(%a6)
+	rts
+
+|
+| Inst is fmul.
+|
+|
+| wrap_mul: fmul with at least one denormalized operand.
+| Both denorm -> force_unf.  Otherwise, if the other operand is
+| zero/inf/nan, or the sum of the two exponent fields is greater than
+| zero, go to fix_stk; else force an underflow (the src-denorm path
+| falls through into force_unf).
+|
+wrap_mul:
+	cmpb	#0xff,DNRM_FLG(%a6) |if both ops denorm,
+	beq	force_unf	|force an underflow (really!)
+|
+| One of the ops is denormalized.  Test for wrap condition
+| and complete the instruction.
+|
+	cmpb	#0x0f,DNRM_FLG(%a6) |check for dest denorm
+	bnes	mul_srcd
+mul_destd:
+	bsrl	ckinf_ns		|d0 != 0 if src is zero, inf or nan
+	bne	fix_stk
+	bfextu	ETEMP_EX(%a6){#1:#15},%d0	|get src exp (always pos)
+	bfexts	FPTEMP_EX(%a6){#1:#15},%d1	|get dest exp (always neg)
+	addl	%d1,%d0			|add dest exp to src exp
+	bgt	fix_stk			|sum > 0: not wrap case
+	bra	force_unf
+mul_srcd:
+	bsrl	ckinf_nd		|d0 != 0 if dest is zero, inf or nan
+	bne	fix_stk
+	bfextu	FPTEMP_EX(%a6){#1:#15},%d0	|get dest exp (always pos)
+	bfexts	ETEMP_EX(%a6){#1:#15},%d1	|get src exp (always neg)
+	addl	%d1,%d0			|add src exp to dest exp
+	bgt	fix_stk			|sum > 0: not wrap case
+
+|
+| This code handles the case of the instruction resulting in
+| an underflow condition.
+|
+|
+| force_unf: produce an underflow result in WBTEMP.
+| Clears E1 and NMNEXC, ORs unfinx_mask into USER_FPSR, sets
+| WBTEMP_SGN to the XOR of the ETEMP and FPTEMP signs, selects the
+| rounding precision (1 = forced single, 2 = forced double, else the
+| FPCR precision field) in d0, calls unf_sub with a0 -> WBTEMP, folds
+| WBTEMP_SGN back into the sign bit of WBTEMP_EX and exits via frcfpn.
+|
+force_unf:
+	bclrb	#E1,E_BYTE(%a6)
+	orl	#unfinx_mask,USER_FPSR(%a6)
+	clrw	NMNEXC(%a6)
+	clrb	WBTEMP_SGN(%a6)
+	movew	ETEMP_EX(%a6),%d0		|find the sign of the result
+	movew	FPTEMP_EX(%a6),%d1
+	eorw	%d1,%d0
+	andiw	#0x8000,%d0
+	beqs	frcunfcont
+	st	WBTEMP_SGN(%a6)		|signs differ: result is negative
+frcunfcont:
+	lea	WBTEMP(%a6),%a0		|point a0 to memory location
+	movew	CMDREG1B(%a6),%d0
+	btstl	#6,%d0			|test for forced precision
+	beqs	frcunf_fpcr
+	btstl	#2,%d0			|check for double
+	bnes	frcunf_dbl
+	movel	#0x1,%d0			|inst is forced single
+	bras	frcunf_rnd
+frcunf_dbl:
+	movel	#0x2,%d0			|inst is forced double
+	bras	frcunf_rnd
+frcunf_fpcr:
+	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|inst not forced - use fpcr prec
+frcunf_rnd:
+	bsrl	unf_sub			|get correct result based on
+|					;round precision/mode.  This
+|					;sets FPSR_CC correctly
+	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
+	beqs	frcfpn
+	bsetb	#sign_bit,WBTEMP_EX(%a6)
+	bra	frcfpn
+
+|
+| Write the result to the user's fpn.  All results must be HUGE to be
+| written; otherwise the results would have overflowed or underflowed.
+| If the rounding precision is single or double, the ovf_res routine
+| is needed to correctly supply the max value.
+|
+|
+| frcfpnr: final precision fix-up before the register write.
+| Determines the rounding precision (forced single/double from
+| CMDREG1B bits 6 and 2, otherwise the FPCR precision field).  For
+| extended precision from the FPCR the value in WBTEMP is written
+| unchanged.  Otherwise WBTEMP is put in internal format, ovf_res is
+| called with the precision in d0, the sign is folded back, and
+| ovfinx_mask is ORed into USER_FPSR.  Falls through into frcfpn.
+| NOTE(review): a0 is presumably still pointing at WBTEMP from the
+| caller when ovf_res is reached - confirm against ovf_res.
+|
+frcfpnr:
+	movew	CMDREG1B(%a6),%d0
+	btstl	#6,%d0			|test for forced precision
+	beqs	frcfpn_fpcr
+	btstl	#2,%d0			|check for double
+	bnes	frcfpn_dbl
+	movel	#0x1,%d0			|inst is forced single
+	bras	frcfpn_rnd
+frcfpn_dbl:
+	movel	#0x2,%d0			|inst is forced double
+	bras	frcfpn_rnd
+frcfpn_fpcr:
+	bfextu	FPCR_MODE(%a6){#0:#2},%d0	|inst not forced - use fpcr prec
+	tstb	%d0
+	beqs	frcfpn			|if extended, write what you got
+frcfpn_rnd:
+	bclrb	#sign_bit,WBTEMP_EX(%a6)
+	sne	WBTEMP_SGN(%a6)		|use internal format for ovf_res
+	bsrl	ovf_res			|get correct result based on
+|					;round precision/mode.  This
+|					;sets FPSR_CC correctly
+	bfclr	WBTEMP_SGN(%a6){#0:#8}	|convert back to IEEE ext format
+	beqs	frcfpn_clr
+	bsetb	#sign_bit,WBTEMP_EX(%a6)
+frcfpn_clr:
+	orl	#ovfinx_mask,USER_FPSR(%a6)
+|
+| Perform the write.
+|
+|
+| frcfpn: write WBTEMP to the destination fp register named in
+| CMDREG1B{6:3}.  Registers 4-7 are loaded directly with fmovemx using
+| a one-bit dynamic register mask (bit 7-n for fpn).  Registers 0-3
+| are written to the USER_FP0..USER_FP3 locations off a6 instead.
+| Returns with rts.  Clobbers d0, d1.
+|
+frcfpn:
+	bfextu	CMDREG1B(%a6){#6:#3},%d0	|extract fp destination register
+	cmpib	#3,%d0
+	bles	frc0123			|check if dest is fp0-fp3
+	movel	#7,%d1
+	subl	%d0,%d1			|d1 = 7 - register number
+	clrl	%d0
+	bsetl	%d1,%d0			|d0 = fmovem mask for that register
+	fmovemx WBTEMP(%a6),%d0
+	rts
+frc0123:
+	cmpib	#0,%d0
+	beqs	frc0_dst
+	cmpib	#1,%d0
+	beqs	frc1_dst
+	cmpib	#2,%d0
+	beqs	frc2_dst
+frc3_dst:
+	movel	WBTEMP_EX(%a6),USER_FP3(%a6)
+	movel	WBTEMP_HI(%a6),USER_FP3+4(%a6)
+	movel	WBTEMP_LO(%a6),USER_FP3+8(%a6)
+	rts
+frc2_dst:
+	movel	WBTEMP_EX(%a6),USER_FP2(%a6)
+	movel	WBTEMP_HI(%a6),USER_FP2+4(%a6)
+	movel	WBTEMP_LO(%a6),USER_FP2+8(%a6)
+	rts
+frc1_dst:
+	movel	WBTEMP_EX(%a6),USER_FP1(%a6)
+	movel	WBTEMP_HI(%a6),USER_FP1+4(%a6)
+	movel	WBTEMP_LO(%a6),USER_FP1+8(%a6)
+	rts
+frc0_dst:
+	movel	WBTEMP_EX(%a6),USER_FP0(%a6)
+	movel	WBTEMP_HI(%a6),USER_FP0+4(%a6)
+	movel	WBTEMP_LO(%a6),USER_FP0+8(%a6)
+	rts
+
+|
+| Write etemp to fpn.
+| A check is made on enabled and signalled snan exceptions,
+| and the destination is not overwritten if this condition exists.
+| This code is designed to make fmoveins of unsupported data types
+| faster.
+|
+|
+| wr_etemp: entry for writing ETEMP to the destination register.
+| If snan is both signalled (FPSR_EXCEPT) and enabled (FPCR_ENABLE),
+| the destination is left alone: the first longword of ETEMP is
+| copied to FPTEMP_EX for the snan handler, N is set in USER_FPSR if
+| the operand is negative, and the routine returns.  Otherwise
+| control passes to fmoveinc, which performs the write.
+|
+| The N flag is set with neg_mask, the same mask every other
+| "orl ...,USER_FPSR(%a6)" in this file uses for N.  neg_bit follows
+| the *_bit naming used for bit-number operands of btst/bset/bclr and
+| is not a longword mask.
+|
+wr_etemp:
+	btstb	#snan_bit,FPSR_EXCEPT(%a6)	|if snan is set, and
+	beqs	fmoveinc		|enabled, force restore
+	btstb	#snan_bit,FPCR_ENABLE(%a6) |and don't overwrite
+	beqs	fmoveinc		|the dest
+	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
+|						;snan handler
+	tstb	ETEMP(%a6)		|check for negative
+	blts	snan_neg
+	rts
+snan_neg:
+	orl	#neg_mask,USER_FPSR(%a6)	|snan is negative; set N
+	rts
+|
+| fmoveinc: complete a move-in by writing ETEMP to the destination.
+| Clears NMNEXC and E1, then sets the condition-code bits that nothing
+| else has set yet, based on the source tag: I for inf ($40), NaN for
+| nan ($60), Z for zero ($20), plus N when the sign/exponent word at
+| LOCAL_EX(%a0) is negative.  fminc_con then writes ETEMP to the fp
+| register named in CMDREG1B{6:3}; registers 0-3 are handled by fp0123.
+| NOTE(review): a0 is assumed to still point at the operand being
+| written (presumably ETEMP) - confirm against the callers.
+|
+fmoveinc:
+	clrw	NMNEXC(%a6)
+	bclrb	#E1,E_BYTE(%a6)
+	moveb	STAG(%a6),%d0		|check if stag is inf
+	andib	#0xe0,%d0
+	cmpib	#0x40,%d0
+	bnes	fminc_cnan
+	orl	#inf_mask,USER_FPSR(%a6) |if inf, nothing yet has set I
+	tstw	LOCAL_EX(%a0)		|check sign
+	bges	fminc_con
+	orl	#neg_mask,USER_FPSR(%a6)
+	bra	fminc_con
+fminc_cnan:
+	cmpib	#0x60,%d0			|check if stag is NaN
+	bnes	fminc_czero
+	orl	#nan_mask,USER_FPSR(%a6) |if nan, nothing yet has set NaN
+	movel	ETEMP_EX(%a6),FPTEMP_EX(%a6)	|set up fptemp sign for
+|						;snan handler
+	tstw	LOCAL_EX(%a0)		|check sign
+	bges	fminc_con
+	orl	#neg_mask,USER_FPSR(%a6)
+	bra	fminc_con
+fminc_czero:
+	cmpib	#0x20,%d0			|check if zero
+	bnes	fminc_con
+	orl	#z_mask,USER_FPSR(%a6)	|if zero, set Z
+	tstw	LOCAL_EX(%a0)		|check sign
+	bges	fminc_con
+	orl	#neg_mask,USER_FPSR(%a6)
+fminc_con:
+	bfextu	CMDREG1B(%a6){#6:#3},%d0	|extract fp destination register
+	cmpib	#3,%d0
+	bles	fp0123			|check if dest is fp0-fp3
+	movel	#7,%d1
+	subl	%d0,%d1			|d1 = 7 - register number
+	clrl	%d0
+	bsetl	%d1,%d0			|d0 = fmovem mask for that register
+	fmovemx ETEMP(%a6),%d0
+	rts
+
+|
+| fp0123: destination is fp0-fp3 (register number in d0).  Copy the
+| three longwords of ETEMP to the matching USER_FPn location off a6
+| and return.
+|
+fp0123:
+	cmpib	#0,%d0
+	beqs	fp0_dst
+	cmpib	#1,%d0
+	beqs	fp1_dst
+	cmpib	#2,%d0
+	beqs	fp2_dst
+fp3_dst:
+	movel	ETEMP_EX(%a6),USER_FP3(%a6)
+	movel	ETEMP_HI(%a6),USER_FP3+4(%a6)
+	movel	ETEMP_LO(%a6),USER_FP3+8(%a6)
+	rts
+fp2_dst:
+	movel	ETEMP_EX(%a6),USER_FP2(%a6)
+	movel	ETEMP_HI(%a6),USER_FP2+4(%a6)
+	movel	ETEMP_LO(%a6),USER_FP2+8(%a6)
+	rts
+fp1_dst:
+	movel	ETEMP_EX(%a6),USER_FP1(%a6)
+	movel	ETEMP_HI(%a6),USER_FP1+4(%a6)
+	movel	ETEMP_LO(%a6),USER_FP1+8(%a6)
+	rts
+fp0_dst:
+	movel	ETEMP_EX(%a6),USER_FP0(%a6)
+	movel	ETEMP_HI(%a6),USER_FP0+4(%a6)
+	movel	ETEMP_LO(%a6),USER_FP0+8(%a6)
+	rts
+
+|
+| opclass3: move-out dispatcher.  Marks the operation CU-only, then
+| looks at the low two bits of the CMDREG1B size field: when both are
+| set (size 011 or 111) the destination is packed and pack_out takes
+| over; every other size is handled by mv_out.
+|
+opclass3:
+	st	CU_ONLY(%a6)
+	movew	CMDREG1B(%a6),%d0	|fetch the command word
+	andiw	#0x0c00,%d0	|keep only the low 2 size bits
+	cmpiw	#0x0c00,%d0	|both set means a packed destination
+	bne	mv_out		|not packed: norm or denorm move-out
+	bra	pack_out	|packed move-out
+
+
+|
+|	MOVE OUT
+|
+
+|
+| mv_tbl / mv_out: jump through a table indexed by the 3-bit
+| destination format field CMDREG1B{3:3}.
+|	0 = long, 1 = single, 2 = extended, 4 = word, 5 = double, 6 = byte
+| Entries 3 and 7 are the packed formats, which opclass3 routes to
+| pack_out before mv_out is reached.
+| Clobbers d1, a0.
+|
+mv_tbl:
+	.long	li
+	.long	sgp
+	.long	xp
+	.long	mvout_end	|should never be taken
+	.long	wi
+	.long	dp
+	.long	bi
+	.long	mvout_end	|should never be taken
+mv_out:
+	bfextu	CMDREG1B(%a6){#3:#3},%d1	|put source specifier in d1
+	leal	mv_tbl,%a0
+	movel	%a0@(%d1:l:4),%a0	|fetch handler address, 4 bytes/entry
+	jmp	(%a0)
+
+|
+| This exit is for move-out to memory.  The aunfl bit is
+| set if the result is inex and unfl is signalled.
+|
+|
+| mvout_end: common exit for move-out to memory.
+| Sets aunfl when both inex2 and unfl are set in FPSR_EXCEPT, clears
+| NMNEXC and E1, zeroes the hardware FPSR, and converts ETEMP from
+| internal format (separate sign byte) back to external format.
+|
+mvout_end:
+	btstb	#inex2_bit,FPSR_EXCEPT(%a6)
+	beqs	no_aufl
+	btstb	#unfl_bit,FPSR_EXCEPT(%a6)
+	beqs	no_aufl
+	bsetb	#aunfl_bit,FPSR_AEXCEPT(%a6)
+no_aufl:
+	clrw	NMNEXC(%a6)
+	bclrb	#E1,E_BYTE(%a6)
+	fmovel	#0,%FPSR			|clear any cc bits from res_func
+|
+| Return ETEMP to extended format from internal extended format so
+| that gen_except will have a correctly signed value for ovfl/unfl
+| handlers.
+|
+	bfclr	ETEMP_SGN(%a6){#0:#8}	|Z reflects the old sign byte
+	beqs	mvout_con
+	bsetb	#sign_bit,ETEMP_EX(%a6)
+mvout_con:
+	rts
+|
+| This exit is for move-out to int register.  The aunfl bit is
+| not set in any case for this move.
+|
+|
+| mvouti_end: common exit for integer move-out.  Same as mvout_end
+| except that aunfl is never set: clears NMNEXC and E1, zeroes the
+| hardware FPSR, and converts ETEMP back to external format.
+|
+mvouti_end:
+	clrw	NMNEXC(%a6)
+	bclrb	#E1,E_BYTE(%a6)
+	fmovel	#0,%FPSR			|clear any cc bits from res_func
+|
+| Return ETEMP to extended format from internal extended format so
+| that gen_except will have a correctly signed value for ovfl/unfl
+| handlers.
+|
+	bfclr	ETEMP_SGN(%a6){#0:#8}	|Z reflects the old sign byte
+	beqs	mvouti_con
+	bsetb	#sign_bit,ETEMP_EX(%a6)
+mvouti_con:
+	rts
+|
+| li is used to handle a long integer source specifier
+|
+
+|
+| li: move-out to a long integer.
+| d0 = 4 (byte count, used later by int_wrt/wrt_dn).  An extended
+| denorm source goes to int_dnrm.  Otherwise the value is compared
+| against 2147483647.0 and -2147483648.0:
+|   in range      - the 040 converts it using the user's rounding mode
+|		    and the resulting FPSR is merged into USER_FPSR;
+|   at/over range - L_SCR1 is preset to the limit, then the value is
+|		    classified as exact (int_wrt), beyond the limit by
+|		    half a unit or more (int_operr), or inexact (int_inx).
+|
+li:
+	moveql	#4,%d0		|set byte count
+
+	btstb	#7,STAG(%a6)	|check for extended denorm
+	bne	int_dnrm	|if so, branch
+
+	fmovemx ETEMP(%a6),%fp0-%fp0
+	fcmpd	#0x41dfffffffc00000,%fp0
+| 41dfffffffc00000 in dbl prec = 401d0000fffffffe00000000 in ext prec
+	fbge	lo_plrg
+	fcmpd	#0xc1e0000000000000,%fp0
+| c1e0000000000000 in dbl prec = c01e00008000000000000000 in ext prec
+	fble	lo_nlrg
+|
+| at this point, the answer is between the largest pos and neg values
+|
+	movel	USER_FPCR(%a6),%d1	|use user's rounding mode
+	andil	#0x30,%d1
+	fmovel	%d1,%fpcr
+	fmovel	%fp0,L_SCR1(%a6)	|let the 040 perform conversion
+	fmovel %fpsr,%d1
+	orl	%d1,USER_FPSR(%a6)	|capture inex2/ainex if set
+	bra	int_wrt
+
+
+lo_plrg:
+	movel	#0x7fffffff,L_SCR1(%a6)	|answer is largest positive int
+	fbeq	int_wrt			|exact answer
+	fcmpd	#0x41dfffffffe00000,%fp0
+| 41dfffffffe00000 in dbl prec = 401d0000ffffffff00000000 in ext prec
+	fbge	int_operr		|set operr
+	bra	int_inx			|set inexact
+
+lo_nlrg:
+	movel	#0x80000000,L_SCR1(%a6)	|answer is most negative int
+	fbeq	int_wrt			|exact answer
+	fcmpd	#0xc1e0000000100000,%fp0
+| c1e0000000100000 in dbl prec = c01e00008000000080000000 in ext prec
+	fblt	int_operr		|set operr
+	bra	int_inx			|set inexact
+
+|
+| wi is used to handle a word integer source specifier
+|
+
+|
+| wi: move-out to a word integer.
+| d0 = 2 (byte count).  Same structure as li, with the limits
+| 32767.0 and -32768.0 and the half-unit thresholds 32767.5 and
+| -32768.5; the result is built in the first word of L_SCR1.
+|
+wi:
+	moveql	#2,%d0		|set byte count
+
+	btstb	#7,STAG(%a6)	|check for extended denorm
+	bne	int_dnrm	|branch if so
+
+	fmovemx ETEMP(%a6),%fp0-%fp0
+	fcmps	#0x46fffe00,%fp0
+| 46fffe00 in sgl prec = 400d0000fffe000000000000 in ext prec
+	fbge	wo_plrg
+	fcmps	#0xc7000000,%fp0
+| c7000000 in sgl prec = c00e00008000000000000000 in ext prec
+	fble	wo_nlrg
+
+|
+| at this point, the answer is between the largest pos and neg values
+|
+	movel	USER_FPCR(%a6),%d1	|use user's rounding mode
+	andil	#0x30,%d1
+	fmovel	%d1,%fpcr
+	fmovew	%fp0,L_SCR1(%a6)	|let the 040 perform conversion
+	fmovel %fpsr,%d1
+	orl	%d1,USER_FPSR(%a6)	|capture inex2/ainex if set
+	bra	int_wrt
+
+wo_plrg:
+	movew	#0x7fff,L_SCR1(%a6)	|answer is largest positive int
+	fbeq	int_wrt			|exact answer
+	fcmps	#0x46ffff00,%fp0
+| 46ffff00 in sgl prec = 400d0000ffff000000000000 in ext prec
+	fbge	int_operr		|set operr
+	bra	int_inx			|set inexact
+
+wo_nlrg:
+	movew	#0x8000,L_SCR1(%a6)	|answer is most negative int
+	fbeq	int_wrt			|exact answer
+	fcmps	#0xc7000080,%fp0
+| c7000080 in sgl prec = c00e00008000800000000000 in ext prec
+	fblt	int_operr		|set operr
+	bra	int_inx			|set inexact
+
+|
+| bi is used to handle a byte integer source specifier
+|
+
+|
+| bi: move-out to a byte integer.
+| d0 = 1 (byte count).  Same structure as li, with the limits 127.0
+| and -128.0 and the half-unit thresholds 127.5 and -128.5; the
+| result is built in the first byte of L_SCR1.
+|
+bi:
+	moveql	#1,%d0		|set byte count
+
+	btstb	#7,STAG(%a6)	|check for extended denorm
+	bne	int_dnrm	|branch if so
+
+	fmovemx ETEMP(%a6),%fp0-%fp0
+	fcmps	#0x42fe0000,%fp0
+| 42fe0000 in sgl prec = 40050000fe00000000000000 in ext prec
+	fbge	by_plrg
+	fcmps	#0xc3000000,%fp0
+| c3000000 in sgl prec = c00600008000000000000000 in ext prec
+	fble	by_nlrg
+
+|
+| at this point, the answer is between the largest pos and neg values
+|
+	movel	USER_FPCR(%a6),%d1	|use user's rounding mode
+	andil	#0x30,%d1
+	fmovel	%d1,%fpcr
+	fmoveb	%fp0,L_SCR1(%a6)	|let the 040 perform conversion
+	fmovel %fpsr,%d1
+	orl	%d1,USER_FPSR(%a6)	|capture inex2/ainex if set
+	bra	int_wrt
+
+by_plrg:
+	moveb	#0x7f,L_SCR1(%a6)		|answer is largest positive int
+	fbeq	int_wrt			|exact answer
+	fcmps	#0x42ff0000,%fp0
+| 42ff0000 in sgl prec = 40050000ff00000000000000 in ext prec
+	fbge	int_operr		|set operr
+	bra	int_inx			|set inexact
+
+by_nlrg:
+	moveb	#0x80,L_SCR1(%a6)	|answer is most negative int
+	fbeq	int_wrt			|exact answer
+	fcmps	#0xc3008000,%fp0
+| c3008000 in sgl prec = c00600008080000000000000 in ext prec
+	fblt	int_operr		|set operr
+	bra	int_inx			|set inexact
+
+|
+| Common integer routines
+|
+| int_dnrm---account for possible nonzero result for round up with positive
+| operand and round down for negative answer.  In the first case (result = 1)
+| byte-width (stored in d0) of result must be honored.  In the second case,
+| -1 in L_SCR1(a6) will cover all contingencies (FMOVE.B/W/L out).
+
+|
+| int_dnrm: integer move-out of an extended denorm.
+| Input: d0 = destination width in bytes (1, 2 or 4).
+| The result is 0 except in RM with a negative source (-1) and in RP
+| with a non-negative source (+1, placed in the last byte of a
+| d0-byte field at L_SCR1).  Always flags inexact via int_inx.
+|
+| int_inx:   OR inx2a_mask into USER_FPSR, then write.
+| int_operr: save fp0 to FPTEMP, OR opaop_mask into USER_FPSR, write.
+| int_wrt:   write L_SCR1 to the destination: through mem_write when
+|	     EXC_EA is nonzero, otherwise to a data register (wrt_dn).
+|
+int_dnrm:
+	movel	#0,L_SCR1(%a6)	| initialize result to 0
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1	| d1 is the rounding mode
+	cmpb	#2,%d1
+	bmis	int_inx		| if RN or RZ, done
+	bnes	int_rp		| if RP, continue below
+	tstw	ETEMP(%a6)	| RM: store -1 in L_SCR1 if src is negative
+	bpls	int_inx		| otherwise result is 0
+	movel	#-1,L_SCR1(%a6)
+	bras	int_inx
+int_rp:
+	tstw	ETEMP(%a6)	| RP: store +1 of proper width in L_SCR1 if
+|				; source is greater than 0
+	bmis	int_inx		| otherwise, result is 0
+	lea	L_SCR1(%a6),%a1	| a1 is address of L_SCR1
+	addal	%d0,%a1		| offset by destination width -1
+	subal	#1,%a1
+	bsetb	#0,(%a1)		| set low bit at a1 address
+int_inx:
+	oril	#inx2a_mask,USER_FPSR(%a6)
+	bras	int_wrt
+int_operr:
+	fmovemx %fp0-%fp0,FPTEMP(%a6)	|FPTEMP must contain the extended
+|				;precision source that needs to be
+|				;converted to integer this is required
+|				;if the operr exception is enabled.
+|				;set operr/aiop (no inex2 on int ovfl)
+
+	oril	#opaop_mask,USER_FPSR(%a6)
+|				;fall through to perform int_wrt
+int_wrt:
+	movel	EXC_EA(%a6),%a1	|load destination address
+	tstl	%a1		|check to see if it is a dest register
+	beqs	wrt_dn		|write data register
+	lea	L_SCR1(%a6),%a0	|point to supervisor source address
+	bsrl	mem_write	|d0 still holds the byte count
+	bra	mvouti_end
+
+|
+| wrt_dn: integer move-out to a data register.
+| Input: d0 = size in bytes (1, 2 or 4).
+| Builds the size:register selector that reg_dest expects in d1:
+| register number in bits 0-2, plus 8 for word or $10 for long
+| (nothing added for byte), then calls reg_dest and exits through
+| mvouti_end.
+|
+wrt_dn:
+	movel	%d0,-(%sp)	|d0 currently contains the size to write
+	bsrl	get_fline	|get_fline returns Dn in d0
+	andiw	#0x7,%d0		|isolate register
+	movel	(%sp)+,%d1	|get size
+	cmpil	#4,%d1		|most frequent case
+	beqs	sz_long
+	cmpil	#2,%d1
+	bnes	sz_con		|byte: selector is just the register#
+	orl	#8,%d0		|add 'word' size to register#
+	bras	sz_con
+sz_long:
+	orl	#0x10,%d0		|add 'long' size to register#
+sz_con:
+	movel	%d0,%d1		|reg_dest expects size:reg in d1
+	bsrl	reg_dest	|load proper data register
+	bra	mvouti_end
+|
+| xp: move-out to extended precision.  Converts ETEMP to internal
+| format (sign moved to LOCAL_SGN); a denorm source goes to xdnrm,
+| otherwise do_fp is entered with d0 = 0 (extended precision).
+|
+xp:
+	lea	ETEMP(%a6),%a0
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)	|use internal format
+	btstb	#7,STAG(%a6)	|check for extended denorm
+	bne	xdnrm
+	clrl	%d0		|set destination format to extended
+	bras	do_fp		|do normal case
+|
+| sgp: move-out to single precision.  Converts ETEMP to internal
+| format; a denorm source goes to sp_catas.  The exponent is then
+| checked against the sp_bnds table (a1): below the first word ->
+| sp_under, above the second word -> sp_over, otherwise do_fp is
+| entered with d0 = 1.  a1 and d0 are left set up for sp_under.
+|
+sgp:
+	lea	ETEMP(%a6),%a0
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)	|use internal format
+	btstb	#7,STAG(%a6)	|check for extended denorm
+	bne	sp_catas	|branch if so
+	movew	LOCAL_EX(%a0),%d0
+	lea	sp_bnds,%a1
+	cmpw	(%a1),%d0	|below the lower bound?
+	blt	sp_under
+	cmpw	2(%a1),%d0	|above the upper bound?
+	bgt	sp_over
+	movel	#1,%d0		|set destination format to single
+	bras	do_fp		|do normal case
+|
+| dp: move-out to double precision.  Converts ETEMP to internal
+| format; a denorm source goes to dp_catas.  The exponent is then
+| checked against the dp_bnds table (a1): below the first word ->
+| dp_under, above the second word -> dp_over, otherwise execution
+| falls through into do_fp with d0 = 2.
+|
+dp:
+	lea	ETEMP(%a6),%a0
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)	|use internal format
+
+	btstb	#7,STAG(%a6)	|check for extended denorm
+	bne	dp_catas	|branch if so
+
+	movew	LOCAL_EX(%a0),%d0
+	lea	dp_bnds,%a1
+
+	cmpw	(%a1),%d0	|below the lower bound?
+	blt	dp_under
+	cmpw	2(%a1),%d0	|above the upper bound?
+	bgt	dp_over
+
+	movel	#2,%d0		|set destination format to double
+|				;fall through to do_fp
+|
+|
+| do_fp: round and store an in-range move-out.
+| Input: a0 -> operand in internal format, d0 = precision
+|	 (0 = ext, 1 = sgl, 2 = dbl).
+| Builds PREC (upper word) / MODE (lower word) in d1, calls round
+| with g,r,s cleared, then stores through dest_ext, dest_sgl or
+| dest_dbl (a0 = EXC_EA, a1 = operand) according to the destination
+| format field of CMDREG1B, and exits via mvout_end.
+|
+do_fp:
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|rnd mode in d1
+	swap	%d0			|rnd prec in upper word
+	addl	%d0,%d1			|d1 has PREC/MODE info
+
+	clrl	%d0			|clear g,r,s
+
+	bsrl	round			|round
+
+	movel	%a0,%a1			|a1 has the operand input
+	movel	EXC_EA(%a6),%a0		|a0 has the destination pointer
+
+	bfextu	CMDREG1B(%a6){#3:#3},%d1	|extract destination format
+|					;at this point only the dest
+|					;formats sgl, dbl, ext are
+|					;possible
+	cmpb	#2,%d1
+	bgts	ddbl			|double=5, extended=2, single=1
+	bnes	dsgl
+|					;fall through to dext
+dext:
+	bsrl	dest_ext
+	bra	mvout_end
+dsgl:
+	bsrl	dest_sgl
+	bra	mvout_end
+ddbl:
+	bsrl	dest_dbl
+	bra	mvout_end
+
+|
+| Handle possible denorm or catastrophic underflow cases here
+|
+|
+| xdnrm: extended-precision move-out of a denorm.  Calls set_xop,
+| sets wbtemp15, stores the operand unrounded through dest_ext
+| (a0 = EXC_EA, a1 = operand), sets unfl in FPSR_EXCEPT and exits
+| via mvout_end.
+|
+xdnrm:
+	bsr	set_xop		|initialize WBTEMP
+	bsetb	#wbtemp15_bit,WB_BYTE(%a6) |set wbtemp15
+
+	movel	%a0,%a1		|a1 has the operand input
+	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
+	bsrl	dest_ext	|store to memory
+	bsetb	#unfl_bit,FPSR_EXCEPT(%a6)
+	bra	mvout_end
+
+|
+| sp_under: single-precision move-out whose exponent is below the
+| lower bound in sp_bnds.
+| Input: a0 -> operand, a1 -> sp_bnds, d0 = operand exponent word.
+| Sets etemp15.  If the exponent is also below the third table word
+| the case is catastrophic (sp_catas).  Otherwise the value is
+| denormalized by dpspdnrm, stored with dest_sgl, and unfl is set.
+|
+sp_under:
+	bsetb	#etemp15_bit,STAG(%a6)
+
+	cmpw	4(%a1),%d0
+	blts	sp_catas	|catastrophic underflow case
+
+	movel	#1,%d0		|load in round precision
+	movel	#sgl_thresh,%d1	|load in single denorm threshold
+	bsrl	dpspdnrm	|expects d1 to have the proper
+|				;denorm threshold
+	bsrl	dest_sgl	|stores value to destination
+	bsetb	#unfl_bit,FPSR_EXCEPT(%a6)
+	bra	mvout_end	|exit
+
+|
+| dp_under: double-precision move-out whose exponent is below the
+| lower bound in dp_bnds.
+| Input: a0 -> operand, a1 -> dp_bnds, d0 = operand exponent word.
+| Sets etemp15.  If the exponent is also below the third table word
+| the case is catastrophic (dp_catas).  Otherwise the value is
+| denormalized by dpspdnrm, stored with dest_dbl, and unfl is set.
+|
+dp_under:
+	bsetb	#etemp15_bit,STAG(%a6)
+
+	cmpw	4(%a1),%d0
+	blts	dp_catas	|catastrophic underflow case
+
+	movel	#dbl_thresh,%d1	|load in double precision threshold
+	movel	#2,%d0		|load in round precision
+	bsrl	dpspdnrm	|expects d1 to have proper
+|				;denorm threshold
+|				;expects d0 to have round precision
+	bsrl	dest_dbl	|store value to destination
+	bsetb	#unfl_bit,FPSR_EXCEPT(%a6)
+	bra	mvout_end	|exit
+
+|
+| Handle catastrophic underflow cases here
+|
+|
+| sp_catas: catastrophic underflow on a single-precision move-out.
+| Calls unf_sub with d0 = 1 to build the result at (a0).  USER_FPSR
+| is saved and restored around the call so the condition codes set
+| by unf_sub are discarded.  The exponent is then decremented by 1,
+| the result is stored with dest_sgl, and unfinx_mask is ORed into
+| USER_FPSR before exiting via mvout_end.
+|
+sp_catas:
+| Temp fix for z bit set in unf_sub
+	movel	USER_FPSR(%a6),-(%a7)
+
+	movel	#1,%d0		|set round precision to sgl
+
+	bsrl	unf_sub		|a0 points to result
+
+	movel	(%a7)+,USER_FPSR(%a6)
+
+	movel	#1,%d0
+	subw	%d0,LOCAL_EX(%a0) |account for difference between
+|				;denorm/norm bias
+
+	movel	%a0,%a1		|a1 has the operand input
+	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
+
+	bsrl	dest_sgl	|store the result
+	oril	#unfinx_mask,USER_FPSR(%a6)
+	bra	mvout_end
+
+|
+| dp_catas: catastrophic underflow on a double-precision move-out.
+| Calls unf_sub with d0 = 2 to build the result at (a0).  USER_FPSR
+| is saved and restored around the call so the condition codes set
+| by unf_sub are discarded.  The exponent is then decremented by 1,
+| the result is stored with dest_dbl, and unfinx_mask is ORed into
+| USER_FPSR before exiting via mvout_end.
+|
+dp_catas:
+| Temp fix for z bit set in unf_sub
+	movel	USER_FPSR(%a6),-(%a7)
+
+	movel	#2,%d0		|set round precision to dbl
+	bsrl	unf_sub		|a0 points to result
+
+	movel	(%a7)+,USER_FPSR(%a6)
+
+	movel	#1,%d0
+	subw	%d0,LOCAL_EX(%a0) |account for difference between
+|				;denorm/norm bias
+
+	movel	%a0,%a1		|a1 has the operand input
+	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
+
+	bsrl	dest_dbl	|store the result
+	oril	#unfinx_mask,USER_FPSR(%a6)
+	bra	mvout_end
+
+|
+| Handle catastrophic overflow cases here
+|
+| sp_over: single-precision overflow.  Copies ETEMP into FP_SCR1, lets
+| ovf_res build the overflow result there (round precision 1), stores
+| it with dest_sgl and ORs ovfinx_mask into USER_FPSR.
+sp_over:
+| Temp fix for z bit set in ovf_res: USER_FPSR is saved on the stack
+| before the call and restored afterwards.
+	movel	USER_FPSR(%a6),-(%a7)
+
+	movel	#1,%d0		|round precision = sgl
+	leal	FP_SCR1(%a6),%a0	|use FP_SCR1 for creating result
+	movel	ETEMP_EX(%a6),(%a0)
+	movel	ETEMP_HI(%a6),4(%a0)
+	movel	ETEMP_LO(%a6),8(%a0)
+	bsrl	ovf_res
+
+	movel	(%a7)+,USER_FPSR(%a6)
+
+	movel	%a0,%a1		|a1 points to the result to store
+	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
+	bsrl	dest_sgl
+	orl	#ovfinx_mask,USER_FPSR(%a6)
+	bra	mvout_end
+
+| dp_over: double-precision overflow.  Copies ETEMP into FP_SCR1, lets
+| ovf_res build the overflow result there (round precision 2), stores
+| it with dest_dbl and ORs ovfinx_mask into USER_FPSR.
+dp_over:
+| Temp fix for z bit set in ovf_res: USER_FPSR is saved on the stack
+| before the call and restored afterwards.
+	movel	USER_FPSR(%a6),-(%a7)
+
+	movel	#2,%d0		|round precision = dbl
+	leal	FP_SCR1(%a6),%a0	|use FP_SCR1 for creating result
+	movel	ETEMP_EX(%a6),(%a0)
+	movel	ETEMP_HI(%a6),4(%a0)
+	movel	ETEMP_LO(%a6),8(%a0)
+	bsrl	ovf_res
+
+	movel	(%a7)+,USER_FPSR(%a6)
+
+	movel	%a0,%a1		|a1 points to the result to store
+	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
+	bsrl	dest_dbl
+	orl	#ovfinx_mask,USER_FPSR(%a6)
+	bra	mvout_end
+
+|
+|	DPSPDNRM
+|
+| This subroutine takes an extended normalized number and denormalizes
+| it to the given round precision. This subroutine also decrements
+| the input operand's exponent by 1 to account for the fact that
+| dest_sgl or dest_dbl expects a normalized number's bias.
+|
+| Input: a0  points to a normalized number in internal extended format
+|	 d0  is the round precision (=1 for sgl; =2 for dbl)
+|	 d1  is the single precision or double precision
+|	     denorm threshold
+|
+| Output: (In the format for dest_sgl or dest_dbl)
+|	 a0   points to the destination
+|	 a1   points to the operand
+|
+| Exceptions: Reports inexact 2 exception by setting USER_FPSR bits
+|
+| dpspdnrm: denormalize the operand at a0 to the threshold in d1
+| (dnrm_lp), round it to the precision passed in d0 using the rounding
+| mode taken from FPCR_MODE, decrement the exponent by one, then return
+| with a1 = operand and a0 = destination pointer from EXC_EA.
+dpspdnrm:
+	movel	%d0,-(%a7)	|save round precision
+	clrl	%d0		|clear initial g,r,s
+	bsrl	dnrm_lp		|careful with d0, it's needed by round
+
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1 |get rounding mode
+	swap	%d1		|park the mode in the high word
+	movew	2(%a7),%d1	|set rounding precision
+|				;(low word of the longword saved above)
+	swap	%d1		|at this point d1 has PREC/MODE info
+	bsrl	round		|round result, sets the inex bit in
+|				;USER_FPSR if needed
+
+	movew	#1,%d0
+	subw	%d0,LOCAL_EX(%a0) |account for difference in denorm
+|				;vs norm bias
+
+	movel	%a0,%a1		|a1 has the operand input
+	movel	EXC_EA(%a6),%a0	|a0 has the destination pointer
+	addw	#4,%a7		|pop stack
+|				;(discards the saved round precision)
+	rts
+|
+| SET_XOP initializes WBTEMP with the value pointed to by a0
+| input: a0 points to input operand in the internal extended format
+|
+| set_xop: copy the three longwords of the operand at a0 into WBTEMP,
+| replace the byte at WBTEMP_SGN by a single sign bit in WBTEMP_EX, and
+| clear a 4-bit field in STAG.
+set_xop:
+	movel	LOCAL_EX(%a0),WBTEMP_EX(%a6)
+	movel	LOCAL_HI(%a0),WBTEMP_HI(%a6)
+	movel	LOCAL_LO(%a0),WBTEMP_LO(%a6)
+	bfclr	WBTEMP_SGN(%a6){#0:#8}	|clear sign byte; Z reflects its old value
+	beqs	sxop			|old sign byte was zero: positive
+	bsetb	#sign_bit,WBTEMP_EX(%a6)	|else set the sign bit
+sxop:
+	bfclr	STAG(%a6){#5:#4}	|clear wbtm66,wbtm1,wbtm0,sbit
+	rts
+|
+|	P_MOVE
+|
+| p_movet: jump table used by pack_out, indexed by the 3-bit source tag.
+| Entries 1..3 select the zero, inf and nan handlers; entries 0 and 4
+| both use the general p_move path (presumably the norm and denorm
+| tags - confirm against the tag encoding).
+p_movet:
+	.long	p_move
+	.long	p_movez
+	.long	p_movei
+	.long	p_moven
+	.long	p_move
+| p_regd: jump table used by p_move (dynamick), indexed by the data
+| register number that holds the dynamic k-factor.
+p_regd:
+	.long	p_dyd0
+	.long	p_dyd1
+	.long	p_dyd2
+	.long	p_dyd3
+	.long	p_dyd4
+	.long	p_dyd5
+	.long	p_dyd6
+	.long	p_dyd7
+
+| pack_out: dispatch on the source tag.  The word at STAG is loaded into
+| d0.w and its top three bits select an entry of p_movet.
+pack_out:
+	leal	p_movet,%a0	|load jmp table address
+	movew	STAG(%a6),%d0	|get source tag
+	bfextu	%d0{#16:#3},%d0	|isolate source bits
+|				;(bits 15-13 of the word just loaded)
+	movel	(%a0,%d0.w*4),%a0	|load a0 with routine label for tag
+	jmp	(%a0)		|go to the routine
+
+| p_write: common exit for the packed-decimal move.  Calls mem_write
+| with a byte count of 12 and the destination address from EXC_EA
+| (the p_move* callers leave the source pointer in a0), then clears
+| CU_SAVEPC and the DTAG field.
+p_write:
+	movel	#0x0c,%d0	|get byte count
+	movel	EXC_EA(%a6),%a1	|get the destination address
+	bsr	mem_write	|write the user's destination
+	moveb	#0,CU_SAVEPC(%a6) |set the cu save pc to all 0's
+
+|
+| Also note that the dtag must be set to norm here - this is because
+| the 040 uses the dtag to execute the correct microcode.
+|
+        bfclr    DTAG(%a6){#0:#3}  |set dtag to norm
+
+	rts
+
+| Notes on handling of special case (zero, inf, and nan) inputs:
+|	1. Operr is not signalled if the k-factor is greater than 18.
+|	2. Per the manual, status bits are not set.
+|
+
+| p_move: general packed-decimal conversion.  Fetches the k-factor,
+| either from the low 7 bits of the command word (statick) or from the
+| data register named in the command word (dynamick, via p_regd), sign
+| extends it, and calls bindec on ETEMP.  The converted value is then
+| written from FP_SCR1 by p_write.
+p_move:
+	movew	CMDREG1B(%a6),%d0
+	btstl	#kfact_bit,%d0	|test for dynamic k-factor
+	beqs	statick		|if clear, k-factor is static
+dynamick:
+	bfextu	%d0{#25:#3},%d0	|isolate register for dynamic k-factor
+	lea	p_regd,%a0
+	movel	%a0@(%d0:l:4),%a0	|fetch the p_dydN reader for that register
+	jmp	(%a0)		|reader loads d0 and branches to statick
+statick:
+	andiw	#0x007f,%d0	|get k-factor
+	bfexts	%d0{#25:#7},%d0	|sign extend d0 for bindec
+	leal	ETEMP(%a6),%a0	|a0 will point to the packed decimal
+	bsrl	bindec		|perform the convert; data at a6
+	leal	FP_SCR1(%a6),%a0	|load a0 with result address
+	bral	p_write
+| Special-case sources.  Each handler leaves a0 pointing at ETEMP and
+| exits through p_write.
+|
+| p_movez: zero source - clear everything below the upper word of the
+| first longword of ETEMP.
+p_movez:
+	leal	ETEMP(%a6),%a0	|a0 will point to the packed decimal
+	clrl	8(%a0)		|third lword of ZERO
+	clrl	4(%a0)		|second lword of ZERO
+	clrw	2(%a0)		|lower word of exp
+	bra	p_write		|go write results
+| p_movei: inf source - clear FPSR first, then share the nan tail.
+p_movei:
+	fmovel	#0,%FPSR		|clear aiop
+| p_moven: nan source - only the lower word of the exponent longword
+| is cleared.
+p_moven:
+	leal	ETEMP(%a6),%a0	|a0 will point to the packed decimal
+	clrw	2(%a0)		|clear lower word of exp
+	bra	p_write		|go write the result
+
+|
+| Routines to read the dynamic k-factor from Dn.
+|
+| p_dydN: load the dynamic k-factor from data register N into d0 and
+| continue at statick.  d0 and d1 are read from the saved copies
+| USER_D0/USER_D1; d2-d7 are read from the live registers.
+p_dyd0:
+	movel	USER_D0(%a6),%d0
+	bras	statick
+p_dyd1:
+	movel	USER_D1(%a6),%d0
+	bras	statick
+p_dyd2:
+	movel	%d2,%d0
+	bras	statick
+p_dyd3:
+	movel	%d3,%d0
+	bras	statick
+p_dyd4:
+	movel	%d4,%d0
+	bras	statick
+p_dyd5:
+	movel	%d5,%d0
+	bras	statick
+p_dyd6:
+	movel	%d6,%d0
+	bra	statick
+p_dyd7:
+	movel	%d7,%d0
+	bra	statick
+
+	|end
diff --git a/arch/m68k/fpsp040/round.S b/arch/m68k/fpsp040/round.S
new file mode 100644
index 0000000..00f9806
--- /dev/null
+++ b/arch/m68k/fpsp040/round.S
@@ -0,0 +1,649 @@
+|
+|	round.sa 3.4 7/29/91
+|
+|	handle rounding and normalization tasks
+|
+|
+|
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|ROUND	idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+|
+|	round --- round result according to precision/mode
+|
+|	a0 points to the input operand in the internal extended format
+|	d1(high word) contains rounding precision:
+|		ext = $0000xxxx
+|		sgl = $0001xxxx
+|		dbl = $0002xxxx
+|	d1(low word) contains rounding mode:
+|		RN  = $xxxx0000
+|		RZ  = $xxxx0001
+|		RM  = $xxxx0002
+|		RP  = $xxxx0003
+|	d0{31:29} contains the g,r,s bits (extended)
+|
+|	On return the value pointed to by a0 is correctly rounded,
+|	a0 is preserved and the g-r-s bits in d0 are cleared.
+|	The result is not typed - the tag field is invalid.  The
+|	result is still in the internal extended format.
+|
+|	The INEX bit of USER_FPSR will be set if the rounded result was
+|	inexact (i.e. if any of the g-r-s bits were set).
+|
+
+	.global	round
+round:
+| If g=r=s=0 then result is exact and round is done, else set
+| the inex flag in status reg and continue.
+|
+| In the exact case the value is still passed through truncate so that
+| bits beyond the selected precision are forced to zero.
+|
+	bsrs	ext_grs			|this subroutine looks at the
+|					;rounding precision and sets
+|					;the appropriate g-r-s bits.
+	tstl	%d0			|if grs are zero, go force
+	bne	rnd_cont		|lower bits to zero for size
+
+	swap	%d1			|set up d1.w for round prec.
+	bra	truncate
+
+rnd_cont:
+|
+| Use rounding mode as an index into a jump table for these modes.
+| d1.w still holds the rounding mode here; each handler swaps d1 so
+| that d1.w holds the precision before indexing add_to_l or trunct.
+|
+	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
+	lea	mode_tab,%a1
+	movel	(%a1,%d1.w*4),%a1
+	jmp	(%a1)
+|
+| Jump table indexed by rounding mode in d1.w.  All following assumes
+| grs != 0.
+|
+| Index 0 = nearest, 1 = zero, 2 = minus infinity, 3 = plus infinity.
+mode_tab:
+	.long	rnd_near
+	.long	rnd_zero
+	.long	rnd_mnus
+	.long	rnd_plus
+|
+|	ROUND PLUS INFINITY
+|
+|	If sign of fp number = 0 (positive), then add 1 to l.
+|
+rnd_plus:
+	swap	%d1			|set up d1 for round prec.
+	tstb	LOCAL_SGN(%a0)		|check for sign
+	bmi	truncate		|if negative then truncate
+	movel	#0xffffffff,%d0		|force g,r,s to be all f's
+|					;so add_* never clears the l-bit
+	lea	add_to_l,%a1
+	movel	(%a1,%d1.w*4),%a1	|pick add routine by precision
+	jmp	(%a1)
+|
+|	ROUND MINUS INFINITY
+|
+|	If sign of fp number = 1 (negative), then add 1 to l.
+|
+rnd_mnus:
+	swap	%d1			|set up d1 for round prec.
+	tstb	LOCAL_SGN(%a0)		|check for sign
+	bpl	truncate		|if positive then truncate
+	movel	#0xffffffff,%d0		|force g,r,s to be all f's
+|					;so add_* never clears the l-bit
+	lea	add_to_l,%a1
+	movel	(%a1,%d1.w*4),%a1	|pick add routine by precision
+	jmp	(%a1)
+|
+|	ROUND ZERO
+|
+|	Always truncate.
+rnd_zero:
+	swap	%d1			|set up d1 for round prec.
+	bra	truncate		|discard bits beyond the precision
+|
+|
+|	ROUND NEAREST
+|
+|	If (g=1), then add 1 to l and if (r=s=0), then clear l
+|	Note that this will round to even in case of a tie.
+|
+rnd_near:
+	swap	%d1			|set up d1 for round prec.
+	asll	#1,%d0			|shift g-bit to c-bit
+|					;d0 now holds only r and s
+	bcc	truncate		|g=0: truncate, else (g=1)
+	lea	add_to_l,%a1		|add 1 to l; the add routine
+	movel	(%a1,%d1.w*4),%a1	|clears l again when d0 (r,s)
+	jmp	(%a1)			|is zero, i.e. on a tie
+
+|
+|	ext_grs --- extract guard, round and sticky bits
+|
+| Input:	d1 =		PREC:ROUND
+| Output:	d0{31:29}=	guard, round, sticky
+|
+| The ext_grs extract the guard/round/sticky bits according to the
+| selected rounding precision. It is called by the round subroutine
+| only.  All registers except d0 are kept intact. d0 becomes an
+| updated guard,round,sticky in d0{31:29}
+|
+| Notes: the ext_grs uses the round PREC, and therefore has to swap d1
+|	 prior to usage, and needs to restore d1 to original.
+|
+| Extended precision: the g,r,s passed in d0 are returned unchanged.
+| Single/double: g and r are the two mantissa bits just below the
+| precision limit; s is the OR of every lower mantissa bit and of the
+| incoming d0.  Only d0 is changed; d1 is swapped back before return.
+ext_grs:
+	swap	%d1			|d1.w = rounding precision
+	tstw	%d1			|extended precision?
+	beqs	end_ext_grs		|yes: keep the caller g,r,s
+
+sgl_or_dbl:
+	moveml	%d2/%d3,-(%a7)		|free two scratch registers
+	cmpiw	#1,%d1			|single precision?
+	bnes	grs_dbl			|no: treat as double
+grs_sgl:
+	bfextu	LOCAL_HI(%a0){#24:#2},%d3	|g,r = 2 bits below the sgl limit
+	rorl	#2,%d3			|move g-r to bits 31:30 of d3
+	movel	LOCAL_HI(%a0),%d2	|ms mantissa for s-bit test
+	andil	#0x0000003f,%d2		|anything below g-r in ms mant?
+	bnes	st_stky			|yes: sticky
+	tstl	LOCAL_LO(%a0)		|anything in ls mant?
+	bnes	st_stky			|yes: sticky
+	bras	chk_in_grs		|else look at the incoming g,r,s
+grs_dbl:
+	bfextu	LOCAL_LO(%a0){#21:#2},%d3	|g,r = 2 bits below the dbl limit
+	rorl	#2,%d3			|move g-r to bits 31:30 of d3
+	movel	LOCAL_LO(%a0),%d2	|ls mantissa for s-bit test
+	andil	#0x000001ff,%d2		|anything below g-r in ls mant?
+	bnes	st_stky			|yes: sticky
+chk_in_grs:
+	tstl	%d0			|incoming g,r,s all clear?
+	beqs	end_sd			|yes: no sticky
+st_stky:
+	bset	#rnd_stky_bit,%d3
+end_sd:
+	movel	%d3,%d0			|return grs to d0
+	moveml	(%a7)+,%d2/%d3		|restore scratch registers
+end_ext_grs:
+	swap	%d1			|restore d1 to original
+	rts
+
+|*******************  Local Equates
+	.set	ad_1_sgl,0x00000100	|  constant to add 1 to l-bit in sgl prec
+	.set	ad_1_dbl,0x00000800	|  constant to add 1 to l-bit in dbl prec
+
+
+|Jump table for adding 1 to the l-bit indexed by rnd prec
+
+add_to_l:
+	.long	add_ext
+	.long	add_sgl
+	.long	add_dbl
+	.long	add_dbl
+|
+|	ADD SINGLE
+|
+| Adds one unit in the sgl l-bit position to the ms mantissa.  On
+| carry out the ms mantissa is rotated right one bit through X and the
+| exponent is incremented.  If d0 is zero the l-bit is cleared.
+| sgl_done is also the trunct entry for sgl.
+add_sgl:
+	addl	#ad_1_sgl,LOCAL_HI(%a0)
+	bccs	scc_clr			|no mantissa overflow
+	roxrw  LOCAL_HI(%a0)		|shift v-bit back in
+	roxrw  LOCAL_HI+2(%a0)		|shift v-bit back in
+	addw	#0x1,LOCAL_EX(%a0)	|and incr exponent
+scc_clr:
+	tstl	%d0			|test for rs = 0
+	bnes	sgl_done
+	andiw  #0xfe00,LOCAL_HI+2(%a0)	|clear the l-bit
+sgl_done:
+	andil	#0xffffff00,LOCAL_HI(%a0) |truncate bits beyond sgl limit
+	clrl	LOCAL_LO(%a0)		|clear ls mantissa
+	rts
+
+|
+|	ADD EXTENDED
+|
+| Adds one to the least significant mantissa bit, propagating the carry
+| into the ms mantissa.  If the carry leaves the ms mantissa too, the
+| 64-bit mantissa is rotated right one bit through X and the exponent
+| is incremented.  If d0 is zero the l-bit is cleared.
+add_ext:
+	addql  #1,LOCAL_LO(%a0)		|add 1 to l-bit
+	bccs	xcc_clr			|test for carry out
+	addql  #1,LOCAL_HI(%a0)		|propagate carry
+	bccs	xcc_clr
+	roxrw  LOCAL_HI(%a0)		|mant is 0 so restore v-bit
+	roxrw  LOCAL_HI+2(%a0)		|mant is 0 so restore v-bit
+	roxrw	LOCAL_LO(%a0)
+	roxrw	LOCAL_LO+2(%a0)
+	addw	#0x1,LOCAL_EX(%a0)	|and inc exp
+xcc_clr:
+	tstl	%d0			|test rs = 0
+	bnes	add_ext_done
+	andib	#0xfe,LOCAL_LO+3(%a0)	|clear the l bit
+add_ext_done:
+	rts
+|
+|	ADD DOUBLE
+|
+| Adds one unit in the dbl l-bit position to the ls mantissa,
+| propagating the carry into the ms mantissa.  If the carry leaves the
+| ms mantissa too, the 64-bit mantissa is rotated right one bit through
+| X and the exponent is incremented.  If d0 is zero the l-bit is
+| cleared.  dbl_done is also the trunct entry for dbl.
+add_dbl:
+	addl	#ad_1_dbl,LOCAL_LO(%a0)
+	bccs	dcc_clr
+	addql	#1,LOCAL_HI(%a0)		|propagate carry
+	bccs	dcc_clr
+	roxrw	LOCAL_HI(%a0)		|mant is 0 so restore v-bit
+	roxrw	LOCAL_HI+2(%a0)		|mant is 0 so restore v-bit
+	roxrw	LOCAL_LO(%a0)
+	roxrw	LOCAL_LO+2(%a0)
+	addw	#0x1,LOCAL_EX(%a0)	|incr exponent
+dcc_clr:
+	tstl	%d0			|test for rs = 0
+	bnes	dbl_done
+	andiw	#0xf000,LOCAL_LO+2(%a0)	|clear the l-bit
+
+dbl_done:
+	andil	#0xfffff800,LOCAL_LO(%a0) |truncate bits beyond dbl limit
+	rts
+
+| NOTE(review): this label is not referenced anywhere in round.S and is
+| not exported; it looks like dead code.
+error:
+	rts
+|
+| Truncate all other bits
+|
+| trunct: jump table indexed by rounding precision in d1.w
+| (0 = ext: nothing to do, 1 = sgl, 2 = dbl; index 3 also maps to dbl).
+| The sgl and dbl entries are the masking tails of add_sgl and add_dbl.
+trunct:
+	.long	end_rnd
+	.long	sgl_done
+	.long	dbl_done
+	.long	dbl_done
+
+| truncate: entered with d1.w = rounding precision and a0 = operand.
+truncate:
+	lea	trunct,%a1
+	movel	(%a1,%d1.w*4),%a1
+	jmp	(%a1)
+
+end_rnd:
+	rts
+
+|
+|	NORMALIZE
+|
+| These routines (nrm_zero & nrm_set) normalize the unnorm.  This
+| is done by shifting the mantissa left while decrementing the
+| exponent.
+|
+| NRM_SET shifts and decrements until there is a 1 set in the integer
+| bit of the mantissa (msb in d1).
+|
+| NRM_ZERO shifts and decrements until there is a 1 set in the integer
+| bit of the mantissa (msb in d1) unless this would mean the exponent
+| would go less than 0.  In that case the number becomes a denorm - the
+| exponent (d0) is set to 0 and the mantissa (d1 & d2) is not
+| normalized.
+|
+| Note that both routines have been optimized (for the worst case) and
+| therefore do not have the easy to follow decrement/shift loop.
+|
+|	NRM_ZERO
+|
+|	Distance to first 1 bit in mantissa = X
+|	Distance to 0 from exponent = Y
+|	If X < Y
+|	Then
+|	  nrm_set
+|	Else
+|	  shift mantissa by Y
+|	  set exponent = 0
+|
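+|input:
+|	a0 points to the operand (exponent, ms mantissa part, ls mantissa
+|	part) in the internal extended format, e.g. FP_SCR1
+|output:
+|	the operand at a0 is updated in place.
+|	(This comment previously listed L_SCR1{4} = fpte15 or ete15 bit
+|	as an output; neither nrm_zero nor nrm_set writes L_SCR1.)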
+|
+	.global	nrm_zero
+| nrm_zero: normalize the operand at a0 without letting the exponent go
+| below zero.
+|
+| Input:  a0 points to the operand in the internal extended format.
+| Output: operand at a0 updated in place.  d0 and d1 are used as
+|	  scratch and not preserved; d2/d3/d5/d6 are saved and restored.
+|
+| X = distance to the first set mantissa bit, Y = exponent.  When
+| Y >= X the operand can be fully normalized and nrm_set does the work.
+| Otherwise the 64-bit mantissa is shifted left by Y and the exponent
+| is set to 0 (result is a denorm).
+|
+| The shift by Y is done in two ways because register shift counts are
+| taken modulo 64, so a single lsll by 32..63 yields zero:
+|   Y < 32   shift both longwords and carry the top Y bits of the ls
+|	     mantissa into the ms mantissa;
+|   Y >= 32  (only possible when the ms mantissa is all zero) move the
+|	     ls mantissa into the ms mantissa and shift it by Y-32.
+nrm_zero:
+	movew	LOCAL_EX(%a0),%d0
+	cmpw   #64,%d0          |see if exp > 64
+	bmis	d0_less
+	bsr	nrm_set		|exp > 64 so exp won't exceed 0
+	rts
+d0_less:
+	moveml	%d2/%d3/%d5/%d6,-(%a7)
+	movel	LOCAL_HI(%a0),%d1
+	movel	LOCAL_LO(%a0),%d2
+
+	bfffo	%d1{#0:#32},%d3	|get the distance to the first 1
+|				;in ms mant
+	beqs	ms_clr		|branch if no bits were set
+	cmpw	%d3,%d0		|if X>Y
+	bmis	greater		|then exp will go past 0 (neg) if
+|				;it is just shifted
+	bsr	nrm_set		|else exp won't go past 0
+	moveml	(%a7)+,%d2/%d3/%d5/%d6
+	rts
+greater:
+	cmpw	#32,%d0		|shift of a whole longword or more?
+	bges	gt_long		|yes: ms mant is zero, see gt_long
+	movel	%d2,%d6		|save ls mant in d6
+	lsll	%d0,%d2		|shift ls mant by count
+	lsll	%d0,%d1		|shift ms mant by count
+	movel	#32,%d5
+	subl	%d0,%d5		|make op a denorm by shifting bits
+	lsrl	%d5,%d6		|by the number in the exp, then
+|				;set exp = 0.
+	orl	%d6,%d1		|shift the ls mant bits into the ms mant
+gt_store:
+	movel	#0,%d0		|same as if decremented exp to 0
+|				;while shifting
+	movew	%d0,LOCAL_EX(%a0)
+	movel	%d1,LOCAL_HI(%a0)
+	movel	%d2,LOCAL_LO(%a0)
+	moveml	(%a7)+,%d2/%d3/%d5/%d6
+	rts
+gt_long:
+	subw	#32,%d0		|remaining shift after moving a longword
+	movel	%d2,%d1		|ls mant becomes the ms mant
+	lsll	%d0,%d1		|shift it by exp-32; no bits are lost
+|				;because exp < 32 + leading zero count
+	clrl	%d2		|ls mant is now empty
+	bras	gt_store
+ms_clr:
+	bfffo	%d2{#0:#32},%d3	|check if any bits set in ls mant
+	beqs	all_clr		|branch if none set
+	addw	#32,%d3		|X counts the 32 zero bits of ms mant too
+	cmpw	%d3,%d0		|if X>Y
+	bmis	greater		|then branch
+	bsr	nrm_set		|else exp won't go past 0
+	moveml	(%a7)+,%d2/%d3/%d5/%d6
+	rts
+all_clr:
+	movew	#0,LOCAL_EX(%a0)	|no mantissa bits set. Set exp = 0.
+	moveml	(%a7)+,%d2/%d3/%d5/%d6
+	rts
+|
+|	NRM_SET
+|
+	.global	nrm_set
+| nrm_set: normalize the operand at a0 unconditionally.  The mantissa
+| is shifted left until its top bit is set and the shift count is
+| subtracted from the exponent.  d0 and d1 are used as scratch and not
+| preserved; d6 and d7 are saved and restored.
+nrm_set:
+	movel	%d7,-(%a7)
+	bfffo	LOCAL_HI(%a0){#0:#32},%d7 |find first 1 in ms mant to d7
+	beqs	lower		|branch if ms mant is all 0's
+
+	movel	%d6,-(%a7)
+
+	subw	%d7,LOCAL_EX(%a0)	|sub exponent by count
+	movel	LOCAL_HI(%a0),%d0	|d0 has ms mant
+	movel	LOCAL_LO(%a0),%d1 |d1 has ls mant
+
+	lsll	%d7,%d0		|shift first 1 to j bit position
+	movel	%d1,%d6		|copy ls mant into d6
+	lsll	%d7,%d6		|shift ls mant by count
+	movel	%d6,LOCAL_LO(%a0)	|store ls mant into memory
+	moveql	#32,%d6
+	subl	%d7,%d6		|continue shift
+	lsrl	%d6,%d1		|shift off all bits but those that will
+|				;be shifted into ms mant
+	orl	%d1,%d0		|shift the ls mant bits into the ms mant
+	movel	%d0,LOCAL_HI(%a0)	|store ms mant into memory
+	moveml	(%a7)+,%d7/%d6	|restore registers
+|				;(d6 was pushed last, so it is popped first)
+	rts
+
+|
+| We get here if ms mant was = 0, and we assume ls mant has bits
+| set (otherwise this would have been tagged a zero not a denorm).
+| Only d7 has been pushed at this point.
+|
+lower:
+	movew	LOCAL_EX(%a0),%d0	|d0 has exponent
+	movel	LOCAL_LO(%a0),%d1	|d1 has ls mant
+	subw	#32,%d0		|account for ms mant being all zeros
+	bfffo	%d1{#0:#32},%d7	|find first 1 in ls mant to d7
+	subw	%d7,%d0		|subtract shift count from exp
+	lsll	%d7,%d1		|shift first 1 to integer bit in ms mant
+	movew	%d0,LOCAL_EX(%a0)	|store exp
+	movel	%d1,LOCAL_HI(%a0)	|store ms mant
+	clrl	LOCAL_LO(%a0)	|clear ls mant
+	movel	(%a7)+,%d7
+	rts
+|
+|	denorm --- denormalize an intermediate result
+|
+|	Used by underflow.
+|
+| Input:
+|	a0	 points to the operand to be denormalized
+|		 (in the internal extended format)
+|
+|	d0:	 rounding precision
+| Output:
+|	a0	 points to the denormalized result
+|		 (in the internal extended format)
+|
+|	d0	is guard,round,sticky
+|
+| d0 comes into this routine with the rounding precision. It
+| is then loaded with the denormalized exponent threshold for the
+| rounding precision.
+|
+
+	.global	denorm
+| denorm: denormalize the operand at a0 to the threshold for the
+| rounding precision passed in d0 (0 = ext, 1 = sgl, otherwise dbl).
+|
+| Returns with the operand at a0 denormalized and d0 = guard, round,
+| sticky.  inex2/ainex are ORed into USER_FPSR when bits were lost.
+| d1 is used as scratch.
+|
+| For sgl and dbl, when threshold - exponent is 67 or more every
+| mantissa bit would be shifted out, so dnrm_lp is skipped: the
+| mantissa is cleared, the exponent is set to the threshold, and d0 is
+| returned as sticky-only if any mantissa bit was set, or zero if the
+| mantissa was already all zero.
+denorm:
+	btstb	#6,LOCAL_EX(%a0)	|check for exponents between $7fff-$4000
+	beqs	no_sgn_ext
+	bsetb	#7,LOCAL_EX(%a0)	|sign extend if it is so
+no_sgn_ext:
+
+	cmpib	#0,%d0		|if 0 then extended precision
+	bnes	not_ext		|else branch
+
+	clrl	%d1		|load d1 with ext threshold
+	clrl	%d0		|clear the sticky flag
+	bsr	dnrm_lp		|denormalize the number
+	tstb	%d1		|check for inex
+	beq	no_inex		|if clr, no inex
+	bras	dnrm_inex	|if set, set inex
+
+not_ext:
+	cmpil	#1,%d0		|if 1 then single precision
+	beqs	load_sgl	|else must be 2, double prec
+
+load_dbl:
+	movew	#dbl_thresh,%d1	|put copy of threshold in d1
+	movel	%d1,%d0		|copy d1 into d0
+	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
+	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits)
+	bpls	chk_stky	|then branch (all bits would be
+|				; shifted off in denorm routine)
+	clrl	%d0		|else clear the sticky flag
+	bsr	dnrm_lp		|denormalize the number
+	tstb	%d1		|check flag
+	beqs	no_inex		|if clr, no inex
+	bras	dnrm_inex	|if set, set inex
+
+load_sgl:
+	movew	#sgl_thresh,%d1	|put copy of threshold in d1
+	movel	%d1,%d0		|copy d1 into d0
+	subw	LOCAL_EX(%a0),%d0	|diff = threshold - exp
+	cmpw	#67,%d0		|if diff >= 67 (mant + grs bits)
+	bpls	chk_stky	|then branch (all bits would be
+|				; shifted off in denorm routine)
+	clrl	%d0		|else clear the sticky flag
+	bsr	dnrm_lp		|denormalize the number
+	tstb	%d1		|check flag
+	beqs	no_inex		|if clr, no inex
+	bras	dnrm_inex	|if set, set inex
+
+chk_stky:
+	tstl	LOCAL_HI(%a0)	|check for any bits set
+	bnes	set_stky
+	tstl	LOCAL_LO(%a0)	|check for any bits set
+	bnes	set_stky
+	clrl	%d0		|nothing lost: return g,r,s = 0 rather
+|				; than the exponent difference left in d0
+	bras	clr_mant
+set_stky:
+	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
+	movel	#0x20000000,%d0	|set sticky bit in return value
+clr_mant:
+	movew	%d1,LOCAL_EX(%a0)		|load exp with threshold
+	movel	#0,LOCAL_HI(%a0)	|clear ms mantissa
+	movel	#0,LOCAL_LO(%a0)		|clear ls mantissa
+	rts
+dnrm_inex:
+	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
+no_inex:
+	rts
+
+|
+|	dnrm_lp --- denormalize exponent/mantissa to specified threshold
+|
+| Input:
+|	a0		points to the operand to be denormalized
+|	d0{31:29}	initial guard,round,sticky
+|	d1{15:0}	denormalization threshold
+| Output:
+|	a0		points to the denormalized operand
+|	d0{31:29}	final guard,round,sticky
+|	d1.b		inexact flag:  all ones means inexact result
+|
+| The LOCAL_LO and LOCAL_GRS parts of the value are copied to FP_SCR2
+| so that bfext can be used to extract the new low part of the mantissa.
+| Dnrm_lp can be called with a0 pointing to ETEMP or WBTEMP and there
+| is no LOCAL_GRS scratch word following it on the fsave frame.
+|
+	.global	dnrm_lp
+| Entry: picks the initial g,r,s, saves LOCAL_LO and g,r,s in FP_SCR2,
+| then dispatches on the shift amount d1 = threshold - exponent:
+|   d1 <= 0        no_lp
+|   0 < d1 < 32    case_1
+|   32 <= d1 < 64  case_2
+|   d1 >= 64       case_3
+| On dispatch d0.w holds the threshold and d1.w the shift amount.
+dnrm_lp:
+	movel	%d2,-(%sp)		|save d2 for temp use
+	btstb	#E3,E_BYTE(%a6)		|test for type E3 exception
+	beqs	not_E3			|not type E3 exception
+|					;E3: g,r,s come from WBTEMP_GRS and
+|					;replace the value passed in d0
+	bfextu	WBTEMP_GRS(%a6){#6:#3},%d2	|extract guard,round, sticky  bit
+	movel	#29,%d0
+	lsll	%d0,%d2			|shift g,r,s to their positions
+	movel	%d2,%d0
+not_E3:
+	movel	(%sp)+,%d2		|restore d2
+	movel	LOCAL_LO(%a0),FP_SCR2+LOCAL_LO(%a6)
+	movel	%d0,FP_SCR2+LOCAL_GRS(%a6)
+	movel	%d1,%d0			|copy the denorm threshold
+	subw	LOCAL_EX(%a0),%d1		|d1 = threshold - uns exponent
+	bles	no_lp			|d1 <= 0
+	cmpw	#32,%d1
+	blts	case_1			|0 < d1 < 32
+	cmpw	#64,%d1
+	blts	case_2			|32 <= d1 < 64
+	bra	case_3			|d1 >= 64
+|
+| No normalization necessary
+|
+no_lp:
+	clrb	%d1			|set no inex2 reported
+	movel	FP_SCR2+LOCAL_GRS(%a6),%d0	|restore original g,r,s
+	rts
+|
+| case (0<d1<32)
+|
+| Entered with d0.w = threshold and d1 = shift amount.  The mantissa is
+| shifted right by d1 using 32-bit wide bit-field extracts that start
+| 32-d1 bits into a longword and run into the following one (this
+| relies on LOCAL_EX, LOCAL_HI and LOCAL_LO being consecutive
+| longwords - confirm against fpsp.h).  The bits shifted out of
+| LOCAL_LO become the new g,r,s; the original g,r,s only contribute
+| to sticky.
+case_1:
+	movel	%d2,-(%sp)
+	movew	%d0,LOCAL_EX(%a0)		|exponent = denorm threshold
+	movel	#32,%d0
+	subw	%d1,%d0			|d0 = 32 - d1
+	bfextu	LOCAL_EX(%a0){%d0:#32},%d2	|low d1 bits of EX : top d0 bits of HI
+	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_HI
+|					;(drops the bits that came from EX)
+	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new LOCAL_LO
+	bfextu	FP_SCR2+LOCAL_LO(%a6){%d0:#32},%d0	|d0 = new G,R,S
+	movel	%d2,LOCAL_HI(%a0)		|store new LOCAL_HI
+	movel	%d1,LOCAL_LO(%a0)		|store new LOCAL_LO
+	clrb	%d1			|assume exact
+	bftst	%d0{#2:#30}		|any bits below g and r?
+	beqs	c1nstky
+	bsetl	#rnd_stky_bit,%d0
+	st	%d1			|d1.b = all ones: inexact
+c1nstky:
+	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
+	andil	#0xe0000000,%d2		|clear all but G,R,S
+	tstl	%d2			|test if original G,R,S are clear
+	beqs	grs_clear
+	orl	#0x20000000,%d0		|set sticky bit in d0
+grs_clear:
+	andil	#0xe0000000,%d0		|clear all but G,R,S
+	movel	(%sp)+,%d2
+	rts
+|
+| case (32<=d1<64)
+|
+| Entered with d0.w = threshold and d1 = shift amount.  The shift moves
+| at least one whole longword: LOCAL_HI becomes zero, the new LOCAL_LO
+| comes from the old LOCAL_HI shifted right by d1-32, and the bits
+| shifted out of the old LOCAL_HI become the new g,r,s.  The saved old
+| LOCAL_LO and the original g,r,s only contribute to sticky.
+case_2:
+	movel	%d2,-(%sp)
+	movew	%d0,LOCAL_EX(%a0)		|unsigned exponent = threshold
+	subw	#32,%d1			|d1 now between 0 and 32
+	movel	#32,%d0
+	subw	%d1,%d0			|d0 = 32 - d1
+	bfextu	LOCAL_EX(%a0){%d0:#32},%d2
+	bfextu	%d2{%d1:%d0},%d2		|d2 = new LOCAL_LO
+	bfextu	LOCAL_HI(%a0){%d0:#32},%d1	|d1 = new G,R,S
+	bftst	%d1{#2:#30}		|any bits below g and r?
+	bnes	c2_sstky		|bra if sticky bit to be set
+	bftst	FP_SCR2+LOCAL_LO(%a6){%d0:#32}
+	bnes	c2_sstky		|bra if sticky bit to be set
+	movel	%d1,%d0
+	clrb	%d1			|d1.b = 0: exact so far
+	bras	end_c2
+c2_sstky:
+	movel	%d1,%d0
+	bsetl	#rnd_stky_bit,%d0
+	st	%d1			|d1.b = all ones: inexact
+end_c2:
+	clrl	LOCAL_HI(%a0)		|store LOCAL_HI = 0
+	movel	%d2,LOCAL_LO(%a0)		|store LOCAL_LO
+	movel	FP_SCR2+LOCAL_GRS(%a6),%d2	|restore original g,r,s
+	andil	#0xe0000000,%d2		|clear all but G,R,S
+	tstl	%d2			|test if original G,R,S are clear
+	beqs	clear_grs
+	orl	#0x20000000,%d0		|set sticky bit in d0
+clear_grs:
+	andil	#0xe0000000,%d0		|get rid of all but G,R,S
+	movel	(%sp)+,%d2
+	rts
+|
+| d1 >= 64 Force the exponent to be the denorm threshold with the
+| correct sign.
+|
+| Entered with d0.w = threshold and d1 = shift amount.  The mantissa is
+| always cleared.  Shifts of exactly 64 or 65 still derive g,r (64) or
+| r (65) from the top bits of LOCAL_HI; larger shifts return
+| sticky-only.
+case_3:
+	movew	%d0,LOCAL_EX(%a0)
+	tstw	LOCAL_SGN(%a0)
+	bges	c3con
+c3neg:
+	orl	#0x80000000,LOCAL_EX(%a0)	|set top bit of the exponent longword
+c3con:
+	cmpw	#64,%d1
+	beqs	sixty_four
+	cmpw	#65,%d1
+	beqs	sixty_five
+|
+| Shift value is out of range.  Set d1 for inex2 flag and
+| return a zero with the given threshold.
+|
+	clrl	LOCAL_HI(%a0)
+	clrl	LOCAL_LO(%a0)
+	movel	#0x20000000,%d0		|g,r,s = sticky only
+	st	%d1			|d1.b = all ones: inexact
+	rts
+
+sixty_four:
+	movel	LOCAL_HI(%a0),%d0
+	bfextu	%d0{#2:#30},%d1		|d1 = bits below the top two
+	andil	#0xc0000000,%d0		|top two bits become g and r
+	bras	c3com
+
+sixty_five:
+	movel	LOCAL_HI(%a0),%d0
+	bfextu	%d0{#1:#31},%d1		|d1 = bits below the top bit
+	andil	#0x80000000,%d0
+	lsrl	#1,%d0			|shift high bit into R bit
+
+c3com:
+	tstl	%d1			|any remaining ms mant bits?
+	bnes	c3ssticky
+	tstl	LOCAL_LO(%a0)		|any ls mant bits?
+	bnes	c3ssticky
+	tstb	FP_SCR2+LOCAL_GRS(%a6)	|any original g,r,s (high byte)?
+	bnes	c3ssticky
+	clrb	%d1			|d1.b = 0: nothing folded into sticky
+	bras	c3end
+
+c3ssticky:
+	bsetl	#rnd_stky_bit,%d0
+	st	%d1			|d1.b = all ones: inexact
+c3end:
+	clrl	LOCAL_HI(%a0)
+	clrl	LOCAL_LO(%a0)
+	rts
+
+	|end
diff --git a/arch/m68k/fpsp040/sacos.S b/arch/m68k/fpsp040/sacos.S
new file mode 100644
index 0000000..83b00ab
--- /dev/null
+++ b/arch/m68k/fpsp040/sacos.S
@@ -0,0 +1,115 @@
+|
+|	sacos.sa 3.3 12/19/90
+|
+|	Description: The entry point sAcos computes the inverse cosine of
+|		an input argument; sAcosd does the same except for denormalized
+|		input.
+|
+|	Input: Double-extended number X in location pointed to
+|		by address register a0.
+|
+|	Output: The value arccos(X) returned in floating-point register Fp0.
+|
+|	Accuracy and Monotonicity: The returned result is within 3 ulps in
+|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+|		result is subsequently rounded to double precision. The
+|		result is provably monotonic in double precision.
+|
+|	Speed: The program sACOS takes approximately 310 cycles.
+|
+|	Algorithm:
+|
+|	ACOS
+|	1. If |X| >= 1, go to 3.
+|
+|	2. (|X| < 1) Calculate acos(X) by
+|		z := (1-X) / (1+X)
+|		acos(X) = 2 * atan( sqrt(z) ).
+|		Exit.
+|
+|	3. If |X| > 1, go to 5.
+|
+|	4. (|X| = 1) If X > 0, return 0. Otherwise, return Pi. Exit.
+|
+|	5. (|X| > 1) Generate an invalid operation by 0 * infinity.
+|		Exit.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|SACOS	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+| Pi and Pi/2 as extended-precision constants: exponent word, 64-bit
+| mantissa, then a zero longword (presumably padding).  They differ
+| only in the exponent.
+PI:	.long 0x40000000,0xC90FDAA2,0x2168C235,0x00000000
+PIBY2:	.long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
+
+	|xref	t_operr
+	|xref	t_frcinx
+	|xref	satan
+
+	.global	sacosd
+| sacosd: denormalized input.  Loads d1 into the FPCR, returns Pi/2 in
+| fp0 and exits through t_frcinx.
+sacosd:
+|--ACOS(X) = PI/2 FOR DENORMALIZED X
+	fmovel		%d1,%fpcr		| ...load user's rounding mode/precision
+	fmovex		PIBY2,%fp0
+	bra		t_frcinx
+
+	.global	sacos
+| sacos: arccos of the extended-precision value at a0, result in fp0.
+| d1 carries the FPCR value that is loaded before the final operation.
+| The input at a0 is overwritten in the |X| < 1 case.
+sacos:
+	fmovex		(%a0),%fp0	| ...LOAD INPUT
+
+	movel		(%a0),%d0		| ...pack exponent with upper 16 fraction
+	movew		4(%a0),%d0
+	andil		#0x7FFFFFFF,%d0	| ...drop the sign
+	cmpil		#0x3FFF8000,%d0	| ...compare |X| with 1.0
+	bges		ACOSBIG
+
+|--THIS IS THE USUAL CASE, |X| < 1
+|--ACOS(X) = 2 * ATAN(	SQRT( (1-X)/(1+X) )	)
+
+	fmoves		#0x3F800000,%fp1
+	faddx		%fp0,%fp1		| ...1+X
+	fnegx		%fp0		| ... -X
+	fadds		#0x3F800000,%fp0	| ...1-X
+	fdivx		%fp1,%fp0		| ...(1-X)/(1+X)
+	fsqrtx		%fp0		| ...SQRT((1-X)/(1+X))
+	fmovemx	%fp0-%fp0,(%a0)	| ...overwrite input
+	movel		%d1,-(%sp)	|save original users fpcr
+	clrl		%d1		|satan is called with d1 = 0
+	bsr		satan		| ...ATAN(SQRT([1-X]/[1+X]))
+	fmovel		(%sp)+,%fpcr	|restore users exceptions
+	faddx		%fp0,%fp0		| ...2 * ATAN( STUFF )
+	bra		t_frcinx
+
+ACOSBIG:
+	fabsx		%fp0
+	fcmps		#0x3F800000,%fp0
+	fbgt		t_operr		|cause an operr exception
+
+|--|X| = 1, ACOS(X) = 0 OR PI
+	movel		(%a0),%d0		| ...pack exponent with upper 16 fraction
+	movew		4(%a0),%d0
+	cmpl		#0,%d0		|D0 has original exponent+fraction
+	bgts		ACOSP1		|sign bit clear: X = +1
+
+|--X = -1
+|Returns PI and inexact exception
+	fmovex		PI,%fp0
+	fmovel		%d1,%FPCR
+	fadds		#0x00800000,%fp0	|cause an inexact exception to be put
+|					;into the 040 - will not trap until next
+|					;fp inst.
+	bra		t_frcinx
+
+|--X = +1
+ACOSP1:
+	fmovel		%d1,%FPCR
+	fmoves		#0x00000000,%fp0
+	rts				|Facos ; of +1 is exact
+
+	|end
diff --git a/arch/m68k/fpsp040/sasin.S b/arch/m68k/fpsp040/sasin.S
new file mode 100644
index 0000000..5647a60
--- /dev/null
+++ b/arch/m68k/fpsp040/sasin.S
@@ -0,0 +1,104 @@
+|
+|	sasin.sa 3.3 12/19/90
+|
+|	Description: The entry point sAsin computes the inverse sine of
+|		an input argument; sAsind does the same except for denormalized
+|		input.
+|
+|	Input: Double-extended number X in location pointed to
+|		by address register a0.
+|
+|	Output: The value arcsin(X) returned in floating-point register Fp0.
+|
+|	Accuracy and Monotonicity: The returned result is within 3 ulps in
+|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+|		result is subsequently rounded to double precision. The
+|		result is provably monotonic in double precision.
+|
+|	Speed: The program sASIN takes approximately 310 cycles.
+|
+|	Algorithm:
+|
+|	ASIN
+|	1. If |X| >= 1, go to 3.
+|
+|	2. (|X| < 1) Calculate asin(X) by
+|		z := sqrt( [1-X][1+X] )
+|		asin(X) = atan( x / z ).
+|		Exit.
+|
+|	3. If |X| > 1, go to 5.
+|
+|	4. (|X| = 1) sgn := sign(X), return asin(X) := sgn * Pi/2. Exit.
+|
+|	5. (|X| > 1) Generate an invalid operation by 0 * infinity.
+|		Exit.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|SASIN	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+| Pi/2 as an extended-precision constant: exponent word, 64-bit
+| mantissa, then a zero longword (presumably padding).
+PIBY2:	.long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000
+
+	|xref	t_operr
+	|xref	t_frcinx
+	|xref	t_extdnrm
+	|xref	satan
+
+	.global	sasind
+| sasind: denormalized input.  The result is the input itself, so the
+| work is handed to t_extdnrm.
+sasind:
+|--ASIN(X) = X FOR DENORMALIZED X
+
+	bra		t_extdnrm
+
+	.global	sasin
+| sasin: arcsin of the extended-precision value at a0, result in fp0.
+| d1 carries the FPCR value that is loaded before the final operation
+| in the |X| = 1 case.  The input at a0 is overwritten in the |X| < 1
+| case.  fp2 is saved and restored around its use as a temporary.
+sasin:
+	fmovex		(%a0),%fp0	| ...LOAD INPUT
+
+	movel		(%a0),%d0	| ...pack exponent with upper 16 fraction
+	movew		4(%a0),%d0
+	andil		#0x7FFFFFFF,%d0	| ...drop the sign
+	cmpil		#0x3FFF8000,%d0	| ...compare |X| with 1.0
+	bges		asinbig
+
+|--THIS IS THE USUAL CASE, |X| < 1
+|--ASIN(X) = ATAN( X / SQRT( (1-X)(1+X) ) )
+
+	fmoves		#0x3F800000,%fp1
+	fsubx		%fp0,%fp1		| ...1-X
+	fmovemx	%fp2-%fp2,-(%a7)
+	fmoves		#0x3F800000,%fp2
+	faddx		%fp0,%fp2		| ...1+X
+	fmulx		%fp2,%fp1		| ...(1+X)(1-X)
+	fmovemx	(%a7)+,%fp2-%fp2
+	fsqrtx		%fp1		| ...SQRT([1-X][1+X])
+	fdivx		%fp1,%fp0		| ...X/SQRT([1-X][1+X])
+	fmovemx	%fp0-%fp0,(%a0)	| ...overwrite input
+	bsr		satan
+	bra		t_frcinx
+
+asinbig:
+	fabsx		%fp0	 | ...|X|
+	fcmps		#0x3F800000,%fp0
+	fbgt		t_operr		|cause an operr exception
+
+|--|X| = 1, ASIN(X) = +- PI/2.
+
+	fmovex		PIBY2,%fp0
+	movel		(%a0),%d0
+	andil		#0x80000000,%d0	| ...SIGN BIT OF X
+	oril		#0x3F800000,%d0	| ...+-1 IN SGL FORMAT
+	movel		%d0,-(%sp)	| ...push SIGN(X) IN SGL-FMT
+	fmovel		%d1,%FPCR
+	fmuls		(%sp)+,%fp0	| ...+-1 * PI/2
+	bra		t_frcinx
+
+	|end
diff --git a/arch/m68k/fpsp040/satan.S b/arch/m68k/fpsp040/satan.S
new file mode 100644
index 0000000..20dae22
--- /dev/null
+++ b/arch/m68k/fpsp040/satan.S
@@ -0,0 +1,478 @@
+|
+|	satan.sa 3.3 12/19/90
+|
+|	The entry point satan computes the arctangent of an
+|	input value. satand does the same except the input value is a
+|	denormalized number.
+|
+|	Input: Double-extended value in memory location pointed to by address
+|		register a0.
+|
+|	Output:	Arctan(X) returned in floating-point register Fp0.
+|
+|	Accuracy and Monotonicity: The returned result is within 2 ulps in
+|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+|		result is subsequently rounded to double precision. The
+|		result is provably monotonic in double precision.
+|
+|	Speed: The program satan takes approximately 160 cycles for input
+|		argument X such that 1/16 < |X| < 16. For the other arguments,
+|		the program will run no worse than 10% slower.
+|
+|	Algorithm:
+|	Step 1. If |X| >= 16 or |X| < 1/16, go to Step 5.
+|
+|	Step 2. Let X = sgn * 2**k * 1.xxxxxxxx...x. Note that k = -4, -3,..., or 3.
+|		Define F = sgn * 2**k * 1.xxxx1, i.e. the first 5 significant bits
+|		of X with a bit-1 attached at the 6-th bit position. Define u
+|		to be u = (X-F) / (1 + X*F).
+|
+|	Step 3. Approximate arctan(u) by a polynomial poly.
+|
+|	Step 4. Return arctan(F) + poly, arctan(F) is fetched from a table of values
+|		calculated beforehand. Exit.
+|
+|	Step 5. If |X| >= 16, go to Step 7.
+|
+|	Step 6. Approximate arctan(X) by an odd polynomial in X. Exit.
+|
+|	Step 7. Define X' = -1/X. Approximate arctan(X') by an odd polynomial in X'.
+|		Arctan(X) = sign(X)*Pi/2 + arctan(X'). Exit.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|satan	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+BOUNDS1:	.long 0x3FFB8000,0x4002FFFF	| ...SAME TWO LIMITS satan COMPARES d0 AGAINST (1/16 AND 16)
+
+ONE:	.long 0x3F800000	| ...1 IN SGL FORMAT
+
+	.long 0x00000000
+
+ATANA3:	.long 0xBFF6687E,0x314987D8	| ...A1-A3: DBL COEFFICIENTS USED BY ATANMAIN
+ATANA2:	.long 0x4002AC69,0x34A26DB3
+
+ATANA1:	.long 0xBFC2476F,0x4E1DA28E
+ATANB6:	.long 0x3FB34444,0x7F876989	| ...B1-B6: DBL COEFFICIENTS USED BY ATANSM
+
+ATANB5:	.long 0xBFB744EE,0x7FAF45DB
+ATANB4:	.long 0x3FBC71C6,0x46940220
+
+ATANB3:	.long 0xBFC24924,0x921872F9
+ATANB2:	.long 0x3FC99999,0x99998FA9
+
+ATANB1:	.long 0xBFD55555,0x55555555
+ATANC5:	.long 0xBFB70BF3,0x98539E6A	| ...C1-C5: DBL COEFFICIENTS USED BY ATANBIG
+
+ATANC4:	.long 0x3FBC7187,0x962D1D7D
+ATANC3:	.long 0xBFC24924,0x827107B8
+
+ATANC2:	.long 0x3FC99999,0x9996263E
+ATANC1:	.long 0xBFD55555,0x55555536
+
+PPIBY2:	.long 0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000	| ...+PI/2, EXTENDED
+NPIBY2:	.long 0xBFFF0000,0xC90FDAA2,0x2168C235,0x00000000	| ...-PI/2, EXTENDED
+PTINY:	.long 0x00010000,0x80000000,0x00000000,0x00000000	| ...+TINY, SUBTRACTED IN pos_huge
+NTINY:	.long 0x80010000,0x80000000,0x00000000,0x00000000	| ...-TINY, SUBTRACTED IN neg_huge
+
+ATANTBL:	| ...128 ENTRIES OF ATAN(|F|), 16 BYTES (4 LONGS) EACH; ATANMAIN INDEXES IT BY (K+4)*16 + 4 FRACTION BITS
+	.long	0x3FFB0000,0x83D152C5,0x060B7A51,0x00000000
+	.long	0x3FFB0000,0x8BC85445,0x65498B8B,0x00000000
+	.long	0x3FFB0000,0x93BE4060,0x17626B0D,0x00000000
+	.long	0x3FFB0000,0x9BB3078D,0x35AEC202,0x00000000
+	.long	0x3FFB0000,0xA3A69A52,0x5DDCE7DE,0x00000000
+	.long	0x3FFB0000,0xAB98E943,0x62765619,0x00000000
+	.long	0x3FFB0000,0xB389E502,0xF9C59862,0x00000000
+	.long	0x3FFB0000,0xBB797E43,0x6B09E6FB,0x00000000
+	.long	0x3FFB0000,0xC367A5C7,0x39E5F446,0x00000000
+	.long	0x3FFB0000,0xCB544C61,0xCFF7D5C6,0x00000000
+	.long	0x3FFB0000,0xD33F62F8,0x2488533E,0x00000000
+	.long	0x3FFB0000,0xDB28DA81,0x62404C77,0x00000000
+	.long	0x3FFB0000,0xE310A407,0x8AD34F18,0x00000000
+	.long	0x3FFB0000,0xEAF6B0A8,0x188EE1EB,0x00000000
+	.long	0x3FFB0000,0xF2DAF194,0x9DBE79D5,0x00000000
+	.long	0x3FFB0000,0xFABD5813,0x61D47E3E,0x00000000
+	.long	0x3FFC0000,0x8346AC21,0x0959ECC4,0x00000000
+	.long	0x3FFC0000,0x8B232A08,0x304282D8,0x00000000
+	.long	0x3FFC0000,0x92FB70B8,0xD29AE2F9,0x00000000
+	.long	0x3FFC0000,0x9ACF476F,0x5CCD1CB4,0x00000000
+	.long	0x3FFC0000,0xA29E7630,0x4954F23F,0x00000000
+	.long	0x3FFC0000,0xAA68C5D0,0x8AB85230,0x00000000
+	.long	0x3FFC0000,0xB22DFFFD,0x9D539F83,0x00000000
+	.long	0x3FFC0000,0xB9EDEF45,0x3E900EA5,0x00000000
+	.long	0x3FFC0000,0xC1A85F1C,0xC75E3EA5,0x00000000
+	.long	0x3FFC0000,0xC95D1BE8,0x28138DE6,0x00000000
+	.long	0x3FFC0000,0xD10BF300,0x840D2DE4,0x00000000
+	.long	0x3FFC0000,0xD8B4B2BA,0x6BC05E7A,0x00000000
+	.long	0x3FFC0000,0xE0572A6B,0xB42335F6,0x00000000
+	.long	0x3FFC0000,0xE7F32A70,0xEA9CAA8F,0x00000000
+	.long	0x3FFC0000,0xEF888432,0x64ECEFAA,0x00000000
+	.long	0x3FFC0000,0xF7170A28,0xECC06666,0x00000000
+	.long	0x3FFD0000,0x812FD288,0x332DAD32,0x00000000
+	.long	0x3FFD0000,0x88A8D1B1,0x218E4D64,0x00000000
+	.long	0x3FFD0000,0x9012AB3F,0x23E4AEE8,0x00000000
+	.long	0x3FFD0000,0x976CC3D4,0x11E7F1B9,0x00000000
+	.long	0x3FFD0000,0x9EB68949,0x3889A227,0x00000000
+	.long	0x3FFD0000,0xA5EF72C3,0x4487361B,0x00000000
+	.long	0x3FFD0000,0xAD1700BA,0xF07A7227,0x00000000
+	.long	0x3FFD0000,0xB42CBCFA,0xFD37EFB7,0x00000000
+	.long	0x3FFD0000,0xBB303A94,0x0BA80F89,0x00000000
+	.long	0x3FFD0000,0xC22115C6,0xFCAEBBAF,0x00000000
+	.long	0x3FFD0000,0xC8FEF3E6,0x86331221,0x00000000
+	.long	0x3FFD0000,0xCFC98330,0xB4000C70,0x00000000
+	.long	0x3FFD0000,0xD6807AA1,0x102C5BF9,0x00000000
+	.long	0x3FFD0000,0xDD2399BC,0x31252AA3,0x00000000
+	.long	0x3FFD0000,0xE3B2A855,0x6B8FC517,0x00000000
+	.long	0x3FFD0000,0xEA2D764F,0x64315989,0x00000000
+	.long	0x3FFD0000,0xF3BF5BF8,0xBAD1A21D,0x00000000
+	.long	0x3FFE0000,0x801CE39E,0x0D205C9A,0x00000000
+	.long	0x3FFE0000,0x8630A2DA,0xDA1ED066,0x00000000
+	.long	0x3FFE0000,0x8C1AD445,0xF3E09B8C,0x00000000
+	.long	0x3FFE0000,0x91DB8F16,0x64F350E2,0x00000000
+	.long	0x3FFE0000,0x97731420,0x365E538C,0x00000000
+	.long	0x3FFE0000,0x9CE1C8E6,0xA0B8CDBA,0x00000000
+	.long	0x3FFE0000,0xA22832DB,0xCADAAE09,0x00000000
+	.long	0x3FFE0000,0xA746F2DD,0xB7602294,0x00000000
+	.long	0x3FFE0000,0xAC3EC0FB,0x997DD6A2,0x00000000
+	.long	0x3FFE0000,0xB110688A,0xEBDC6F6A,0x00000000
+	.long	0x3FFE0000,0xB5BCC490,0x59ECC4B0,0x00000000
+	.long	0x3FFE0000,0xBA44BC7D,0xD470782F,0x00000000
+	.long	0x3FFE0000,0xBEA94144,0xFD049AAC,0x00000000
+	.long	0x3FFE0000,0xC2EB4ABB,0x661628B6,0x00000000
+	.long	0x3FFE0000,0xC70BD54C,0xE602EE14,0x00000000
+	.long	0x3FFE0000,0xCD000549,0xADEC7159,0x00000000
+	.long	0x3FFE0000,0xD48457D2,0xD8EA4EA3,0x00000000
+	.long	0x3FFE0000,0xDB948DA7,0x12DECE3B,0x00000000
+	.long	0x3FFE0000,0xE23855F9,0x69E8096A,0x00000000
+	.long	0x3FFE0000,0xE8771129,0xC4353259,0x00000000
+	.long	0x3FFE0000,0xEE57C16E,0x0D379C0D,0x00000000
+	.long	0x3FFE0000,0xF3E10211,0xA87C3779,0x00000000
+	.long	0x3FFE0000,0xF919039D,0x758B8D41,0x00000000
+	.long	0x3FFE0000,0xFE058B8F,0x64935FB3,0x00000000
+	.long	0x3FFF0000,0x8155FB49,0x7B685D04,0x00000000
+	.long	0x3FFF0000,0x83889E35,0x49D108E1,0x00000000
+	.long	0x3FFF0000,0x859CFA76,0x511D724B,0x00000000
+	.long	0x3FFF0000,0x87952ECF,0xFF8131E7,0x00000000
+	.long	0x3FFF0000,0x89732FD1,0x9557641B,0x00000000
+	.long	0x3FFF0000,0x8B38CAD1,0x01932A35,0x00000000
+	.long	0x3FFF0000,0x8CE7A8D8,0x301EE6B5,0x00000000
+	.long	0x3FFF0000,0x8F46A39E,0x2EAE5281,0x00000000
+	.long	0x3FFF0000,0x922DA7D7,0x91888487,0x00000000
+	.long	0x3FFF0000,0x94D19FCB,0xDEDF5241,0x00000000
+	.long	0x3FFF0000,0x973AB944,0x19D2A08B,0x00000000
+	.long	0x3FFF0000,0x996FF00E,0x08E10B96,0x00000000
+	.long	0x3FFF0000,0x9B773F95,0x12321DA7,0x00000000
+	.long	0x3FFF0000,0x9D55CC32,0x0F935624,0x00000000
+	.long	0x3FFF0000,0x9F100575,0x006CC571,0x00000000
+	.long	0x3FFF0000,0xA0A9C290,0xD97CC06C,0x00000000
+	.long	0x3FFF0000,0xA22659EB,0xEBC0630A,0x00000000
+	.long	0x3FFF0000,0xA388B4AF,0xF6EF0EC9,0x00000000
+	.long	0x3FFF0000,0xA4D35F10,0x61D292C4,0x00000000
+	.long	0x3FFF0000,0xA60895DC,0xFBE3187E,0x00000000
+	.long	0x3FFF0000,0xA72A51DC,0x7367BEAC,0x00000000
+	.long	0x3FFF0000,0xA83A5153,0x0956168F,0x00000000
+	.long	0x3FFF0000,0xA93A2007,0x7539546E,0x00000000
+	.long	0x3FFF0000,0xAA9E7245,0x023B2605,0x00000000
+	.long	0x3FFF0000,0xAC4C84BA,0x6FE4D58F,0x00000000
+	.long	0x3FFF0000,0xADCE4A4A,0x606B9712,0x00000000
+	.long	0x3FFF0000,0xAF2A2DCD,0x8D263C9C,0x00000000
+	.long	0x3FFF0000,0xB0656F81,0xF22265C7,0x00000000
+	.long	0x3FFF0000,0xB1846515,0x0F71496A,0x00000000
+	.long	0x3FFF0000,0xB28AAA15,0x6F9ADA35,0x00000000
+	.long	0x3FFF0000,0xB37B44FF,0x3766B895,0x00000000
+	.long	0x3FFF0000,0xB458C3DC,0xE9630433,0x00000000
+	.long	0x3FFF0000,0xB525529D,0x562246BD,0x00000000
+	.long	0x3FFF0000,0xB5E2CCA9,0x5F9D88CC,0x00000000
+	.long	0x3FFF0000,0xB692CADA,0x7ACA1ADA,0x00000000
+	.long	0x3FFF0000,0xB736AEA7,0xA6925838,0x00000000
+	.long	0x3FFF0000,0xB7CFAB28,0x7E9F7B36,0x00000000
+	.long	0x3FFF0000,0xB85ECC66,0xCB219835,0x00000000
+	.long	0x3FFF0000,0xB8E4FD5A,0x20A593DA,0x00000000
+	.long	0x3FFF0000,0xB99F41F6,0x4AFF9BB5,0x00000000
+	.long	0x3FFF0000,0xBA7F1E17,0x842BBE7B,0x00000000
+	.long	0x3FFF0000,0xBB471285,0x7637E17D,0x00000000
+	.long	0x3FFF0000,0xBBFABE8A,0x4788DF6F,0x00000000
+	.long	0x3FFF0000,0xBC9D0FAD,0x2B689D79,0x00000000
+	.long	0x3FFF0000,0xBD306A39,0x471ECD86,0x00000000
+	.long	0x3FFF0000,0xBDB6C731,0x856AF18A,0x00000000
+	.long	0x3FFF0000,0xBE31CAC5,0x02E80D70,0x00000000
+	.long	0x3FFF0000,0xBEA2D55C,0xE33194E2,0x00000000
+	.long	0x3FFF0000,0xBF0B10B7,0xC03128F0,0x00000000
+	.long	0x3FFF0000,0xBF6B7A18,0xDACB778D,0x00000000
+	.long	0x3FFF0000,0xBFC4EA46,0x63FA18F6,0x00000000
+	.long	0x3FFF0000,0xC0181BDE,0x8B89A454,0x00000000
+	.long	0x3FFF0000,0xC065B066,0xCFBF6439,0x00000000
+	.long	0x3FFF0000,0xC0AE345F,0x56340AE6,0x00000000
+	.long	0x3FFF0000,0xC0F22291,0x9CB9E6A7,0x00000000
+
+	.set	X,FP_SCR1	| ...SCRATCH COPY OF THE INPUT (LATER REWRITTEN TO F OR -1/X)
+	.set	XDCARE,X+2	| ...WORD AFTER SIGN/EXPO, ZEROED BEFORE X IS REUSED
+	.set	XFRAC,X+4	| ...UPPER LONG OF THE MANTISSA
+	.set	XFRACLO,X+8	| ...LOWER LONG OF THE MANTISSA
+
+	.set	ATANF,FP_SCR2	| ...RECEIVES SIGN(F)*ATAN(|F|) COPIED FROM ATANTBL
+	.set	ATANFHI,ATANF+4
+	.set	ATANFLO,ATANF+8
+
+
+	| xref	t_frcinx
+	|xref	t_extdnrm
+
+	.global	satand
+satand:
+|--ENTRY POINT FOR ATAN(X) FOR DENORMALIZED ARGUMENT
+|--NOTHING IS COMPUTED HERE; THE CASE IS HANDED TO t_extdnrm.
+	bra		t_extdnrm	| ...plain branch, control does not come back here
+
+	.global	satan
+satan:
+|--ENTRY POINT FOR ATAN(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
+|--DISPATCHES ON |X| TO ATANSM (< 1/16), ATANMAIN ([1/16,16)) OR ATANBIG (>= 16).
+	fmovex		(%a0),%fp0	| ...LOAD INPUT
+
+	movel		(%a0),%d0	| ...SIGN/EXPO INTO HIGH WORD OF d0
+	movew		4(%a0),%d0	| ...TOP 16 MANTISSA BITS INTO LOW WORD
+	fmovex		%fp0,X(%a6)	| ...KEEP A COPY OF X IN SCRATCH
+	andil		#0x7FFFFFFF,%d0	| ...d0 IS NOW |X| IN COMPACT FORM
+
+	cmpil		#0x3FFB8000,%d0		| ...|X| >= 1/16?
+	bges		ATANOK1
+	bra		ATANSM
+
+ATANOK1:
+	cmpil		#0x4002FFFF,%d0		| ...|X| < 16 ?
+	bles		ATANMAIN
+	bra		ATANBIG
+
+
+|--THE MOST LIKELY CASE, |X| IN [1/16, 16). WE USE A TABLE TECHNIQUE.
+|--THE IDEA IS ATAN(X) = ATAN(F) + ATAN( [X-F] / [1+XF] ).
+|--SO IF F IS CHOSEN TO BE CLOSE TO X AND ATAN(F) IS STORED IN
+|--A TABLE, ALL WE NEED IS TO APPROXIMATE ATAN(U) WHERE
+|--U = (X-F)/(1+XF) IS SMALL (REMEMBER F IS CLOSE TO X). IT IS
+|--TRUE THAT A DIVIDE IS NOW NEEDED, BUT THE APPROXIMATION FOR
+|--ATAN(U) IS A VERY SHORT POLYNOMIAL AND THE INDEXING TO
+|--FETCH F AND SAVING OF REGISTERS CAN ALL BE HIDDEN UNDER THE
+|--DIVIDE. IN THE END THIS METHOD IS MUCH FASTER THAN A TRADITIONAL
+|--ONE. NOTE ALSO THAT THE TRADITIONAL SCHEME THAT APPROXIMATES
+|--ATAN(X) DIRECTLY WILL NEED TO USE A RATIONAL APPROXIMATION
+|--(DIVISION NEEDED) ANYWAY BECAUSE A POLYNOMIAL APPROXIMATION
+|--WILL INVOLVE A VERY LONG POLYNOMIAL.
+
+|--NOW WE SEE X AS +-2^K * 1.BBBBBBB....B <- 1. + 63 BITS
+|--WE CHOOSE F TO BE +-2^K * 1.BBBB1
+|--THAT IS, IT MATCHES THE EXPONENT AND FIRST 5 BITS OF X, AND THE
+|--SIXTH BIT IS SET TO 1. SINCE K = -4, -3, ..., 3, THERE
+|--ARE ONLY 8 TIMES 16 = 2^7 = 128 |F|'S. SINCE ATAN(-|F|) IS
+|-- -ATAN(|F|), WE NEED TO STORE ONLY ATAN(|F|).
+
+ATANMAIN:
+
+	movew		#0x0000,XDCARE(%a6)	| ...CLEAN UP X JUST IN CASE
+	andil		#0xF8000000,XFRAC(%a6)	| ...FIRST 5 BITS
+	oril		#0x04000000,XFRAC(%a6)	| ...SET 6-TH BIT TO 1
+	movel		#0x00000000,XFRACLO(%a6)	| ...LOCATION OF X IS NOW F
+
+	fmovex		%fp0,%fp1			| ...FP1 IS X
+	fmulx		X(%a6),%fp1		| ...FP1 IS X*F, NOTE THAT X*F > 0
+	fsubx		X(%a6),%fp0		| ...FP0 IS X-F
+	fadds		#0x3F800000,%fp1		| ...FP1 IS 1 + X*F
+	fdivx		%fp1,%fp0			| ...FP0 IS U = (X-F)/(1+X*F)
+
+|--WHILE THE DIVISION IS TAKING ITS TIME, WE FETCH ATAN(|F|),
+|--CREATE ATAN(F) AND STORE IT IN ATANF. d2 IS USED AS A
+|--TEMPORARY AND IS SAVED/RESTORED AROUND THE INDEX COMPUTATION.
+
+	movel		%d2,-(%a7)	| ...SAVE d2 TEMPORARILY
+	movel		%d0,%d2		| ...THE EXPO AND 16 BITS OF X
+	andil		#0x00007800,%d0	| ...4 VARYING BITS OF F'S FRACTION
+	andil		#0x7FFF0000,%d2	| ...EXPONENT OF F
+	subil		#0x3FFB0000,%d2	| ...K+4
+	asrl		#1,%d2		| ...PLACE K+4 JUST ABOVE THE 4 FRACTION BITS
+	addl		%d2,%d0		| ...THE 7 BITS IDENTIFYING F
+	asrl		#7,%d0		| ...INDEX INTO TBL OF ATAN(|F|)
+	lea		ATANTBL,%a1
+	addal		%d0,%a1		| ...ADDRESS OF ATAN(|F|)
+	movel		(%a1)+,ATANF(%a6)
+	movel		(%a1)+,ATANFHI(%a6)
+	movel		(%a1)+,ATANFLO(%a6)	| ...ATANF IS NOW ATAN(|F|)
+	movel		X(%a6),%d0		| ...LOAD SIGN AND EXPO. AGAIN
+	andil		#0x80000000,%d0	| ...SIGN(F)
+	orl		%d0,ATANF(%a6)	| ...ATANF IS NOW SIGN(F)*ATAN(|F|)
+	movel		(%a7)+,%d2	| ...RESTORE d2
+
+|--THAT'S ALL I HAVE TO DO FOR NOW,
+|--BUT ALAS, THE DIVIDE IS STILL CRANKING!
+
+|--U IN FP0, WE ARE NOW READY TO COMPUTE ATAN(U) AS
+|--U + A1*U*V*(A2 + V*(A3 + V)), V = U*U
+|--THE POLYNOMIAL MAY LOOK STRANGE, BUT IS NEVERTHELESS CORRECT.
+|--THE NATURAL FORM IS U + U*V*(A1 + V*(A2 + V*A3))
+|--WHAT WE HAVE HERE IS MERELY	A1 = A3, A2 = A1/A3, A3 = A2/A3.
+|--THE REASON FOR THIS REARRANGEMENT IS TO MAKE THE INDEPENDENT
+|--PARTS A1*U*V AND (A2 + ... STUFF) MORE LOAD-BALANCED
+
+
+	fmovex		%fp0,%fp1
+	fmulx		%fp1,%fp1		| ...FP1 IS V = U*U
+	fmoved		ATANA3,%fp2
+	faddx		%fp1,%fp2		| ...A3+V
+	fmulx		%fp1,%fp2		| ...V*(A3+V)
+	fmulx		%fp0,%fp1		| ...U*V
+	faddd		ATANA2,%fp2	| ...A2+V*(A3+V)
+	fmuld		ATANA1,%fp1	| ...A1*U*V
+	fmulx		%fp2,%fp1		| ...A1*U*V*(A2+V*(A3+V))
+
+	faddx		%fp1,%fp0		| ...ATAN(U), FP1 RELEASED
+	fmovel		%d1,%FPCR		|restore users exceptions
+	faddx		ATANF(%a6),%fp0	| ...ATAN(X)
+	bra		t_frcinx
+
+ATANBORS:
+|--|X| IS IN d0 IN COMPACT FORM. FP1, d0 SAVED.
+|--FP0 IS X AND |X| <= 1/16 OR |X| >= 16.
+	cmpil		#0x3FFF8000,%d0	| ...NOTE(review): no branch in this file targets ATANBORS
+	bgt		ATANBIG	| ...I.E. |X| >= 16
+
+ATANSM:
+|--|X| < 1/16 (satan BRANCHES HERE WHEN THE >= 1/16 TEST FAILS)
+|--IF |X| < 2^(-40), RETURN X AS ANSWER. OTHERWISE, APPROXIMATE
+|--ATAN(X) BY X + X*Y*(B1+Y*(B2+Y*(B3+Y*(B4+Y*(B5+Y*B6)))))
+|--WHICH IS X + X*Y*( [B1+Z*(B3+Z*B5)] + [Y*(B2+Z*(B4+Z*B6))] )
+|--WHERE Y = X*X, AND Z = Y*Y.
+
+	cmpil		#0x3FD78000,%d0	| ...|X| < 2^(-40) ?
+	blt		ATANTINY
+|--COMPUTE POLYNOMIAL
+	fmulx		%fp0,%fp0	| ...FP0 IS Y = X*X
+
+
+	movew		#0x0000,XDCARE(%a6)	| ...CLEAR THE UNUSED WORD OF THE SAVED X
+
+	fmovex		%fp0,%fp1
+	fmulx		%fp1,%fp1		| ...FP1 IS Z = Y*Y
+
+	fmoved		ATANB6,%fp2
+	fmoved		ATANB5,%fp3
+
+	fmulx		%fp1,%fp2		| ...Z*B6
+	fmulx		%fp1,%fp3		| ...Z*B5
+
+	faddd		ATANB4,%fp2	| ...B4+Z*B6
+	faddd		ATANB3,%fp3	| ...B3+Z*B5
+
+	fmulx		%fp1,%fp2		| ...Z*(B4+Z*B6)
+	fmulx		%fp3,%fp1		| ...Z*(B3+Z*B5)
+
+	faddd		ATANB2,%fp2	| ...B2+Z*(B4+Z*B6)
+	faddd		ATANB1,%fp1	| ...B1+Z*(B3+Z*B5)
+
+	fmulx		%fp0,%fp2		| ...Y*(B2+Z*(B4+Z*B6))
+	fmulx		X(%a6),%fp0		| ...X*Y
+
+	faddx		%fp2,%fp1		| ...[B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]
+
+
+	fmulx		%fp1,%fp0	| ...X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))])
+
+	fmovel		%d1,%FPCR		|restore users exceptions
+	faddx		X(%a6),%fp0	| ...X + X*Y*(...) = ATAN(X)
+
+	bra		t_frcinx
+
+ATANTINY:
+|--|X| < 2^(-40), ATAN(X) = X
+	movew		#0x0000,XDCARE(%a6)	| ...CLEAR THE UNUSED WORD OF THE SAVED X
+
+	fmovel		%d1,%FPCR		|restore users exceptions
+	fmovex		X(%a6),%fp0	|last inst - possible exception set
+
+	bra		t_frcinx
+
+ATANBIG:
+|--IF |X| > 2^(100), RETURN	SIGN(X)*(PI/2 - TINY). OTHERWISE,
+|--RETURN SIGN(X)*PI/2 + ATAN(-1/X).
+	cmpil		#0x40638000,%d0	| ...|X| > 2^(100) ?
+	bgt		ATANHUGE
+
+|--APPROXIMATE ATAN(-1/X) BY
+|--X'+X'*Y*(C1+Y*(C2+Y*(C3+Y*(C4+Y*C5)))), X' = -1/X, Y = X'*X'
+|--THIS CAN BE RE-WRITTEN AS
+|--X'+X'*Y*( [C1+Z*(C3+Z*C5)] + [Y*(C2+Z*C4)] ), Z = Y*Y.
+
+	fmoves		#0xBF800000,%fp1	| ...LOAD -1
+	fdivx		%fp0,%fp1		| ...FP1 IS -1/X
+
+
+|--DIVIDE IS STILL CRANKING
+
+	fmovex		%fp1,%fp0		| ...FP0 IS X'
+	fmulx		%fp0,%fp0		| ...FP0 IS Y = X'*X'
+	fmovex		%fp1,X(%a6)		| ...X IS REALLY X'
+
+	fmovex		%fp0,%fp1
+	fmulx		%fp1,%fp1		| ...FP1 IS Z = Y*Y
+
+	fmoved		ATANC5,%fp3
+	fmoved		ATANC4,%fp2
+
+	fmulx		%fp1,%fp3		| ...Z*C5
+	fmulx		%fp1,%fp2		| ...Z*C4
+
+	faddd		ATANC3,%fp3	| ...C3+Z*C5
+	faddd		ATANC2,%fp2	| ...C2+Z*C4
+
+	fmulx		%fp3,%fp1		| ...Z*(C3+Z*C5), FP3 RELEASED
+	fmulx		%fp0,%fp2		| ...Y*(C2+Z*C4)
+
+	faddd		ATANC1,%fp1	| ...C1+Z*(C3+Z*C5)
+	fmulx		X(%a6),%fp0		| ...X'*Y
+
+	faddx		%fp2,%fp1		| ...[Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)]
+
+
+	fmulx		%fp1,%fp0		| ...X'*Y*([C1+Z*(C3+Z*C5)]
+|					...	+[Y*(C2+Z*C4)])
+	faddx		X(%a6),%fp0	| ...FP0 IS NOW THE APPROXIMATION OF ATAN(X')
+
+	fmovel		%d1,%FPCR		|restore users exceptions
+
+	btstb		#7,(%a0)	| ...TEST SIGN BIT OF THE ORIGINAL INPUT
+	beqs		pos_big
+
+neg_big:
+	faddx		NPIBY2,%fp0	| ...-PI/2 + ATAN(X')
+	bra		t_frcinx
+
+pos_big:
+	faddx		PPIBY2,%fp0	| ...+PI/2 + ATAN(X')
+	bra		t_frcinx
+
+ATANHUGE:
+|--RETURN SIGN(X)*(PIBY2 - TINY) = SIGN(X)*PIBY2 - SIGN(X)*TINY
+	btstb		#7,(%a0)	| ...TEST SIGN BIT OF THE INPUT
+	beqs		pos_huge
+
+neg_huge:
+	fmovex		NPIBY2,%fp0
+	fmovel		%d1,%fpcr	| ...LOAD FPCR FROM d1 BEFORE THE FINAL FP OPERATION
+	fsubx		NTINY,%fp0	| ...-PI/2 - (-TINY)
+	bra		t_frcinx
+
+pos_huge:
+	fmovex		PPIBY2,%fp0
+	fmovel		%d1,%fpcr	| ...LOAD FPCR FROM d1 BEFORE THE FINAL FP OPERATION
+	fsubx		PTINY,%fp0	| ...+PI/2 - TINY
+	bra		t_frcinx
+
+	|end
diff --git a/arch/m68k/fpsp040/satanh.S b/arch/m68k/fpsp040/satanh.S
new file mode 100644
index 0000000..20f0781
--- /dev/null
+++ b/arch/m68k/fpsp040/satanh.S
@@ -0,0 +1,104 @@
+|
+|	satanh.sa 3.3 12/19/90
+|
+|	The entry point satanh computes the inverse
+|	hyperbolic tangent of
+|	an input argument; satanhd does the same except for denormalized
+|	input.
+|
+|	Input: Double-extended number X in location pointed to
+|		by address register a0.
+|
+|	Output: The value arctanh(X) returned in floating-point register Fp0.
+|
+|	Accuracy and Monotonicity: The returned result is within 3 ulps in
+|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+|		result is subsequently rounded to double precision. The
+|		result is provably monotonic in double precision.
+|
+|	Speed: The program satanh takes approximately 270 cycles.
+|
+|	Algorithm:
+|
+|	ATANH
+|	1. If |X| >= 1, go to 3.
+|
+|	2. (|X| < 1) Calculate atanh(X) by
+|		sgn := sign(X)
+|		y := |X|
+|		z := 2y/(1-y)
+|		atanh(X) := sgn * (1/2) * logp1(z)
+|		Exit.
+|
+|	3. If |X| > 1, go to 5.
+|
+|	4. (|X| = 1) Generate infinity with an appropriate sign and
+|		divide-by-zero by
+|		sgn := sign(X)
+|		atanh(X) := sgn / (+0).
+|		Exit.
+|
+|	5. (|X| > 1) Generate an invalid operation by 0 * infinity.
+|		Exit.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|satanh	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+	|xref	t_dz
+	|xref	t_operr
+	|xref	t_frcinx
+	|xref	t_extdnrm
+	|xref	slognp1
+
+	.global	satanhd
+satanhd:
+|--ENTRY POINT FOR DENORMALIZED X, WHERE ATANH(X) = X.
+|--NOTHING IS COMPUTED HERE; THE CASE IS HANDED TO t_extdnrm.
+	bra		t_extdnrm	| ...plain branch, control does not come back here
+
+	.global	satanh
+satanh:
+	movel		(%a0),%d0	| ...SIGN/EXPO INTO HIGH WORD OF d0
+	movew		4(%a0),%d0	| ...TOP 16 MANTISSA BITS INTO LOW WORD
+	andil		#0x7FFFFFFF,%d0	| ...d0 IS |X| IN COMPACT FORM
+	cmpil		#0x3FFF8000,%d0	| ...|X| >= 1 ?
+	bges		ATANHBIG
+
+|--THIS IS THE USUAL CASE, |X| < 1
+|--Y = |X|, Z = 2Y/(1-Y), ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z).
+
+	fabsx		(%a0),%fp0	| ...Y = |X|
+	fmovex		%fp0,%fp1
+	fnegx		%fp1		| ...-Y
+	faddx		%fp0,%fp0		| ...2Y
+	fadds		#0x3F800000,%fp1	| ...1-Y
+	fdivx		%fp1,%fp0		| ...2Y/(1-Y)
+	movel		(%a0),%d0
+	andil		#0x80000000,%d0	| ...SIGN BIT OF X
+	oril		#0x3F000000,%d0	| ...SIGN(X)*HALF
+	movel		%d0,-(%sp)	| ...PUSH IT FOR THE FINAL MULTIPLY
+
+	fmovemx	%fp0-%fp0,(%a0)	| ...overwrite input
+	movel		%d1,-(%sp)	| ...PUSH d1; IT IS POPPED INTO FPCR BELOW
+	clrl		%d1		| ...slognp1 IS CALLED WITH d1 = 0
+	bsr		slognp1		| ...LOG1P(Z)
+	fmovel		(%sp)+,%fpcr	| ...POP SAVED d1 VALUE INTO FPCR
+	fmuls		(%sp)+,%fp0	| ...SIGN(X)*(1/2)*LOG1P(Z)
+	bra		t_frcinx
+
+ATANHBIG:
+	fabsx		(%a0),%fp0	| ...|X|
+	fcmps		#0x3F800000,%fp0	| ...COMPARE |X| WITH 1
+	fbgt		t_operr		| ...|X| > 1: OPERAND ERROR
+	bra		t_dz		| ...|X| = 1: DIVIDE-BY-ZERO HANDLER
+
+	|end
diff --git a/arch/m68k/fpsp040/scale.S b/arch/m68k/fpsp040/scale.S
new file mode 100644
index 0000000..5c9b805
--- /dev/null
+++ b/arch/m68k/fpsp040/scale.S
@@ -0,0 +1,371 @@
+|
+|	scale.sa 3.3 7/30/91
+|
+|	The entry point sSCALE computes the destination operand
+|	scaled by the source operand.  If the absolute value of
+|	the source operand is (>= 2^14) an overflow or underflow
+|	is returned.
+|
+|	The entry point sscale is called from do_func to emulate
+|	the fscale unimplemented instruction.
+|
+|	Input: Double-extended destination operand in FPTEMP,
+|		double-extended source operand in ETEMP.
+|
+|	Output: The function returns scale(X,Y) to fp0.
+|
+|	Modifies: fp0.
+|
+|	Algorithm:
+|
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|SCALE    idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	t_ovfl2
+	|xref	t_unfl
+	|xref	round
+	|xref	t_resdnrm
+
+SRC_BNDS: .short	0x3fff,0x400c	|lower/upper bound pair for the cmp2w on the src exponent
+
+|
+| This entry point is used by the unimplemented instruction exception
+| handler.
+|
+|
+|
+|	FSCALE
+|
+	.global	sscale
+sscale:
+	fmovel		#0,%fpcr		|clr user enabled exc
+	clrl		%d1
+	movew		FPTEMP(%a6),%d1	|get dest exponent
+	smi		L_SCR1(%a6)	|use L_SCR1 to hold sign
+	andil		#0x7fff,%d1	|strip sign
+	movew		ETEMP(%a6),%d0	|check src bounds
+	andiw		#0x7fff,%d0	|clr sign bit
+	cmp2w		SRC_BNDS,%d0	|src exponent within [0x3fff,0x400c]?
+	bccs		src_in		|carry clear = within bounds
+	cmpiw		#0x400c,%d0	|test for too large
+	bge		src_out		|otherwise fall into src_small
+|
+| The source input is below 1, so we check for denormalized numbers
+| and set unfl.
+|
+src_small:
+	moveb		DTAG(%a6),%d0
+	andib		#0xe0,%d0	|keep the top three bits of the dest tag
+	tstb		%d0
+	beqs		no_denorm	|all clear: take the simple return path
+	st		STORE_FLG(%a6)	|dest already contains result
+	orl		#unfl_mask,USER_FPSR(%a6) |set UNFL
+den_done:
+	leal		FPTEMP(%a6),%a0	|a0 -> dest operand for t_resdnrm
+	bra		t_resdnrm
+no_denorm:
+	fmovel		USER_FPCR(%a6),%FPCR	|reload the user FPCR before the final fmove
+	fmovex		FPTEMP(%a6),%fp0	|simply return dest
+	rts
+
+
+|
+| Source is within 2^14 range.  To perform the int operation,
+| move it to d0.
+|
+src_in:
+	fmovex		ETEMP(%a6),%fp0	|move in src for int
+	fmovel		#rz_mode,%fpcr	|force rz for src conversion
+	fmovel		%fp0,%d0		|int src to d0
+	fmovel		#0,%FPSR		|clr status from above
+	tstw		ETEMP(%a6)	|check src sign
+	blt		src_neg		|negative src; positive falls into src_pos
+|
+| Source is positive.  Add the src to the dest exponent.
+| The result can be denormalized, if src = 0, or overflow,
+| if the result of the add sets a bit in the upper word.
+|
+src_pos:
+	tstw		%d1		|check for denorm
+	beq		dst_dnrm	|dest exponent is zero
+	addl		%d0,%d1		|add src to dest exp
+	beqs		denorm		|if zero, result is denorm
+	cmpil		#0x7fff,%d1	|test for overflow
+	bges		ovfl
+	tstb		L_SCR1(%a6)	|was the dest negative?
+	beqs		spos_pos
+	orw		#0x8000,%d1	|put the sign back on the new exponent
+spos_pos:
+	movew		%d1,FPTEMP(%a6)	|result in FPTEMP
+	fmovel		USER_FPCR(%a6),%FPCR	|reload the user FPCR before the final fmove
+	fmovex		FPTEMP(%a6),%fp0	|write result to fp0
+	rts
+ovfl:
+	tstb		L_SCR1(%a6)	|was the dest negative?
+	beqs		sovl_pos
+	orw		#0x8000,%d1	|NOTE(review): d1 is not stored to memory on this path
+sovl_pos:
+	movew		FPTEMP(%a6),ETEMP(%a6)	|result in ETEMP
+	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)
+	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
+	bra		t_ovfl2		|ETEMP now holds a copy of the original dest
+
+denorm:
+	tstb		L_SCR1(%a6)	|was the dest negative?
+	beqs		den_pos
+	orw		#0x8000,%d1	|put the sign back on the (zero) exponent
+den_pos:
+	tstl		FPTEMP_HI(%a6)	|check j bit
+	blts		nden_exit	|if set, not denorm
+	movew		%d1,ETEMP(%a6)	|input expected in ETEMP
+	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)
+	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
+	orl		#unfl_mask,USER_FPSR(%a6)	|set unfl (the mask, as at every other site in this file; unfl_bit is a bit index)
+	leal		ETEMP(%a6),%a0	|a0 -> operand for t_resdnrm
+	bra		t_resdnrm
+nden_exit:
+	movew		%d1,FPTEMP(%a6)	|result in FPTEMP
+	fmovel		USER_FPCR(%a6),%FPCR	|reload the user FPCR before the final fmove
+	fmovex		FPTEMP(%a6),%fp0	|write result to fp0
+	rts
+
+|
+| Source is negative.  Add the src to the dest exponent.
+| (The result exponent will be reduced).  The result can be
+| denormalized.
+|
+src_neg:
+	addl		%d0,%d1		|add src to dest
+	beqs		denorm		|if zero, result is denorm
+	blts		fix_dnrm	|if negative, result is
+|					;needing denormalization
+	tstb		L_SCR1(%a6)	|was the dest negative?
+	beqs		sneg_pos
+	orw		#0x8000,%d1	|put the sign back on the new exponent
+sneg_pos:
+	movew		%d1,FPTEMP(%a6)	|result in FPTEMP
+	fmovel		USER_FPCR(%a6),%FPCR	|reload the user FPCR before the final fmove
+	fmovex		FPTEMP(%a6),%fp0	|write result to fp0
+	rts
+
+
+|
+| The result exponent is below denorm value.  Test for catastrophic
+| underflow and force zero if true.  If not, try to shift the
+| mantissa right until a zero exponent exists.
+|
+fix_dnrm:
+	cmpiw		#0xffc0,%d1	|lower bound for normalization
+	blt		fix_unfl	|if lower, catastrophic unfl
+	movew		%d1,%d0		|use d0 for exp
+	movel		%d2,-(%a7)	|free d2 for norm
+	movel		FPTEMP_HI(%a6),%d1	|d1:d2 = 64-bit mantissa
+	movel		FPTEMP_LO(%a6),%d2
+	clrl		L_SCR2(%a6)	|no bits lost yet
+fix_loop:
+	addw		#1,%d0		|drive d0 to 0
+	lsrl		#1,%d1		|while shifting the
+	roxrl		#1,%d2		|mantissa to the right
+	bccs		no_carry	|carry = bit shifted out of the low long
+	st		L_SCR2(%a6)	|use L_SCR2 to capture inex
+no_carry:
+	tstw		%d0		|it is finished when
+	blts		fix_loop	|d0 is no longer negative
+	tstb		L_SCR2(%a6)	|were any one bits shifted out?
+	beqs		tst_zero
+	orl		#unfl_inx_mask,USER_FPSR(%a6)
+|					;set unfl, aunfl, ainex
+|
+| Test for zero. If zero, simply use fmove to return +/- zero
+| to the fpu.
+|
+tst_zero:
+	clrw		FPTEMP_EX(%a6)	|exponent word := 0
+	tstb		L_SCR1(%a6)	|test for sign
+	beqs		tst_con
+	orw		#0x8000,FPTEMP_EX(%a6) |set sign bit
+tst_con:
+	movel		%d1,FPTEMP_HI(%a6)	|store the shifted mantissa
+	movel		%d2,FPTEMP_LO(%a6)
+	movel		(%a7)+,%d2	|restore d2 saved in fix_dnrm
+	tstl		%d1
+	bnes		not_zero
+	tstl		FPTEMP_LO(%a6)
+	bnes		not_zero
+|
+| Result is zero.  Check for rounding mode to set lsb.  If the
+| mode is rp, and the zero is positive, return smallest denorm.
+| If the mode is rm, and the zero is negative, return smallest
+| negative denorm.
+|
+	btstb		#5,FPCR_MODE(%a6) |test if rm or rp
+	beqs		no_dir
+	btstb		#4,FPCR_MODE(%a6) |check which one
+	beqs		zer_rm
+zer_rp:
+	tstb		L_SCR1(%a6)	|check sign
+	bnes		no_dir		|if set, neg op, no inc
+	movel		#1,FPTEMP_LO(%a6) |set lsb
+	bras		sm_dnrm
+zer_rm:
+	tstb		L_SCR1(%a6)	|check sign
+	beqs		no_dir		|if clr, pos op, no inc
+	movel		#1,FPTEMP_LO(%a6) |set lsb
+	orl		#neg_mask,USER_FPSR(%a6) |set N
+	bras		sm_dnrm
+no_dir:
+	fmovel		USER_FPCR(%a6),%FPCR
+	fmovex		FPTEMP(%a6),%fp0	|use fmove to set cc's
+	rts
+
+|
+| The rounding mode changed the zero to a smallest denorm. Call
+| t_resdnrm with exceptional operand in ETEMP.
+|
+sm_dnrm:
+	movel		FPTEMP_EX(%a6),ETEMP_EX(%a6)	|copy the long starting at the sign/exponent word
+	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)
+	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
+	leal		ETEMP(%a6),%a0	|a0 -> operand for t_resdnrm
+	bra		t_resdnrm
+
+|
+| Result is still denormalized.
+|
+not_zero:
+	orl		#unfl_mask,USER_FPSR(%a6) |set unfl
+	tstb		L_SCR1(%a6)	|check for sign
+	beqs		fix_exit
+	orl		#neg_mask,USER_FPSR(%a6) |set N
+fix_exit:
+	bras		sm_dnrm		|copy FPTEMP to ETEMP and go to t_resdnrm
+
+
+|
+| The result has underflowed to zero. Return zero and set
+| unfl, aunfl, and ainex.
+|
+fix_unfl:
+	orl		#unfl_inx_mask,USER_FPSR(%a6)
+	btstb		#5,FPCR_MODE(%a6) |test if rm or rp
+	beqs		no_dir2
+	btstb		#4,FPCR_MODE(%a6) |check which one
+	beqs		zer_rm2
+zer_rp2:
+	tstb		L_SCR1(%a6)	|check sign
+	bnes		no_dir2		|if set, neg op, no inc
+	clrl		FPTEMP_EX(%a6)	|positive sign, zero exponent
+	clrl		FPTEMP_HI(%a6)
+	movel		#1,FPTEMP_LO(%a6) |set lsb
+	bras		sm_dnrm		|return smallest denorm
+zer_rm2:
+	tstb		L_SCR1(%a6)	|check sign
+	beqs		no_dir2		|if clr, pos op, no inc
+	movew		#0x8000,FPTEMP_EX(%a6)	|negative sign, zero exponent
+	clrl		FPTEMP_HI(%a6)
+	movel		#1,FPTEMP_LO(%a6) |set lsb
+	orl		#neg_mask,USER_FPSR(%a6) |set N
+	bra		sm_dnrm		|return smallest denorm
+
+no_dir2:
+	tstb		L_SCR1(%a6)	|L_SCR1 was written by smi: nonzero means negative dest
+	bges		pos_zero
+neg_zero:
+	clrl		FP_SCR1(%a6)	|clear the exceptional operand
+	clrl		FP_SCR1+4(%a6)	|for gen_except.
+	clrl		FP_SCR1+8(%a6)
+	fmoves		#0x80000000,%fp0	|return -0
+	rts
+pos_zero:
+	clrl		FP_SCR1(%a6)	|clear the exceptional operand
+	clrl		FP_SCR1+4(%a6)	|for gen_except.
+	clrl		FP_SCR1+8(%a6)
+	fmoves		#0x00000000,%fp0	|return +0
+	rts
+
+|
+| The destination is a denormalized number.  It must be handled
+| by first shifting the bits in the mantissa until it is normalized,
+| then adding the remainder of the source to the exponent.
+|
+dst_dnrm:
+	moveml		%d2/%d3,-(%a7)	|free d2/d3 for the mantissa
+	movew		FPTEMP_EX(%a6),%d1	|sign/exponent word of dest
+	movel		FPTEMP_HI(%a6),%d2	|d2:d3 = 64-bit mantissa
+	movel		FPTEMP_LO(%a6),%d3
+dst_loop:
+	tstl		%d2		|test for normalized result
+	blts		dst_norm	|exit loop if so
+	tstl		%d0		|otherwise, test shift count
+	beqs		dst_fin		|if zero, shifting is done
+	subil		#1,%d0		|dec src
+	lsll		#1,%d3		|shift the mantissa left one bit,
+	roxll		#1,%d2		|carrying from d3 into d2
+	bras		dst_loop
+|
+| Destination became normalized.  Simply add the remaining
+| portion of the src to the exponent.
+|
+dst_norm:
+	addw		%d0,%d1		|dst is normalized; add src
+	tstb		L_SCR1(%a6)	|was the dest negative?
+	beqs		dnrm_pos
+	orl		#0x8000,%d1	|set the sign bit in the exponent word
+dnrm_pos:
+	movemw		%d1,FPTEMP_EX(%a6)	|store exponent word and mantissa back
+	moveml		%d2,FPTEMP_HI(%a6)
+	moveml		%d3,FPTEMP_LO(%a6)
+	fmovel		USER_FPCR(%a6),%FPCR	|reload the user FPCR before the final fmove
+	fmovex		FPTEMP(%a6),%fp0
+	moveml		(%a7)+,%d2/%d3	|restore d2/d3
+	rts
+
+|
+| Destination remained denormalized.  Call t_resdnrm with
+| exceptional operand in ETEMP.
+|
+dst_fin:
+	tstb		L_SCR1(%a6)	|check for sign
+	beqs		dst_exit
+	orl		#neg_mask,USER_FPSR(%a6) |set N
+	orl		#0x8000,%d1	|set the sign bit in the exponent word
+dst_exit:
+	movemw		%d1,ETEMP_EX(%a6)	|store the shifted operand into ETEMP
+	moveml		%d2,ETEMP_HI(%a6)
+	moveml		%d3,ETEMP_LO(%a6)
+	orl		#unfl_mask,USER_FPSR(%a6) |set unfl
+	moveml		(%a7)+,%d2/%d3	|restore d2/d3 saved in dst_dnrm
+	leal		ETEMP(%a6),%a0	|a0 -> operand for t_resdnrm
+	bra		t_resdnrm
+
+|
+| Source is outside of 2^14 range.  Test the sign and branch
+| to the appropriate exception handler.
+|
+src_out:
+	tstb		L_SCR1(%a6)	|was the dest negative?
+	beqs		scro_pos
+	orl		#0x8000,%d1	|set the sign bit on the dest exponent in d1
+scro_pos:
+	movel		FPTEMP_HI(%a6),ETEMP_HI(%a6)	|copy dest mantissa over the src
+	movel		FPTEMP_LO(%a6),ETEMP_LO(%a6)
+	tstw		ETEMP(%a6)	|sign of src (exponent word not yet overwritten)
+	blts		res_neg
+res_pos:
+	movew		%d1,ETEMP(%a6)	|result in ETEMP
+	bra		t_ovfl2		|positive src: overflow handler
+res_neg:
+	movew		%d1,ETEMP(%a6)	|result in ETEMP
+	leal		ETEMP(%a6),%a0	|a0 -> operand for t_unfl
+	bra		t_unfl		|negative src: underflow handler
+	|end
diff --git a/arch/m68k/fpsp040/scosh.S b/arch/m68k/fpsp040/scosh.S
new file mode 100644
index 0000000..e81edbb
--- /dev/null
+++ b/arch/m68k/fpsp040/scosh.S
@@ -0,0 +1,132 @@
+|
+|	scosh.sa 3.1 12/10/90
+|
+|	The entry point sCosh computes the hyperbolic cosine of
+|	an input argument; sCoshd does the same except for denormalized
+|	input.
+|
+|	Input: Double-extended number X in location pointed to
+|		by address register a0.
+|
+|	Output: The value cosh(X) returned in floating-point register Fp0.
+|
+|	Accuracy and Monotonicity: The returned result is within 3 ulps in
+|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+|		result is subsequently rounded to double precision. The
+|		result is provably monotonic in double precision.
+|
+|	Speed: The program sCOSH takes approximately 250 cycles.
+|
+|	Algorithm:
+|
+|	COSH
+|	1. If |X| > 16380 log2, go to 3.
+|
+|	2. (|X| <= 16380 log2) Cosh(X) is obtained by the formulae
+|		Y = |X|, z = exp(Y), and
+|		cosh(X) = (1/2)*( z + 1/z ).
+|		Exit.
+|
+|	3. (|X| > 16380 log2). If |X| > 16480 log2, go to 5.
+|
+|	4. (16380 log2 < |X| <= 16480 log2)
+|		cosh(X) = exp(|X|)/2 (cosh is even, so there is no sign factor).
+|		However, invoking exp(|X|) may cause premature overflow.
+|		Thus, we calculate cosh(X) as follows:
+|		Y	:= |X|
+|		Fact	:=	2**(16380)
+|		Y'	:= Y - 16381 log2
+|		cosh(X) := Fact * exp(Y').
+|		Exit.
+|
+|	5. (|X| > 16480 log2) cosh(X) must overflow. Return
+|		Huge*Huge to generate overflow and a positive infinity
+|		(cosh is never negative). Huge is the largest finite number in
+|		extended format. Exit.
+|
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|SCOSH	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+	|xref	t_ovfl
+	|xref	t_frcinx
+	|xref	setox
+
+T1:	.long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD (DBL, USED WITH fsubd)
+T2:	.long 0x3D6F90AE,0xB1E75CC7 | ... 16381 LOG2 TRAIL (DBL, USED WITH fsubd)
+
+TWO16380: .long 0x7FFB0000,0x80000000,0x00000000,0x00000000	| ... 2**16380, EXTENDED, SCALE FACTOR IN COSHBIG
+
+	.global	scoshd
+scoshd:
+|--COSH(X) = 1 FOR DENORMALIZED X
+|--THE RESULT IS FORMED AS 1 PLUS A SMALL SGL CONSTANT UNDER THE FPCR TAKEN FROM d1.
+	fmoves		#0x3F800000,%fp0	| ...1 IN SGL FORMAT
+
+	fmovel		%d1,%FPCR	| ...LOAD FPCR FROM d1 BEFORE THE FINAL FP OPERATION
+	fadds		#0x00800000,%fp0	| ...ADD A SMALL POSITIVE SGL VALUE
+	bra		t_frcinx
+
+	.global	scosh
+scosh:
+	fmovex		(%a0),%fp0	| ...LOAD INPUT
+
+	movel		(%a0),%d0	| ...SIGN/EXPO INTO HIGH WORD OF d0
+	movew		4(%a0),%d0	| ...TOP 16 MANTISSA BITS INTO LOW WORD
+	andil		#0x7FFFFFFF,%d0	| ...d0 IS |X| IN COMPACT FORM
+	cmpil		#0x400CB167,%d0	| ...ABOVE THE USUAL-CASE BOUND ?
+	bgts		COSHBIG
+
+|--THIS IS THE USUAL CASE, |X| < 16380 LOG2
+|--COSH(X) = (1/2) * ( EXP(X) + 1/EXP(X) )
+
+	fabsx		%fp0		| ...|X|
+
+	movel		%d1,-(%sp)	| ...SAVE d1 ACROSS THE CALL
+	clrl		%d1		| ...setox IS CALLED WITH d1 = 0
+	fmovemx	%fp0-%fp0,(%a0)	|pass parameter to setox
+	bsr		setox		| ...FP0 IS EXP(|X|)
+	fmuls		#0x3F000000,%fp0	| ...(1/2)EXP(|X|)
+	movel		(%sp)+,%d1	| ...RESTORE d1
+
+	fmoves		#0x3E800000,%fp1	| ...(1/4)
+	fdivx		%fp0,%fp1		| ...1/(2 EXP(|X|))
+
+	fmovel		%d1,%FPCR	| ...LOAD FPCR FROM d1 BEFORE THE FINAL FP OPERATION
+	faddx		%fp1,%fp0	| ...(1/2)EXP(|X|) + 1/(2 EXP(|X|))
+
+	bra		t_frcinx
+
+COSHBIG:
+	cmpil		#0x400CB2B3,%d0	| ...ABOVE THE OVERFLOW BOUND ?
+	bgts		COSHHUGE
+
+	fabsx		%fp0
+	fsubd		T1(%pc),%fp0		| ...(|X|-16381LOG2_LEAD)
+	fsubd		T2(%pc),%fp0		| ...|X| - 16381 LOG2, ACCURATE
+
+	movel		%d1,-(%sp)	| ...PUSH d1; IT IS POPPED INTO FPCR BELOW
+	clrl		%d1		| ...setox IS CALLED WITH d1 = 0
+	fmovemx	%fp0-%fp0,(%a0)	| ...PASS REDUCED ARGUMENT TO setox
+	bsr		setox
+	fmovel		(%sp)+,%fpcr	| ...POP SAVED d1 VALUE INTO FPCR
+
+	fmulx		TWO16380(%pc),%fp0	| ...SCALE BY 2**16380
+	bra		t_frcinx
+
+COSHHUGE:
+	fmovel		#0,%fpsr		|clr N bit if set by source
+	bclrb		#7,(%a0)		|always return positive value
+	fmovemx	(%a0),%fp0-%fp0	| ...RELOAD THE NOW-POSITIVE OPERAND
+	bra		t_ovfl
+
+	|end
diff --git a/arch/m68k/fpsp040/setox.S b/arch/m68k/fpsp040/setox.S
new file mode 100644
index 0000000..0aa75f9
--- /dev/null
+++ b/arch/m68k/fpsp040/setox.S
@@ -0,0 +1,865 @@
+|
+|	setox.sa 3.1 12/10/90
+|
+|	The entry point setox computes the exponential of a value.
+|	setoxd does the same except the input value is a denormalized
+|	number.	setoxm1 computes exp(X)-1, and setoxm1d computes
+|	exp(X)-1 for denormalized X.
+|
+|	INPUT
+|	-----
+|	Double-extended value in memory location pointed to by address
+|	register a0.
+|
+|	OUTPUT
+|	------
+|	exp(X) or exp(X)-1 returned in floating-point register fp0.
+|
+|	ACCURACY and MONOTONICITY
+|	-------------------------
+|	The returned result is within 0.85 ulps in 64 significant bits, i.e.
+|	within 0.5001 ulp to 53 bits if the result is subsequently rounded
+|	to double precision. The result is provably monotonic in double
+|	precision.
+|
+|	SPEED
+|	-----
+|	Two timings are measured, both in the copy-back mode. The
+|	first one is measured when the function is invoked the first time
+|	(so the instructions and data are not in cache), and the
+|	second one is measured when the function is reinvoked at the same
+|	input argument.
+|
+|	The program setox takes approximately 210/190 cycles for input
+|	argument X whose magnitude is less than 16380 log2, which
+|	is the usual situation.	For the less common arguments,
+|	depending on their values, the program may run faster or slower --
+|	but no worse than 10% slower even in the extreme cases.
+|
+|	The program setoxm1 takes approximately ???/??? cycles for input
+|	argument X, 0.25 <= |X| < 70log2. For |X| < 0.25, it takes
+|	approximately ???/??? cycles. For the less common arguments,
+|	depending on their values, the program may run faster or slower --
+|	but no worse than 10% slower even in the extreme cases.
+|
+|	ALGORITHM and IMPLEMENTATION NOTES
+|	----------------------------------
+|
+|	setoxd
+|	------
+|	Step 1.	Set ans := 1.0
+|
+|	Step 2.	Return	ans := ans + sign(X)*2^(-126). Exit.
+|	Notes:	This will always generate one exception -- inexact.
+|
+|
+|	setox
+|	-----
+|
+|	Step 1.	Filter out extreme cases of input argument.
+|		1.1	If |X| >= 2^(-65), go to Step 1.3.
+|		1.2	Go to Step 7.
+|		1.3	If |X| < 16380 log(2), go to Step 2.
+|		1.4	Go to Step 8.
+|	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.
+|		 To avoid the use of floating-point comparisons, a
+|		 compact representation of |X| is used. This format is a
+|		 32-bit integer, the upper (more significant) 16 bits are
+|		 the sign and biased exponent field of |X|; the lower 16
+|		 bits are the 16 most significant fraction (including the
+|		 explicit bit) bits of |X|. Consequently, the comparisons
+|		 in Steps 1.1 and 1.3 can be performed by integer comparison.
+|		 Note also that the constant 16380 log(2) used in Step 1.3
+|		 is also in the compact form. Thus taking the branch
+|		 to Step 2 guarantees |X| < 16380 log(2). There is no harm
+|		 to have a small number of cases where |X| is less than,
+|		 but close to, 16380 log(2) and the branch to Step 8 is
+|		 taken.
+|
+|	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).
+|		2.1	Set AdjFlag := 0 (indicates the branch 1.3 -> 2 was taken)
+|		2.2	N := round-to-nearest-integer( X * 64/log2 ).
+|		2.3	Calculate	J = N mod 64; so J = 0,1,2,..., or 63.
+|		2.4	Calculate	M = (N - J)/64; so N = 64M + J.
+|		2.5	Calculate the address of the stored value of 2^(J/64).
+|		2.6	Create the value Scale = 2^M.
+|	Notes:	The calculation in 2.2 is really performed by
+|
+|			Z := X * constant
+|			N := round-to-nearest-integer(Z)
+|
+|		 where
+|
+|			constant := single-precision( 64/log 2 ).
+|
+|		 Using a single-precision constant avoids memory access.
+|		 Another effect of using a single-precision "constant" is
+|		 that the calculated value Z is
+|
+|			Z = X*(64/log2)*(1+eps), |eps| <= 2^(-24).
+|
+|		 This error has to be considered later in Steps 3 and 4.
+|
+|	Step 3.	Calculate X - N*log2/64.
+|		3.1	R := X + N*L1, where L1 := single-precision(-log2/64).
+|		3.2	R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
+|	Notes:	a) The way L1 and L2 are chosen ensures L1+L2 approximate
+|		 the value	-log2/64	to 88 bits of accuracy.
+|		 b) N*L1 is exact because N is no longer than 22 bits and
+|		 L1 is no longer than 24 bits.
+|		 c) The calculation X+N*L1 is also exact due to cancellation.
+|		 Thus, R is practically X+N(L1+L2) to full 64 bits.
+|		 d) It is important to estimate how large can |R| be after
+|		 Step 3.2.
+|
+|			N = rnd-to-int( X*64/log2 (1+eps) ), |eps|<=2^(-24)
+|			X*64/log2 (1+eps)	=	N + f,	|f| <= 0.5
+|			X*64/log2 - N	=	f - eps*X 64/log2
+|			X - N*log2/64	=	f*log2/64 - eps*X
+|
+|
+|		 Now |X| <= 16446 log2, thus
+|
+|			|X - N*log2/64| <= (0.5 + 16446/2^(18))*log2/64
+|					<= 0.57 log2/64.
+|		 This bound will be used in Step 4.
+|
+|	Step 4.	Approximate exp(R)-1 by a polynomial
+|			p = R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
+|	Notes:	a) In order to reduce memory access, the coefficients are
+|		 made as "short" as possible: A1 (which is 1/2), A4 and A5
+|		 are single precision; A2 and A3 are double precision.
+|		 b) Even with the restrictions above,
+|			|p - (exp(R)-1)| < 2^(-68.8) for all |R| <= 0.0062.
+|		 Note that 0.0062 is slightly bigger than 0.57 log2/64.
+|		 c) To fully utilize the pipeline, p is separated into
+|		 two independent pieces of roughly equal complexities
+|			p = [ R + R*S*(A2 + S*A4) ]	+
+|				[ S*(A1 + S*(A3 + S*A5)) ]
+|		 where S = R*R.
+|
+|	Step 5.	Compute 2^(J/64)*exp(R) = 2^(J/64)*(1+p) by
+|				ans := T + ( T*p + t)
+|		 where T and t are the stored values for 2^(J/64).
+|	Notes:	2^(J/64) is stored as T and t where T+t approximates
+|		 2^(J/64) to roughly 85 bits; T is in extended precision
+|		 and t is in single precision. Note also that T is rounded
+|		 to 62 bits so that the last two bits of T are zero. The
+|		 reason for such a special form is that T-1, T-2, and T-8
+|		 will all be exact --- a property that will give much
+|		 more accurate computation of the function EXPM1.
+|
+|	Step 6.	Reconstruction of exp(X)
+|			exp(X) = 2^M * 2^(J/64) * exp(R).
+|		6.1	If AdjFlag = 0, go to 6.3
+|		6.2	ans := ans * AdjScale
+|		6.3	Restore the user FPCR
+|		6.4	Return ans := ans * Scale. Exit.
+|	Notes:	If AdjFlag = 0, we have X = Mlog2 + Jlog2/64 + R,
+|		 |M| <= 16380, and Scale = 2^M. Moreover, exp(X) will
+|		 neither overflow nor underflow. If AdjFlag = 1, that
+|		 means that
+|			X = (M1+M)log2 + Jlog2/64 + R, |M1+M| >= 16380.
+|		 Hence, exp(X) may overflow or underflow or neither.
+|		 When that is the case, AdjScale = 2^(M1) where M1 is
+|		 approximately M. Thus 6.2 will never cause over/underflow.
+|		 Possible exception in 6.4 is overflow or underflow.
+|		 The inexact exception is not generated in 6.4. Although
+|		 one can argue that the inexact flag should always be
+|		 raised, simulating that exception costs much more than the
+|		 flag is worth in practical uses.
+|
+|	Step 7.	Return 1 + X.
+|		7.1	ans := X
+|		7.2	Restore user FPCR.
+|		7.3	Return ans := 1 + ans. Exit
+|	Notes:	For non-zero X, the inexact exception will always be
+|		 raised by 7.3. That is the only exception raised by 7.3.
+|		 Note also that we use the FMOVEM instruction to move X
+|		 in Step 7.1 to avoid unnecessary trapping. (Although
+|		 the FMOVEM may not seem relevant since X is normalized,
+|		 the precaution will be useful in the library version of
+|		 this code where the separate entry for denormalized inputs
+|		 will be done away with.)
+|
+|	Step 8.	Handle exp(X) where |X| >= 16380log2.
+|		8.1	If |X| > 16480 log2, go to Step 9.
+|		(mimic 2.2 - 2.6)
+|		8.2	N := round-to-integer( X * 64/log2 )
+|		8.3	Calculate J = N mod 64, J = 0,1,...,63
+|		8.4	K := (N-J)/64, M1 := truncate(K/2), M = K-M1, AdjFlag := 1.
+|		8.5	Calculate the address of the stored value 2^(J/64).
+|		8.6	Create the values Scale = 2^M, AdjScale = 2^M1.
+|		8.7	Go to Step 3.
+|	Notes:	Refer to notes for 2.2 - 2.6.
+|
+|	Step 9.	Handle exp(X), |X| > 16480 log2.
+|		9.1	If X < 0, go to 9.3
+|		9.2	ans := Huge, go to 9.4
+|		9.3	ans := Tiny.
+|		9.4	Restore user FPCR.
+|		9.5	Return ans := ans * ans. Exit.
+|	Notes:	Exp(X) will surely overflow or underflow, depending on
+|		 X's sign. "Huge" and "Tiny" are respectively large/tiny
+|		 extended-precision numbers whose square over/underflow
+|		 with an inexact result. Thus, 9.5 always raises the
+|		 inexact together with either overflow or underflow.
+|
+|
+|	setoxm1d
+|	--------
+|
+|	Step 1.	Set ans := 0
+|
+|	Step 2.	Return	ans := X + ans. Exit.
+|	Notes:	This will return X with the appropriate rounding
+|		 precision prescribed by the user FPCR.
+|
+|	setoxm1
+|	-------
+|
+|	Step 1.	Check |X|
+|		1.1	If |X| >= 1/4, go to Step 1.3.
+|		1.2	Go to Step 7.
+|		1.3	If |X| < 70 log(2), go to Step 2.
+|		1.4	Go to Step 10.
+|	Notes:	The usual case should take the branches 1.1 -> 1.3 -> 2.
+|		 However, it is conceivable |X| can be small very often
+|		 because EXPM1 is intended to evaluate exp(X)-1 accurately
+|		 when |X| is small. For further details on the comparisons,
+|		 see the notes on Step 1 of setox.
+|
+|	Step 2.	Calculate N = round-to-nearest-int( X * 64/log2 ).
+|		2.1	N := round-to-nearest-integer( X * 64/log2 ).
+|		2.2	Calculate	J = N mod 64; so J = 0,1,2,..., or 63.
+|		2.3	Calculate	M = (N - J)/64; so N = 64M + J.
+|		2.4	Calculate the address of the stored value of 2^(J/64).
+|		2.5	Create the values Sc = 2^M and OnebySc := -2^(-M).
+|	Notes:	See the notes on Step 2 of setox.
+|
+|	Step 3.	Calculate X - N*log2/64.
+|		3.1	R := X + N*L1, where L1 := single-precision(-log2/64).
+|		3.2	R := R + N*L2, L2 := extended-precision(-log2/64 - L1).
+|	Notes:	Applying the analysis of Step 3 of setox in this case
+|		 shows that |R| <= 0.0055 (note that |X| <= 70 log2 in
+|		 this case).
+|
+|	Step 4.	Approximate exp(R)-1 by a polynomial
+|			p = R+R*R*(A1+R*(A2+R*(A3+R*(A4+R*(A5+R*A6)))))
+|	Notes:	a) In order to reduce memory access, the coefficients are
+|		 made as "short" as possible: A1 (which is 1/2), A5 and A6
+|		 are single precision; A2, A3 and A4 are double precision.
+|		 b) Even with the restriction above,
+|			|p - (exp(R)-1)| <	|R| * 2^(-72.7)
+|		 for all |R| <= 0.0055.
+|		 c) To fully utilize the pipeline, p is separated into
+|		 two independent pieces of roughly equal complexity
+|			p = [ R*S*(A2 + S*(A4 + S*A6)) ]	+
+|				[ R + S*(A1 + S*(A3 + S*A5)) ]
+|		 where S = R*R.
+|
+|	Step 5.	Compute 2^(J/64)*p by
+|				p := T*p
+|		 where T and t are the stored values for 2^(J/64).
+|	Notes:	2^(J/64) is stored as T and t where T+t approximates
+|		 2^(J/64) to roughly 85 bits; T is in extended precision
+|		 and t is in single precision. Note also that T is rounded
+|		 to 62 bits so that the last two bits of T are zero. The
+|		 reason for such a special form is that T-1, T-2, and T-8
+|		 will all be exact --- a property that will be exploited
+|		 in Step 6 below. The total relative error in p is no
+|		 bigger than 2^(-67.7) compared to the final result.
+|
+|	Step 6.	Reconstruction of exp(X)-1
+|			exp(X)-1 = 2^M * ( 2^(J/64) + p - 2^(-M) ).
+|		6.1	If M <= 63, go to Step 6.3.
+|		6.2	ans := T + (p + (t + OnebySc)). Go to 6.6
+|		6.3	If M >= -3, go to 6.5.
+|		6.4	ans := (T + (p + t)) + OnebySc. Go to 6.6
+|		6.5	ans := (T + OnebySc) + (p + t).
+|		6.6	Restore user FPCR.
+|		6.7	Return ans := Sc * ans. Exit.
+|	Notes:	The various arrangements of the expressions give accurate
+|		 evaluations.
+|
+|	Step 7.	exp(X)-1 for |X| < 1/4.
+|		7.1	If |X| >= 2^(-65), go to Step 9.
+|		7.2	Go to Step 8.
+|
+|	Step 8.	Calculate exp(X)-1, |X| < 2^(-65).
+|		8.1	If |X| < 2^(-16312), goto 8.3
+|		8.2	Restore FPCR; return ans := X - 2^(-16382). Exit.
+|		8.3	X := X * 2^(140).
+|		8.4	Restore FPCR; ans := ans - 2^(-16382).
+|		 Return ans := ans*2^(140). Exit
+|	Notes:	The idea is to return "X - tiny" under the user
+|		 precision and rounding modes. To avoid unnecessary
+|		 inefficiency, we stay away from denormalized numbers the
+|		 best we can. For |X| >= 2^(-16312), the straightforward
+|		 8.2 generates the inexact exception as the case warrants.
+|
+|	Step 9.	Calculate exp(X)-1, |X| < 1/4, by a polynomial
+|			p = X + X*X*(B1 + X*(B2 + ... + X*B12))
+|	Notes:	a) In order to reduce memory access, the coefficients are
+|		 made as "short" as possible: B1 (which is 1/2), B9 to B12
+|		 are single precision; B3 to B8 are double precision; and
+|		 B2 is double extended.
+|		 b) Even with the restriction above,
+|			|p - (exp(X)-1)| < |X| 2^(-70.6)
+|		 for all |X| <= 0.251.
+|		 Note that 0.251 is slightly bigger than 1/4.
+|		 c) To fully preserve accuracy, the polynomial is computed
+|		 as	X + ( S*B1 +	Q ) where S = X*X and
+|			Q	=	X*S*(B2 + X*(B3 + ... + X*B12))
+|		 d) To fully utilize the pipeline, Q is separated into
+|		 two independent pieces of roughly equal complexity
+|			Q = [ X*S*(B2 + S*(B4 + ... + S*B12)) ] +
+|				[ S*S*(B3 + S*(B5 + ... + S*B11)) ]
+|
+|	Step 10.	Calculate exp(X)-1 for |X| >= 70 log 2.
+|		10.1 If X >= 70log2 , exp(X) - 1 = exp(X) for all practical
+|		 purposes. Therefore, go to Step 1 of setox.
+|		10.2 If X <= -70log2, exp(X) - 1 = -1 for all practical purposes.
+|		 ans := -1
+|		 Restore user FPCR
+|		 Return ans := ans + 2^(-126). Exit.
+|	Notes:	10.2 will always create an inexact and return -1 + tiny
+|		 in the user rounding precision and mode.
+|
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|setox	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+|--L2 (extended): the second piece of -log2/64 used in Step 3.2.
+L2:	.long	0x3FDC0000,0x82E30865,0x4361C4C6,0x00000000
+
+|--setox polynomial coefficients A3 and A2 (double, read with faddd).
+EXPA3:	.long	0x3FA55555,0x55554431
+EXPA2:	.long	0x3FC55555,0x55554018
+
+|--Extended-format constants.  NOTE(review): no instruction in this
+|--file references HUGE or TINY; EXP2BIG branches to t_ovfl/t_unfl.
+HUGE:	.long	0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
+TINY:	.long	0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
+
+|--setoxm1 polynomial coefficients A4 and A3 (double, read with faddd).
+EM1A4:	.long	0x3F811111,0x11174385
+EM1A3:	.long	0x3FA55555,0x55554F5A
+
+|--setoxm1 coefficient A2.  It is read with faddd, so only the first
+|--two longwords are used.
+EM1A2:	.long	0x3FC55555,0x55555555,0x00000000,0x00000000
+
+|--EM1POLY coefficients B8..B3 (double, read with faddd).
+EM1B8:	.long	0x3EC71DE3,0xA5774682
+EM1B7:	.long	0x3EFA01A0,0x19D7CB68
+
+EM1B6:	.long	0x3F2A01A0,0x1A019DF3
+EM1B5:	.long	0x3F56C16C,0x16C170E2
+
+EM1B4:	.long	0x3F811111,0x11111111
+EM1B3:	.long	0x3FA55555,0x55555555
+
+|--EM1POLY coefficient B2 (extended, read with faddx).
+EM1B2:	.long	0x3FFC0000,0xAAAAAAAA,0xAAAAAAAB
+	.long	0x00000000
+
+|--2^(140) and 2^(-140) as doubles (read with fmuld in EM12TINY).
+TWO140:	.long	0x48B00000,0x00000000
+TWON140:	.long	0x37300000,0x00000000
+
+|--Table of 2^(J/64), J = 0,1,...,63.  Each entry is 16 bytes (the code
+|--indexes it with J shifted left by 4): the first 12 bytes are T in
+|--extended precision, the last 4 bytes are t in single precision, as
+|--described in Step 5 of the header.
+EXPTBL:
+	.long	0x3FFF0000,0x80000000,0x00000000,0x00000000
+	.long	0x3FFF0000,0x8164D1F3,0xBC030774,0x9F841A9B
+	.long	0x3FFF0000,0x82CD8698,0xAC2BA1D8,0x9FC1D5B9
+	.long	0x3FFF0000,0x843A28C3,0xACDE4048,0xA0728369
+	.long	0x3FFF0000,0x85AAC367,0xCC487B14,0x1FC5C95C
+	.long	0x3FFF0000,0x871F6196,0x9E8D1010,0x1EE85C9F
+	.long	0x3FFF0000,0x88980E80,0x92DA8528,0x9FA20729
+	.long	0x3FFF0000,0x8A14D575,0x496EFD9C,0xA07BF9AF
+	.long	0x3FFF0000,0x8B95C1E3,0xEA8BD6E8,0xA0020DCF
+	.long	0x3FFF0000,0x8D1ADF5B,0x7E5BA9E4,0x205A63DA
+	.long	0x3FFF0000,0x8EA4398B,0x45CD53C0,0x1EB70051
+	.long	0x3FFF0000,0x9031DC43,0x1466B1DC,0x1F6EB029
+	.long	0x3FFF0000,0x91C3D373,0xAB11C338,0xA0781494
+	.long	0x3FFF0000,0x935A2B2F,0x13E6E92C,0x9EB319B0
+	.long	0x3FFF0000,0x94F4EFA8,0xFEF70960,0x2017457D
+	.long	0x3FFF0000,0x96942D37,0x20185A00,0x1F11D537
+	.long	0x3FFF0000,0x9837F051,0x8DB8A970,0x9FB952DD
+	.long	0x3FFF0000,0x99E04593,0x20B7FA64,0x1FE43087
+	.long	0x3FFF0000,0x9B8D39B9,0xD54E5538,0x1FA2A818
+	.long	0x3FFF0000,0x9D3ED9A7,0x2CFFB750,0x1FDE494D
+	.long	0x3FFF0000,0x9EF53260,0x91A111AC,0x20504890
+	.long	0x3FFF0000,0xA0B0510F,0xB9714FC4,0xA073691C
+	.long	0x3FFF0000,0xA2704303,0x0C496818,0x1F9B7A05
+	.long	0x3FFF0000,0xA43515AE,0x09E680A0,0xA0797126
+	.long	0x3FFF0000,0xA5FED6A9,0xB15138EC,0xA071A140
+	.long	0x3FFF0000,0xA7CD93B4,0xE9653568,0x204F62DA
+	.long	0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x1F283C4A
+	.long	0x3FFF0000,0xAB7A39B5,0xA93ED338,0x9F9A7FDC
+	.long	0x3FFF0000,0xAD583EEA,0x42A14AC8,0xA05B3FAC
+	.long	0x3FFF0000,0xAF3B78AD,0x690A4374,0x1FDF2610
+	.long	0x3FFF0000,0xB123F581,0xD2AC2590,0x9F705F90
+	.long	0x3FFF0000,0xB311C412,0xA9112488,0x201F678A
+	.long	0x3FFF0000,0xB504F333,0xF9DE6484,0x1F32FB13
+	.long	0x3FFF0000,0xB6FD91E3,0x28D17790,0x20038B30
+	.long	0x3FFF0000,0xB8FBAF47,0x62FB9EE8,0x200DC3CC
+	.long	0x3FFF0000,0xBAFF5AB2,0x133E45FC,0x9F8B2AE6
+	.long	0x3FFF0000,0xBD08A39F,0x580C36C0,0xA02BBF70
+	.long	0x3FFF0000,0xBF1799B6,0x7A731084,0xA00BF518
+	.long	0x3FFF0000,0xC12C4CCA,0x66709458,0xA041DD41
+	.long	0x3FFF0000,0xC346CCDA,0x24976408,0x9FDF137B
+	.long	0x3FFF0000,0xC5672A11,0x5506DADC,0x201F1568
+	.long	0x3FFF0000,0xC78D74C8,0xABB9B15C,0x1FC13A2E
+	.long	0x3FFF0000,0xC9B9BD86,0x6E2F27A4,0xA03F8F03
+	.long	0x3FFF0000,0xCBEC14FE,0xF2727C5C,0x1FF4907D
+	.long	0x3FFF0000,0xCE248C15,0x1F8480E4,0x9E6E53E4
+	.long	0x3FFF0000,0xD06333DA,0xEF2B2594,0x1FD6D45C
+	.long	0x3FFF0000,0xD2A81D91,0xF12AE45C,0xA076EDB9
+	.long	0x3FFF0000,0xD4F35AAB,0xCFEDFA20,0x9FA6DE21
+	.long	0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x1EE69A2F
+	.long	0x3FFF0000,0xD99D15C2,0x78AFD7B4,0x207F439F
+	.long	0x3FFF0000,0xDBFBB797,0xDAF23754,0x201EC207
+	.long	0x3FFF0000,0xDE60F482,0x5E0E9124,0x9E8BE175
+	.long	0x3FFF0000,0xE0CCDEEC,0x2A94E110,0x20032C4B
+	.long	0x3FFF0000,0xE33F8972,0xBE8A5A50,0x2004DFF5
+	.long	0x3FFF0000,0xE5B906E7,0x7C8348A8,0x1E72F47A
+	.long	0x3FFF0000,0xE8396A50,0x3C4BDC68,0x1F722F22
+	.long	0x3FFF0000,0xEAC0C6E7,0xDD243930,0xA017E945
+	.long	0x3FFF0000,0xED4F301E,0xD9942B84,0x1F401A5B
+	.long	0x3FFF0000,0xEFE4B99B,0xDCDAF5CC,0x9FB9A9E3
+	.long	0x3FFF0000,0xF281773C,0x59FFB138,0x20744C05
+	.long	0x3FFF0000,0xF5257D15,0x2486CC2C,0x1F773A19
+	.long	0x3FFF0000,0xF7D0DF73,0x0AD13BB8,0x1FFE90D5
+	.long	0x3FFF0000,0xFA83B2DB,0x722A033C,0xA041ED22
+	.long	0x3FFF0000,0xFD3E0C0C,0xF486C174,0x1F853F3A
+
+|--Names for the scratch locations (addressed off a6) used below:
+|--	ADJFLAG		0 on the EXPMAIN path, 1 on the EXPBIG path
+|--	SCALE		2^(M) in extended format (setox)
+|--	ADJSCALE	2^(M1) in extended format (setox, EXPBIG only)
+|--	SC		2^(M) (setoxm1), or -2^(-16382) in EM1TINY/EM12TINY
+|--	ONEBYSC		-2^(-M) in extended format (setoxm1)
+	.set	ADJFLAG,L_SCR2
+	.set	SCALE,FP_SCR1
+	.set	ADJSCALE,FP_SCR2
+	.set	SC,FP_SCR3
+	.set	ONEBYSC,FP_SCR4
+
+	| xref	t_frcinx
+	|xref	t_extdnrm
+	|xref	t_unfl
+	|xref	t_ovfl
+
+	.global	setoxd
+setoxd:
+|--entry point for EXP(X), X is denormalized
+|--Returns 1 + sign(X)*2^(-126); the add is executed after FPCR has
+|--been loaded from d1.  The single-precision addend is built in d0
+|--and passed to the FPU through the stack.
+	movel		(%a0),%d0
+	andil		#0x80000000,%d0		| ...keep only sign(X)
+	oril		#0x00800000,%d0		| ...sign(X)*2^(-126)
+	movel		%d0,-(%sp)
+	fmoves		#0x3F800000,%fp0	| ...fp0 = 1.0
+	fmovel		%d1,%fpcr
+	fadds		(%sp)+,%fp0		| ...add and pop the addend
+	bra		t_frcinx
+
+	.global	setox
+setox:
+|--entry point for EXP(X), here X is finite, non-zero, and not NaN's
+|--a0 points to X; d1 is loaded into FPCR before the final operation.
+
+|--Step 1.
+	movel		(%a0),%d0	 | ...load part of input X
+	andil		#0x7FFF0000,%d0	| ...biased expo. of X
+	cmpil		#0x3FBE0000,%d0	| ...2^(-65)
+	bges		EXPC1		| ...normal case
+	bra		EXPSM
+
+EXPC1:
+|--The case |X| >= 2^(-65)
+|--(also entered from EM1BIG, with d0 = first longword of a positive X)
+	movew		4(%a0),%d0	| ...expo. and partial sig. of |X|
+	cmpil		#0x400CB167,%d0	| ...16380 log2 trunc. 16 bits
+	blts		EXPMAIN	 | ...normal case
+	bra		EXPBIG
+
+EXPMAIN:
+|--Step 2.
+|--This is the normal branch:	2^(-65) <= |X| < 16380 log2.
+	fmovex		(%a0),%fp0	| ...load input from (a0)
+
+	fmovex		%fp0,%fp1
+	fmuls		#0x42B8AA3B,%fp0	| ...64/log2 * X
+	fmovemx	%fp2-%fp2/%fp3,-(%a7)		| ...save fp2/fp3
+	movel		#0,ADJFLAG(%a6)
+	fmovel		%fp0,%d0		| ...N = int( X * 64/log2 )
+	lea		EXPTBL,%a1
+	fmovel		%d0,%fp0		| ...convert to floating-format
+
+	movel		%d0,L_SCR1(%a6)	| ...save N temporarily
+	andil		#0x3F,%d0		| ...D0 is J = N mod 64
+	lsll		#4,%d0			| ...16 bytes per table entry
+	addal		%d0,%a1		| ...address of 2^(J/64)
+	movel		L_SCR1(%a6),%d0
+	asrl		#6,%d0		| ...D0 is M
+	addiw		#0x3FFF,%d0	| ...biased expo. of 2^(M)
+	movew		L2,L_SCR1(%a6)	| ...prefetch L2, no need in CB
+
+EXPCONT1:
+|--Step 3.
+|--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
+|--a1 points to 2^(J/64), D0 is biased expo. of 2^(M)
+|--(EXPBIG joins here with ADJFLAG = 1 and ADJSCALE already built)
+	fmovex		%fp0,%fp2
+	fmuls		#0xBC317218,%fp0	| ...N * L1, L1 = lead(-log2/64)
+	fmulx		L2,%fp2		| ...N * L2, L1+L2 = -log2/64
+	faddx		%fp1,%fp0		| ...X + N*L1
+	faddx		%fp2,%fp0		| ...fp0 is R, reduced arg.
+|	MOVE.W		#$3FA5,EXPA3	...load EXPA3 in cache
+
+|--Step 4.
+|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
+|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*A5))))
+|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
+|--[R+R*S*(A2+S*A4)] + [S*(A1+S*(A3+S*A5))]
+
+	fmovex		%fp0,%fp1
+	fmulx		%fp1,%fp1		| ...fp1 IS S = R*R
+
+	fmoves		#0x3AB60B70,%fp2	| ...fp2 IS A5
+|	MOVE.W		#0,2(%a1)	...load 2^(J/64) in cache
+
+	fmulx		%fp1,%fp2		| ...fp2 IS S*A5
+	fmovex		%fp1,%fp3
+	fmuls		#0x3C088895,%fp3	| ...fp3 IS S*A4
+
+	faddd		EXPA3,%fp2	| ...fp2 IS A3+S*A5
+	faddd		EXPA2,%fp3	| ...fp3 IS A2+S*A4
+
+	fmulx		%fp1,%fp2		| ...fp2 IS S*(A3+S*A5)
+|--build SCALE = 2^(M): expo. word from d0, mantissa 0x80000000 00000000
+	movew		%d0,SCALE(%a6)	| ...SCALE is 2^(M) in extended
+	clrw		SCALE+2(%a6)
+	movel		#0x80000000,SCALE+4(%a6)
+	clrl		SCALE+8(%a6)
+
+	fmulx		%fp1,%fp3		| ...fp3 IS S*(A2+S*A4)
+
+	fadds		#0x3F000000,%fp2	| ...fp2 IS A1+S*(A3+S*A5)
+	fmulx		%fp0,%fp3		| ...fp3 IS R*S*(A2+S*A4)
+
+	fmulx		%fp1,%fp2		| ...fp2 IS S*(A1+S*(A3+S*A5))
+	faddx		%fp3,%fp0		| ...fp0 IS R+R*S*(A2+S*A4),
+|					...fp3 released
+
+	fmovex		(%a1)+,%fp1	| ...fp1 is lead. pt. of 2^(J/64)
+	faddx		%fp2,%fp0		| ...fp0 is EXP(R) - 1
+|					...fp2 released
+
+|--Step 5
+|--final reconstruction process
+|--EXP(X) = 2^M * ( 2^(J/64) + 2^(J/64)*(EXP(R)-1) )
+
+	fmulx		%fp1,%fp0		| ...2^(J/64)*(Exp(R)-1)
+	fmovemx	(%a7)+,%fp2-%fp2/%fp3	| ...fp2/fp3 restored
+	fadds		(%a1),%fp0	| ...accurate 2^(J/64)
+
+	faddx		%fp1,%fp0		| ...2^(J/64) + 2^(J/64)*...
+	movel		ADJFLAG(%a6),%d0
+
+|--Step 6
+	tstl		%d0
+	beqs		NORMAL
+ADJUST:
+	fmulx		ADJSCALE(%a6),%fp0	| ...multiply 2^(M1) first
+NORMAL:
+	fmovel		%d1,%FPCR		| ...restore user FPCR
+	fmulx		SCALE(%a6),%fp0	| ...multiply 2^(M)
+	bra		t_frcinx
+
+EXPSM:
+|--Step 7
+|--|X| < 2^(-65): return 1 + X, with the add done under the FPCR
+|--loaded from d1.
+	fmovemx	(%a0),%fp0-%fp0	| ...in case X is denormalized
+	fmovel		%d1,%FPCR
+	fadds		#0x3F800000,%fp0	| ...1+X in user mode
+	bra		t_frcinx
+
+EXPBIG:
+|--Step 8
+|--d0 holds the compact form of |X| loaded at EXPC1.
+	cmpil		#0x400CB27C,%d0	| ...16480 log2
+	bgts		EXP2BIG
+|--Steps 8.2 -- 8.6
+	fmovex		(%a0),%fp0	| ...load input from (a0)
+
+	fmovex		%fp0,%fp1
+	fmuls		#0x42B8AA3B,%fp0	| ...64/log2 * X
+	fmovemx	 %fp2-%fp2/%fp3,-(%a7)		| ...save fp2/fp3
+	movel		#1,ADJFLAG(%a6)
+	fmovel		%fp0,%d0		| ...N = int( X * 64/log2 )
+	lea		EXPTBL,%a1
+	fmovel		%d0,%fp0		| ...convert to floating-format
+	movel		%d0,L_SCR1(%a6)			| ...save N temporarily
+	andil		#0x3F,%d0		 | ...D0 is J = N mod 64
+	lsll		#4,%d0
+	addal		%d0,%a1			| ...address of 2^(J/64)
+	movel		L_SCR1(%a6),%d0
+	asrl		#6,%d0			| ...D0 is K
+	movel		%d0,L_SCR1(%a6)			| ...save K temporarily
+	asrl		#1,%d0			| ...D0 is M1
+	subl		%d0,L_SCR1(%a6)			| ...L_SCR1 is M = K - M1
+	addiw		#0x3FFF,%d0		| ...biased expo. of 2^(M1)
+	movew		%d0,ADJSCALE(%a6)		| ...ADJSCALE := 2^(M1)
+	clrw		ADJSCALE+2(%a6)
+	movel		#0x80000000,ADJSCALE+4(%a6)
+	clrl		ADJSCALE+8(%a6)
+	movel		L_SCR1(%a6),%d0			| ...D0 is M
+	addiw		#0x3FFF,%d0		| ...biased expo. of 2^(M)
+	bra		EXPCONT1		| ...go back to Step 3
+
+EXP2BIG:
+|--Step 9
+|--Unlike the header description (ans := ans * ans), this code clears
+|--the sign of the operand at (a0) and branches to t_unfl for negative
+|--X or to t_ovfl otherwise.
+	fmovel		%d1,%FPCR
+	movel		(%a0),%d0		| ...d0 keeps the original sign
+	bclrb		#sign_bit,(%a0)		| ...setox always returns positive
+	cmpil		#0,%d0
+	blt		t_unfl
+	bra		t_ovfl
+
+	.global	setoxm1d
+setoxm1d:
+|--entry point for EXPM1(X), here X is denormalized
+|--Step 0.
+|--All work is delegated to t_extdnrm (defined outside this file).
+	bra		t_extdnrm
+
+
+	.global	setoxm1
+setoxm1:
+|--entry point for EXPM1(X), here X is finite, non-zero, non-NaN
+|--a0 points to X; d1 is loaded into FPCR before the final operation.
+
+|--Step 1.
+|--Step 1.1
+	movel		(%a0),%d0	 | ...load part of input X
+	andil		#0x7FFF0000,%d0	| ...biased expo. of X
+	cmpil		#0x3FFD0000,%d0	| ...1/4
+	bges		EM1CON1	 | ...|X| >= 1/4
+	bra		EM1SM
+
+EM1CON1:
+|--Step 1.3
+|--The case |X| >= 1/4
+	movew		4(%a0),%d0	| ...expo. and partial sig. of |X|
+	cmpil		#0x4004C215,%d0	| ...70log2 rounded up to 16 bits
+	bles		EM1MAIN	 | ...1/4 <= |X| <= 70log2
+	bra		EM1BIG
+
+EM1MAIN:
+|--Step 2.
+|--This is the case:	1/4 <= |X| <= 70 log2.
+	fmovex		(%a0),%fp0	| ...load input from (a0)
+
+	fmovex		%fp0,%fp1
+	fmuls		#0x42B8AA3B,%fp0	| ...64/log2 * X
+	fmovemx	%fp2-%fp2/%fp3,-(%a7)		| ...save fp2/fp3
+|	MOVE.W		#$3F81,EM1A4		...prefetch in CB mode
+	fmovel		%fp0,%d0		| ...N = int( X * 64/log2 )
+	lea		EXPTBL,%a1
+	fmovel		%d0,%fp0		| ...convert to floating-format
+
+	movel		%d0,L_SCR1(%a6)			| ...save N temporarily
+	andil		#0x3F,%d0		 | ...D0 is J = N mod 64
+	lsll		#4,%d0
+	addal		%d0,%a1			| ...address of 2^(J/64)
+	movel		L_SCR1(%a6),%d0
+	asrl		#6,%d0			| ...D0 is M
+	movel		%d0,L_SCR1(%a6)			| ...save a copy of M
+|	MOVE.W		#$3FDC,L2		...prefetch L2 in CB mode
+
+|--Step 3.
+|--fp2,fp3 saved on the stack. fp0 is N, fp1 is X,
+|--a1 points to 2^(J/64), D0 and L_SCR1 both contain M
+	fmovex		%fp0,%fp2
+	fmuls		#0xBC317218,%fp0	| ...N * L1, L1 = lead(-log2/64)
+	fmulx		L2,%fp2		| ...N * L2, L1+L2 = -log2/64
+	faddx		%fp1,%fp0	 | ...X + N*L1
+	faddx		%fp2,%fp0	 | ...fp0 is R, reduced arg.
+|	MOVE.W		#$3FC5,EM1A2		...load EM1A2 in cache
+	addiw		#0x3FFF,%d0		| ...D0 is biased expo. of 2^M
+
+|--Step 4.
+|--WE NOW COMPUTE EXP(R)-1 BY A POLYNOMIAL
+|-- R + R*R*(A1 + R*(A2 + R*(A3 + R*(A4 + R*(A5 + R*A6)))))
+|--TO FULLY UTILIZE THE PIPELINE, WE COMPUTE S = R*R
+|--[R*S*(A2+S*(A4+S*A6))] + [R+S*(A1+S*(A3+S*A5))]
+
+	fmovex		%fp0,%fp1
+	fmulx		%fp1,%fp1		| ...fp1 IS S = R*R
+
+	fmoves		#0x3950097B,%fp2	| ...fp2 IS a6
+|	MOVE.W		#0,2(%a1)	...load 2^(J/64) in cache
+
+	fmulx		%fp1,%fp2		| ...fp2 IS S*A6
+	fmovex		%fp1,%fp3
+	fmuls		#0x3AB60B6A,%fp3	| ...fp3 IS S*A5
+
+	faddd		EM1A4,%fp2	| ...fp2 IS A4+S*A6
+	faddd		EM1A3,%fp3	| ...fp3 IS A3+S*A5
+|--build SC = 2^(M): expo. word from d0, mantissa 0x80000000 00000000
+	movew		%d0,SC(%a6)		| ...SC is 2^(M) in extended
+	clrw		SC+2(%a6)
+	movel		#0x80000000,SC+4(%a6)
+	clrl		SC+8(%a6)
+
+	fmulx		%fp1,%fp2		| ...fp2 IS S*(A4+S*A6)
+	movel		L_SCR1(%a6),%d0		| ...D0 is	M
+	negw		%d0		| ...D0 is -M
+	fmulx		%fp1,%fp3		| ...fp3 IS S*(A3+S*A5)
+	addiw		#0x3FFF,%d0	| ...biased expo. of 2^(-M)
+	faddd		EM1A2,%fp2	| ...fp2 IS A2+S*(A4+S*A6)
+	fadds		#0x3F000000,%fp3	| ...fp3 IS A1+S*(A3+S*A5)
+
+	fmulx		%fp1,%fp2		| ...fp2 IS S*(A2+S*(A4+S*A6))
+	oriw		#0x8000,%d0	| ...signed/expo. of -2^(-M)
+	movew		%d0,ONEBYSC(%a6)	| ...OnebySc is -2^(-M)
+	clrw		ONEBYSC+2(%a6)
+	movel		#0x80000000,ONEBYSC+4(%a6)
+	clrl		ONEBYSC+8(%a6)
+	fmulx		%fp3,%fp1		| ...fp1 IS S*(A1+S*(A3+S*A5))
+|					...fp3 released
+
+	fmulx		%fp0,%fp2		| ...fp2 IS R*S*(A2+S*(A4+S*A6))
+	faddx		%fp1,%fp0		| ...fp0 IS R+S*(A1+S*(A3+S*A5))
+|					...fp1 released
+
+	faddx		%fp2,%fp0		| ...fp0 IS EXP(R)-1
+|					...fp2 released
+	fmovemx	(%a7)+,%fp2-%fp2/%fp3	| ...fp2/fp3 restored
+
+|--Step 5
+|--Compute 2^(J/64)*p
+
+	fmulx		(%a1),%fp0	| ...2^(J/64)*(Exp(R)-1)
+
+|--Step 6
+|--Step 6.1
+	movel		L_SCR1(%a6),%d0		| ...retrieve M
+	cmpil		#63,%d0
+	bles		MLE63
+|--Step 6.2	M >= 64
+	fmoves		12(%a1),%fp1	| ...fp1 is t
+	faddx		ONEBYSC(%a6),%fp1	| ...fp1 is t+OnebySc
+	faddx		%fp1,%fp0		| ...p+(t+OnebySc), fp1 released
+	faddx		(%a1),%fp0	| ...T+(p+(t+OnebySc))
+	bras		EM1SCALE
+MLE63:
+|--Step 6.3	M <= 63
+	cmpil		#-3,%d0
+	bges		MGEN3
+MLTN3:
+|--Step 6.4	M <= -4
+	fadds		12(%a1),%fp0	| ...p+t
+	faddx		(%a1),%fp0	| ...T+(p+t)
+	faddx		ONEBYSC(%a6),%fp0	| ...OnebySc + (T+(p+t))
+	bras		EM1SCALE
+MGEN3:
+|--Step 6.5	-3 <= M <= 63
+	fmovex		(%a1)+,%fp1	| ...fp1 is T
+	fadds		(%a1),%fp0	| ...fp0 is p+t
+	faddx		ONEBYSC(%a6),%fp1	| ...fp1 is T+OnebySc
+	faddx		%fp1,%fp0		| ...(T+OnebySc)+(p+t)
+
+EM1SCALE:
+|--Step 6.6
+	fmovel		%d1,%FPCR
+	fmulx		SC(%a6),%fp0		| ...Step 6.7: ans := Sc * ans
+
+	bra		t_frcinx
+
+EM1SM:
+|--Step 7	|X| < 1/4.
+|--d0 still holds the biased exponent of X isolated in Step 1.1.
+	cmpil		#0x3FBE0000,%d0	| ...2^(-65)
+	bges		EM1POLY
+
+EM1TINY:
+|--Step 8	|X| < 2^(-65)
+	cmpil		#0x00330000,%d0	| ...2^(-16312)
+	blts		EM12TINY
+|--Step 8.2
+	movel		#0x80010000,SC(%a6)	| ...SC is -2^(-16382)
+	movel		#0x80000000,SC+4(%a6)
+	clrl		SC+8(%a6)
+	fmovex		(%a0),%fp0
+	fmovel		%d1,%FPCR
+	faddx		SC(%a6),%fp0		| ...X - 2^(-16382)
+
+	bra		t_frcinx
+
+EM12TINY:
+|--Step 8.3
+|--Scale X up by 2^(140), add -2^(-16382), then scale back by 2^(-140)
+|--after FPCR has been loaded from d1.
+	fmovex		(%a0),%fp0
+	fmuld		TWO140,%fp0
+	movel		#0x80010000,SC(%a6)
+	movel		#0x80000000,SC+4(%a6)
+	clrl		SC+8(%a6)
+	faddx		SC(%a6),%fp0
+	fmovel		%d1,%FPCR
+	fmuld		TWON140,%fp0
+
+	bra		t_frcinx
+
+EM1POLY:
+|--Step 9	exp(X)-1 by a simple polynomial
+|--fp1 accumulates the even-index chain (B12, B10, ..., B2) and fp2
+|--the odd-index chain (B11, B9, ..., B3); both are evaluated in S.
+	fmovex		(%a0),%fp0	| ...fp0 is X
+	fmulx		%fp0,%fp0		| ...fp0 is S := X*X
+	fmovemx	%fp2-%fp2/%fp3,-(%a7)	| ...save fp2/fp3
+	fmoves		#0x2F30CAA8,%fp1	| ...fp1 is B12
+	fmulx		%fp0,%fp1		| ...fp1 is S*B12
+	fmoves		#0x310F8290,%fp2	| ...fp2 is B11
+	fadds		#0x32D73220,%fp1	| ...fp1 is B10+S*B12
+
+	fmulx		%fp0,%fp2		| ...fp2 is S*B11
+	fmulx		%fp0,%fp1		| ...fp1 is S*(B10 + ...
+
+	fadds		#0x3493F281,%fp2	| ...fp2 is B9+S*...
+	faddd		EM1B8,%fp1	| ...fp1 is B8+S*...
+
+	fmulx		%fp0,%fp2		| ...fp2 is S*(B9+...
+	fmulx		%fp0,%fp1		| ...fp1 is S*(B8+...
+
+	faddd		EM1B7,%fp2	| ...fp2 is B7+S*...
+	faddd		EM1B6,%fp1	| ...fp1 is B6+S*...
+
+	fmulx		%fp0,%fp2		| ...fp2 is S*(B7+...
+	fmulx		%fp0,%fp1		| ...fp1 is S*(B6+...
+
+	faddd		EM1B5,%fp2	| ...fp2 is B5+S*...
+	faddd		EM1B4,%fp1	| ...fp1 is B4+S*...
+
+	fmulx		%fp0,%fp2		| ...fp2 is S*(B5+...
+	fmulx		%fp0,%fp1		| ...fp1 is S*(B4+...
+
+	faddd		EM1B3,%fp2	| ...fp2 is B3+S*...
+	faddx		EM1B2,%fp1	| ...fp1 is B2+S*...
+
+	fmulx		%fp0,%fp2		| ...fp2 is S*(B3+...
+	fmulx		%fp0,%fp1		| ...fp1 is S*(B2+...
+
+	fmulx		%fp0,%fp2		| ...fp2 is S*S*(B3+...)
+	fmulx		(%a0),%fp1	| ...fp1 is X*S*(B2...
+
+	fmuls		#0x3F000000,%fp0	| ...fp0 is S*B1
+	faddx		%fp2,%fp1		| ...fp1 is Q
+|					...fp2 released
+
+	fmovemx	(%a7)+,%fp2-%fp2/%fp3	| ...fp2/fp3 restored
+
+	faddx		%fp1,%fp0		| ...fp0 is S*B1+Q
+|					...fp1 released
+
+	fmovel		%d1,%FPCR
+	faddx		(%a0),%fp0		| ...X + (S*B1+Q)
+
+	bra		t_frcinx
+
+EM1BIG:
+|--Step 10	|X| > 70 log2
+	movel		(%a0),%d0
+	cmpil		#0,%d0
+	bgt		EXPC1		| ...X > 0: continue in setox at EXPC1
+|--Step 10.2
+	fmoves		#0xBF800000,%fp0	| ...fp0 is -1
+	fmovel		%d1,%FPCR
+	fadds		#0x00800000,%fp0	| ...-1 + 2^(-126)
+
+	bra		t_frcinx
+
+	|end
diff --git a/arch/m68k/fpsp040/sgetem.S b/arch/m68k/fpsp040/sgetem.S
new file mode 100644
index 0000000..0fcbd04
--- /dev/null
+++ b/arch/m68k/fpsp040/sgetem.S
@@ -0,0 +1,141 @@
+|
+|	sgetem.sa 3.1 12/10/90
+|
+|	The entry point sGETEXP returns the exponent portion
+|	of the input argument.  The exponent bias is removed
+|	and the exponent value is returned as an extended
+|	precision number in fp0.  sGETEXPD handles denormalized
+|	numbers.
+|
+|	The entry point sGETMAN extracts the mantissa of the
+|	input argument.  The mantissa is converted to an
+|	extended precision number and returned in fp0.  The
+|	range of the result is [1.0 - 2.0).
+|
+|
+|	Input:  Double-extended number X in the ETEMP space in
+|		the floating-point save stack.
+|
+|	Output:	The functions return exp(X) or man(X) in fp0.
+|
+|	Modified: fp0.
+|
+|
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|SGETEM	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section 8
+
+#include "fpsp.h"
+
+	|xref	nrm_set
+
+|
+| This entry point is used by the unimplemented instruction exception
+| handler.  It points a0 to the input operand.
+|
+|
+|
+|	SGETEXP
+|
+
+|
+|	sgetexp: a0 points to the operand; the unbiased exponent is
+|	returned in fp0.  d0 is used as scratch.
+|
+	.global	sgetexp
+sgetexp:
+	movew	LOCAL_EX(%a0),%d0	|get the exponent
+	bclrl	#15,%d0		|clear the sign bit
+	subw	#0x3fff,%d0	|subtract off the bias
+	fmovew  %d0,%fp0		|move the exp to fp0
+	rts
+
+|
+|	sgetexpd: same as sgetexp for a denormalized operand.  The sign
+|	bit is cleared in memory and the operand at (a0) is passed to
+|	nrm_set before the exponent is read back.
+|
+	.global	sgetexpd
+sgetexpd:
+	bclrb	#sign_bit,LOCAL_EX(%a0)
+	bsr	nrm_set		|normalize (exp will go negative)
+	movew	LOCAL_EX(%a0),%d0	|load resulting exponent into d0
+	subw	#0x3fff,%d0	|subtract off the bias
+	fmovew	%d0,%fp0		|move the exp to fp0
+	rts
+|
+|
+| This entry point is used by the unimplemented instruction exception
+| handler.  It points a0 to the input operand.
+|
+|
+|
+|	SGETMAN
+|
+|
+| For normalized numbers, leave the mantissa alone, simply load
+| with an exponent of +/- $3fff.
+|
+	.global	sgetman
+sgetman:
+|	The operand at (a0) is modified in place: its exponent field is
+|	replaced by $3fff, the sign bit is kept, and the result is then
+|	loaded into fp0.
+	movel	USER_FPCR(%a6),%d0
+	andil	#0xffffff00,%d0	|clear rounding precision and mode
+	fmovel	%d0,%fpcr		|this fpcr setting is used by the 882
+	movew	LOCAL_EX(%a0),%d0	|get the exp (really just want sign bit)
+	orw	#0x7fff,%d0	|clear old exp
+	bclrl	#14,%d0		|make it the new exp +-3fff
+	movew	%d0,LOCAL_EX(%a0)	|move the sign & exp back to fsave stack
+	fmovex	(%a0),%fp0	|put new value back in fp0
+	rts
+
+|
+| For denormalized numbers, shift the mantissa until the j-bit = 1,
+| then load the exponent with +/- $3fff.
+|
+	.global	sgetmand
+sgetmand:
+|	Normalizes the mantissa in place with shft, then continues in
+|	sgetman.  d0 and d1 are used as scratch.
+	movel	LOCAL_HI(%a0),%d0	|load ms mant in d0
+	movel	LOCAL_LO(%a0),%d1	|load ls mant in d1
+	bsr	shft		|shift mantissa bits till msbit is set
+	movel	%d0,LOCAL_HI(%a0)	|put ms mant back on stack
+	movel	%d1,LOCAL_LO(%a0)	|put ls mant back on stack
+	bras	sgetman
+
+|
+|	SHFT
+|
+|	Shifts the mantissa bits until msbit is set.
+|	input:
+|		ms mantissa part in d0
+|		ls mantissa part in d1
+|	output:
+|		shifted bits in d0 and d1
+shft:
+	tstl	%d0		|if any bits set in ms mant
+	bnes	upper		|then branch
+|				;else no bits set in ms mant
+	tstl	%d1		|test if any bits set in ls mant
+	bnes	cont		|if set then continue
+	bras	shft_end	|else return
+cont:
+|	ms mant is zero: the ls mant becomes the new ms mant and the
+|	new ls mant is zero (the old, all-zero d0).
+	movel	%d3,-(%a7)	|save d3
+	exg	%d0,%d1		|shift ls mant to ms mant
+	bfffo	%d0{#0:#32},%d3	|d3 = offset of first 1 in the former ls mant
+	lsll	%d3,%d0		|shift first 1 to integer bit in ms mant
+	movel	(%a7)+,%d3	|restore d3
+	bras	shft_end
+upper:
+
+	moveml	%d3/%d5/%d6,-(%a7)	|save registers
+	bfffo	%d0{#0:#32},%d3	|d3 = offset of first 1 in ms mant
+	lsll	%d3,%d0		|shift ms mant until j-bit is set
+	movel	%d1,%d6		|save ls mant in d6
+	lsll	%d3,%d1		|shift ls mant by count
+	movel	#32,%d5
+	subl	%d3,%d5		|d5 = 32 - shift count
+	lsrl	%d5,%d6		|shift off all bits but those that will
+|				;be shifted into ms mant
+	orl	%d6,%d0		|shift the ls mant bits into the ms mant
+	moveml	(%a7)+,%d3/%d5/%d6	|restore registers
+shft_end:
+	rts
+
+	|end
diff --git a/arch/m68k/fpsp040/sint.S b/arch/m68k/fpsp040/sint.S
new file mode 100644
index 0000000..0f9bd28
--- /dev/null
+++ b/arch/m68k/fpsp040/sint.S
@@ -0,0 +1,247 @@
+|
+|	sint.sa 3.1 12/10/90
+|
+|	The entry point sINT computes the rounded integer
+|	equivalent of the input argument, sINTRZ computes
+|	the integer rounded to zero of the input argument.
+|
+|	Entry points sint and sintrz are called from do_func
+|	to emulate the fint and fintrz unimplemented instructions,
+|	respectively.  Entry point sintdo is used by bindec.
+|
+|	Input: (Entry points sint and sintrz) Double-extended
+|		number X in the ETEMP space in the floating-point
+|		save stack.
+|	       (Entry point sintdo) Double-extended number X in
+|		location pointed to by the address register a0.
+|	       (Entry point sintd) Double-extended denormalized
+|		number X in the ETEMP space in the floating-point
+|		save stack.
+|
+|	Output: The function returns int(X) or intrz(X) in fp0.
+|
+|	Modifies: fp0.
+|
+|	Algorithm: (sint and sintrz)
+|
+|	1. If exp(X) >= 63, return X.
+|	   If exp(X) < 0, return +/- 0 or +/- 1, according to
+|	   the rounding mode.
+|
+|	2. (X is in range) set rsc = 63 - exp(X). Unnormalize the
+|	   result to the exponent $403e.
+|
+|	3. Round the result in the mode given in USER_FPCR. For
+|	   sintrz, force round-to-zero mode.
+|
+|	4. Normalize the rounded result; store in fp0.
+|
+|	For the denormalized cases, force the correct result
+|	for the given sign and rounding mode.
+|
+|		        Sign(X)
+|		RMODE   +    -
+|		-----  --------
+|		 RN    +0   -0
+|		 RZ    +0   -0
+|		 RM    +0   -1
+|		 RP    +1   -0
+|
+|
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|SINT    idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	dnrm_lp
+	|xref	nrm_set
+	|xref	round
+	|xref	t_inx2
+	|xref	ld_pone
+	|xref	ld_mone
+	|xref	ld_pzero
+	|xref	ld_mzero
+	|xref	snzrinx
+
+|
+|	FINT
+|
+	.global	sint
+sint:
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|d1 = user rounding mode field
+|					;bfextu zero-extends, so the upper
+|					;word implies extended precision.
+	movel	%d1,L_SCR1(%a6)		|save mode bits for sintexc
+	bras	sintexc			|join the common code
+
+|
+|	FINT with extended denorm inputs.
+|
+	.global	sintd
+sintd:
+	btstb	#5,FPCR_MODE(%a6)	|bit 5 clear selects rn or rz
+	beq	snzrinx		|if round nearest or round zero, +/- 0
+	btstb	#4,FPCR_MODE(%a6)	|directed mode: bit 4 clear is rm
+	beqs	rnd_mns
+rnd_pls:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|round plus inf: test operand sign
+	bnes	sintmz			|negative operand gives -0
+	bsr	ld_pone		|if round plus inf and pos, answer is +1
+	bra	t_inx2
+rnd_mns:
+	btstb	#sign_bit,LOCAL_EX(%a0)	|round minus inf: test operand sign
+	beqs	sintpz			|positive operand gives +0
+	bsr	ld_mone		|if round mns inf and neg, answer is -1
+	bra	t_inx2
+sintpz:
+	bsr	ld_pzero		|load +0
+	bra	t_inx2
+sintmz:
+	bsr	ld_mzero		|load -0
+	bra	t_inx2
+
+|
+|	FINTRZ
+|
+	.global	sintrz
+sintrz:
+	movel	#1,L_SCR1(%a6)		|force rz (mode value 1) for rounding
+|					;upper word is zero, which implies
+|					;extended precision.
+	bras	sintexc			|join the common code
+|
+|	SINTDO
+|
+|	Input:	a0 points to an IEEE extended format operand
+|	Output:	fp0 has the result
+|
+| Exceptions:
+|
+| If the subroutine results in an inexact operation, the inx2 and
+| ainx bits in the USER_FPSR are set.
+|
+|
+	.global	sintdo
+sintdo:
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1	|d1 = user rounding mode field
+|					;bfextu zero-extends, so the upper
+|					;word implies extended precision.
+	movel	%d1,L_SCR1(%a6)		|save mode bits
+|
+| Real work of sint is in sintexc; sintdo falls through into it.
+|
+sintexc:
+	bclrb	#sign_bit,LOCAL_EX(%a0)	|convert to internal extended
+|					;format (sign kept separately)
+	sne	LOCAL_SGN(%a0)		|LOCAL_SGN = $ff if sign bit was set
+	cmpw	#0x403e,LOCAL_EX(%a0)	|compare exp with $403e (unbiased 63)
+	bgts	out_rnge			|branch if (unbiased) exp > 63
+	cmpw	#0x3ffd,LOCAL_EX(%a0)	|compare exp with $3ffd (unbiased -2)
+	bgt	in_rnge			|if 63 >= exp >= -1, do calc
+|
+| Unbiased exponent is below -1 (biased exp <= $3ffd).  Test the saved sign
+| and the rounding mode.  L_SCR1 contains the rmode in the lower byte.
+|
+un_rnge:
+	btstb	#1,L_SCR1+3(%a6)		|mode bit 1 clear means rn or rz
+	beqs	un_rnrz
+	tstb	LOCAL_SGN(%a0)		|directed mode: check saved sign
+	bnes	un_rmrp_neg
+|
+| Sign is +.  If rp, load +1.0, if rm, load +0.0
+|
+	cmpib	#3,L_SCR1+3(%a6)		|check for rp (mode value 3)
+	beqs	un_ldpone		|if rp, load +1.0
+	bsr	ld_pzero		|if rm, load +0.0
+	bra	t_inx2
+un_ldpone:
+	bsr	ld_pone			|load +1.0
+	bra	t_inx2
+|
+| Sign is -.  If rm, load -1.0, if rp, load -0.0
+|
+un_rmrp_neg:
+	cmpib	#2,L_SCR1+3(%a6)		|check for rm (mode value 2)
+	beqs	un_ldmone		|if rm, load -1.0
+	bsr	ld_mzero		|if rp, load -0.0
+	bra	t_inx2
+un_ldmone:
+	bsr	ld_mone			|load -1.0
+	bra	t_inx2
+|
+| Rmode is rn or rz; return signed zero
+|
+un_rnrz:
+	tstb	LOCAL_SGN(%a0)		|check saved sign
+	bnes	un_rnrz_neg
+	bsr	ld_pzero		|positive operand: load +0.0
+	bra	t_inx2
+un_rnrz_neg:
+	bsr	ld_mzero		|negative operand: load -0.0
+	bra	t_inx2
+
+|
+| Unbiased exponent is greater than 63.  All bits are significant.  Return
+| the input.
+|
+out_rnge:
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|clear saved sign; Z = it was zero
+	beqs	intps			|operand was positive
+	bsetb	#sign_bit,LOCAL_EX(%a0)	|negative: put sign bit back (IEEE ext)
+intps:
+	fmovel	%fpcr,-(%sp)		|save current fpcr
+	fmovel	#0,%fpcr		|load the operand with fpcr = 0
+	fmovex LOCAL_EX(%a0),%fp0	|if exp > 63
+|					;then return X to the user
+|					;there are no fraction bits
+	fmovel	(%sp)+,%fpcr		|restore saved fpcr
+	rts
+
+in_rnge:
+|					;shift off fraction bits
+	clrl	%d0			|clear d0 - initial g,r,s for
+|					;dnrm_lp
+	movel	#0x403e,%d1		|set threshold for dnrm_lp
+|					;assumes a0 points to operand
+	bsr	dnrm_lp
+|					;returns unnormalized number
+|					;pointed by a0
+|					;output d0 supplies g,r,s
+|					;used by round
+	movel	L_SCR1(%a6),%d1		|use the rounding mode saved at entry
+|					;(user mode for sint and sintdo,
+|					;rz for sintrz)
+	bsr	round			|round the unnorm in that mode
+|					;input	a0 ptr to ext X
+|					;	d0 g,r,s bits
+|					;	d1 PREC/MODE info
+|					;output a0 ptr to rounded result
+|					;inexact flag set in USER_FPSR
+|					;if initial grs set
+|
+| normalize the rounded result and store value in fp0
+|
+	bsr	nrm_set			|normalize the unnorm
+|					;Input: a0 points to operand to
+|					;be normalized
+|					;Output: a0 points to normalized
+|					;result
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|clear saved sign; Z = it was zero
+	beqs	nrmrndp			|operand was positive
+	bsetb	#sign_bit,LOCAL_EX(%a0)	|return to IEEE extended format
+nrmrndp:
+	fmovel	%fpcr,-(%sp)		|save current fpcr
+	fmovel	#0,%fpcr		|load the result with fpcr = 0
+	fmovex LOCAL_EX(%a0),%fp0	|move result to fp0
+	fmovel	(%sp)+,%fpcr		|restore saved fpcr
+	rts
+
+	|end
diff --git a/arch/m68k/fpsp040/skeleton.S b/arch/m68k/fpsp040/skeleton.S
new file mode 100644
index 0000000..dbc1255
--- /dev/null
+++ b/arch/m68k/fpsp040/skeleton.S
@@ -0,0 +1,516 @@
+|
+|	skeleton.sa 3.2 4/26/91
+|
+|	This file contains code that is system dependent and will
+|	need to be modified to install the FPSP.
+|
+|	Each entry point for exception 'xxxx' begins with a 'jmp fpsp_xxxx'.
+|	Put any target system specific handling that must be done immediately
+|	before the jump instruction.  If there is no handling necessary, then
+|	the 'fpsp_xxxx' handler entry point should be placed in the exception
+|	table so that the 'jmp' can be eliminated. If the FPSP determines that the
+|	exception is one that must be reported then there will be a
+|	return from the package by a 'jmp real_xxxx'.  At that point
+|	the machine state will be identical to the state before
+|	the FPSP was entered.  In particular, whatever condition
+|	that caused the exception will still be pending when the FPSP
+|	package returns.  Thus, there will be system specific code
+|	to handle the exception.
+|
+|	If the exception was completely handled by the package, then
+|	the return will be via a 'jmp fpsp_done'.  Unless there is
+|	OS specific work to be done (such as handling a context switch or
+|	interrupt) the user program can be resumed via 'rte'.
+|
+|	In the following skeleton code, some typical 'real_xxxx' handling
+|	code is shown.  This code may need to be moved to an appropriate
+|	place in the target system, or rewritten.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|
+|	Modified for Linux-1.3.x by Jes Sorensen (jds@kom.auc.dk)
+|
+
+#include <linux/linkage.h>
+#include <asm/entry.h>
+#include <asm/offsets.h>
+
+|SKELETON	idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section 15
+|
+|	The following counters are used for standalone testing
+|
+
+	|section 8
+
+#include "fpsp.h"
+
+	|xref	b1238_fix
+
+|
+|	Divide by Zero exception
+|
+|	All dz exceptions are 'real', hence no fpsp_dz entry point.
+|
+	.global	dz
+	.global	real_dz
+dz:
+real_dz:
+	link		%a6,#-LOCAL_SIZE	|build the FPSP local frame
+	fsave		-(%sp)			|save FPU internal state on stack
+	bclrb		#E1,E_BYTE(%a6)		|clear the E1 flag in the frame
+	frestore	(%sp)+			|reload the modified FPU state
+	unlk		%a6			|drop the local frame
+
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	movel	%sp,%sp@-		| stack frame pointer argument
+	bsrl	trap_c			| call the C trap handler
+	addql	#4,%sp			| pop the argument
+	bral	ret_from_exception
+
+|
+|	Inexact exception
+|
+|	All inexact exceptions are real, but the 'real' handler
+|	will probably want to clear the pending exception.
+|	The provided code will clear the E3 exception (if pending),
+|	otherwise clear the E1 exception.  The frestore is not really
+|	necessary for E1 exceptions.
+|
+| Code following the 'inex' label is to handle bug #1232.  In this
+| bug, if an E1 snan, ovfl, or unfl occurred, and the process was
+| swapped out before taking the exception, the exception taken on
+| return was inex, rather than the correct exception.  The snan, ovfl,
+| and unfl exception to be taken must not have been enabled.  The
+| fix is to check for E1, and the existence of one of snan, ovfl,
+| or unfl bits set in the fpsr.  If any of these are set, branch
+| to the appropriate  handler for the exception in the fpsr.  Note
+| that this fix is only for d43b parts, and is skipped if the
+| version number is not $40.
+|
+|
+	.global	real_inex
+	.global	inex
+inex:
+	link		%a6,#-LOCAL_SIZE	|build the FPSP local frame
+	fsave		-(%sp)			|save FPU internal state on stack
+	cmpib		#VER_40,(%sp)		|test version number
+	bnes		not_fmt40		|other versions skip the 1232 check
+	fmovel		%fpsr,-(%sp)		|push fpsr so its bits can be tested
+	btstb		#E1,E_BYTE(%a6)		|test for E1 set
+	beqs		not_b1232
+	btstb		#snan_bit,2(%sp) |test for snan in byte 2 of pushed fpsr
+	beq		inex_ckofl
+	addl		#4,%sp			|pop the pushed fpsr
+	frestore	(%sp)+			|undo the fsave
+	unlk		%a6			|undo the link
+	bra		snan			|take the snan entry instead
+inex_ckofl:
+	btstb		#ovfl_bit,2(%sp) |test for ovfl in byte 2 of pushed fpsr
+	beq		inex_ckufl
+	addl		#4,%sp			|pop the pushed fpsr
+	frestore	(%sp)+			|undo the fsave
+	unlk		%a6			|undo the link
+	bra		ovfl			|take the ovfl entry instead
+inex_ckufl:
+	btstb		#unfl_bit,2(%sp) |test for unfl in byte 2 of pushed fpsr
+	beq		not_b1232
+	addl		#4,%sp			|pop the pushed fpsr
+	frestore	(%sp)+			|undo the fsave
+	unlk		%a6			|undo the link
+	bra		unfl			|take the unfl entry instead
+
+|
+| We do not have the bug 1232 case.  Clean up the stack and fall
+| through into real_inex.
+|
+not_b1232:
+	addl		#4,%sp			|pop the pushed fpsr
+	frestore	(%sp)+			|undo the fsave
+	unlk		%a6			|undo the link
+
+real_inex:
+
+	link		%a6,#-LOCAL_SIZE	|build the FPSP local frame
+	fsave		-(%sp)			|save FPU internal state on stack
+not_fmt40:
+	bclrb		#E3,E_BYTE(%a6)		|clear and test E3 flag
+	beqs		inex_cke1		|E3 was not set: clear E1 instead
+|
+| Clear dirty bit on dest register in the frame before branching
+| to b1238_fix.
+|
+	moveml		%d0/%d1,USER_DA(%a6)	|save d0/d1 in the local frame
+	bfextu		CMDREG1B(%a6){#6:#3},%d0		|get dest reg no
+	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
+	bsrl		b1238_fix		|test for bug1238 case
+	moveml		USER_DA(%a6),%d0/%d1	|restore d0/d1
+	bras		inex_done
+inex_cke1:
+	bclrb		#E1,E_BYTE(%a6)		|clear the E1 flag in the frame
+inex_done:
+	frestore	(%sp)+			|reload the modified FPU state
+	unlk		%a6			|drop the local frame
+
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	movel	%sp,%sp@-		| stack frame pointer argument
+	bsrl	trap_c			| call the C trap handler
+	addql	#4,%sp			| pop the argument
+	bral	ret_from_exception
+
+|
+|	Overflow exception
+|
+	|xref	fpsp_ovfl
+	.global	real_ovfl
+	.global	ovfl
+ovfl:
+	jmp	fpsp_ovfl		|let the FPSP try to handle it first
+real_ovfl:
+|					;reached when the overflow must be reported
+	link		%a6,#-LOCAL_SIZE	|build the FPSP local frame
+	fsave		-(%sp)			|save FPU internal state on stack
+	bclrb		#E3,E_BYTE(%a6)		|clear and test E3 flag
+	bnes		ovfl_done		|E3 was set: nothing more to clear
+	bclrb		#E1,E_BYTE(%a6)		|otherwise clear the E1 flag
+ovfl_done:
+	frestore	(%sp)+			|reload the modified FPU state
+	unlk		%a6			|drop the local frame
+
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	movel	%sp,%sp@-		| stack frame pointer argument
+	bsrl	trap_c			| call the C trap handler
+	addql	#4,%sp			| pop the argument
+	bral	ret_from_exception
+
+|
+|	Underflow exception
+|
+	|xref	fpsp_unfl
+	.global	real_unfl
+	.global	unfl
+unfl:
+	jmp	fpsp_unfl		|let the FPSP try to handle it first
+real_unfl:
+|					;reached when the underflow must be reported
+	link		%a6,#-LOCAL_SIZE	|build the FPSP local frame
+	fsave		-(%sp)			|save FPU internal state on stack
+	bclrb		#E3,E_BYTE(%a6)		|clear and test E3 flag
+	bnes		unfl_done		|E3 was set: nothing more to clear
+	bclrb		#E1,E_BYTE(%a6)		|otherwise clear the E1 flag
+unfl_done:
+	frestore	(%sp)+			|reload the modified FPU state
+	unlk		%a6			|drop the local frame
+
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	movel	%sp,%sp@-		| stack frame pointer argument
+	bsrl	trap_c			| call the C trap handler
+	addql	#4,%sp			| pop the argument
+	bral	ret_from_exception
+
+|
+|	Signalling NAN exception
+|
+	|xref	fpsp_snan
+	.global	real_snan
+	.global	snan
+snan:
+	jmp	fpsp_snan		|let the FPSP try to handle it first
+real_snan:
+	link		%a6,#-LOCAL_SIZE	|build the FPSP local frame
+	fsave		-(%sp)			|save FPU internal state on stack
+	bclrb		#E1,E_BYTE(%a6)	|snan is always an E1 exception
+	frestore	(%sp)+			|reload the modified FPU state
+	unlk		%a6			|drop the local frame
+
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	movel	%sp,%sp@-		| stack frame pointer argument
+	bsrl	trap_c			| call the C trap handler
+	addql	#4,%sp			| pop the argument
+	bral	ret_from_exception
+
+|
+|	Operand Error exception
+|
+	|xref	fpsp_operr
+	.global	real_operr
+	.global	operr
+operr:
+	jmp	fpsp_operr		|let the FPSP try to handle it first
+real_operr:
+	link		%a6,#-LOCAL_SIZE	|build the FPSP local frame
+	fsave		-(%sp)			|save FPU internal state on stack
+	bclrb		#E1,E_BYTE(%a6)	|operr is always an E1 exception
+	frestore	(%sp)+			|reload the modified FPU state
+	unlk		%a6			|drop the local frame
+
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	movel	%sp,%sp@-		| stack frame pointer argument
+	bsrl	trap_c			| call the C trap handler
+	addql	#4,%sp			| pop the argument
+	bral	ret_from_exception
+
+
+|
+|	BSUN exception
+|
+|	This sample handler simply clears the nan bit in the FPSR.
+|
+	|xref	fpsp_bsun
+	.global	real_bsun
+	.global	bsun
+bsun:
+	jmp	fpsp_bsun		|let the FPSP try to handle it first
+real_bsun:
+	link		%a6,#-LOCAL_SIZE	|build the FPSP local frame
+	fsave		-(%sp)			|save FPU internal state on stack
+	bclrb		#E1,E_BYTE(%a6)	|bsun is always an E1 exception
+	fmovel		%FPSR,-(%sp)		|push fpsr to edit it in memory
+	bclrb		#nan_bit,(%sp)		|clear nan bit in its first byte
+	fmovel		(%sp)+,%FPSR		|write the edited fpsr back
+	frestore	(%sp)+			|reload the modified FPU state
+	unlk		%a6			|drop the local frame
+
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	movel	%sp,%sp@-		| stack frame pointer argument
+	bsrl	trap_c			| call the C trap handler
+	addql	#4,%sp			| pop the argument
+	bral	ret_from_exception
+
+|
+|	F-line exception
+|
+|	A 'real' F-line exception is one that the FPSP isn't supposed to
+|	handle. E.g. an instruction with a co-processor ID that is not 1.
+|
+|
+	|xref	fpsp_fline
+	.global	real_fline
+	.global	fline
+fline:
+	jmp	fpsp_fline		|let the FPSP try to handle it first
+real_fline:
+|					;no fsave/frestore is done on this path
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	movel	%sp,%sp@-		| stack frame pointer argument
+	bsrl	trap_c			| call the C trap handler
+	addql	#4,%sp			| pop the argument
+	bral	ret_from_exception
+
+|
+|	Unsupported data type exception
+|
+	|xref	fpsp_unsupp
+	.global	real_unsupp
+	.global	unsupp
+unsupp:
+	jmp	fpsp_unsupp		|let the FPSP try to handle it first
+real_unsupp:
+	link		%a6,#-LOCAL_SIZE	|build the FPSP local frame
+	fsave		-(%sp)			|save FPU internal state on stack
+	bclrb		#E1,E_BYTE(%a6)	|unsupp is always an E1 exception
+	frestore	(%sp)+			|reload the modified FPU state
+	unlk		%a6			|drop the local frame
+
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	movel	%sp,%sp@-		| stack frame pointer argument
+	bsrl	trap_c			| call the C trap handler
+	addql	#4,%sp			| pop the argument
+	bral	ret_from_exception
+
+|
+|	Trace exception
+|
+	.global	real_trace
+real_trace:
+	| No FPU state is touched here; control goes straight to trap.
+	bral	trap
+
+|
+|	fpsp_fmt_error --- exit point for frame format error
+|
+|	The fpu stack frame does not match the frames existing
+|	or planned at the time of this writing.  The fpsp is
+|	unable to handle frame sizes not in the following
+|	version:size pairs:
+|
+|	{4060, 4160} - busy frame
+|	{4028, 4130} - unimp frame
+|	{4000, 4100} - idle frame
+|
+|	This entry point simply holds an f-line illegal value.
+|	Replace this with a call to your kernel panic code or
+|	code to handle future revisions of the fpu.
+|
+	.global	fpsp_fmt_error
+fpsp_fmt_error:
+|					;no handler code: just the opcode below
+	.long	0xf27f0000	|f-line illegal
+
+|
+|	fpsp_done --- FPSP exit point
+|
+|	The exception has been handled by the package and we are ready
+|	to return to user mode, but there may be OS specific code
+|	to execute before we do.  If there is, do it now.
+|
+|
+
+	.global	fpsp_done
+fpsp_done:
+	btst	#0x5,%sp@		| supervisor bit set in saved SR?
+	beq	.Lnotkern		| clear: exception came from user mode
+	rte				| from supervisor mode: return at once
+.Lnotkern:
+	SAVE_ALL_INT
+	GET_CURRENT(%d0)
+	tstb	%curptr@(TASK_NEEDRESCHED)	| resched flag byte set?
+	jne	ret_from_exception	| yes: take the common exit path
+					| NOTE(review): only resched is tested; confirm signal delivery
+	RESTORE_ALL
+
+|
+|	mem_write --- write to user or supervisor address space
+|
+| Writes to memory while in supervisor mode.  copyout accomplishes
+| this via a 'moves' instruction.  copyout is a UNIX SVR3 (and later) function.
+| If you don't have copyout, use the local copy of the function below.
+|
+|	a0 - supervisor source address
+|	a1 - user destination address
+|	d0 - number of bytes to write (maximum count is 12)
+|
+| The supervisor source address is guaranteed to point into the supervisor
+| stack.  The result is that a UNIX
+| process is allowed to sleep as a consequence of a page fault during
+| copyout.  The probability of a page fault is exceedingly small because
+| the 68040 always reads the destination address and thus the page
+| faults should have already been handled.
+|
+| If the EXC_SR shows that the exception was from supervisor space,
+| then just do a dumb (and slow) memory move.  In a UNIX environment
+| there shouldn't be any supervisor mode floating point exceptions.
+|
+	.global	mem_write
+mem_write:
+	btstb	#5,EXC_SR(%a6)	|check for supervisor state
+	beqs	user_write	|bit clear: destination is in user space
+super_write:
+	moveb	(%a0)+,(%a1)+	|plain byte copy, supervisor to supervisor
+	subql	#1,%d0		|count down; a zero count on entry would wrap
+	bnes	super_write	|loop until the count reaches zero
+	rts
+user_write:
+	movel	%d1,-(%sp)	|preserve d1 just in case
+	movel	%d0,-(%sp)	|arg 3: byte count
+	movel	%a1,-(%sp)	|arg 2: user destination
+	movel	%a0,-(%sp)	|arg 1: supervisor source
+	jsr		copyout
+	addw	#12,%sp		|pop the three arguments
+	movel	(%sp)+,%d1	|restore d1
+	rts
+|
+|	mem_read --- read from user or supervisor address space
+|
+| Reads from memory while in supervisor mode.  copyin accomplishes
+| this via a 'moves' instruction.  copyin is a UNIX SVR3 (and later) function.
+| If you don't have copyin, use the local copy of the function below.
+|
+| The FPSP calls mem_read to read the original F-line instruction in order
+| to extract the data register number when the 'Dn' addressing mode is
+| used.
+|
+|Input:
+|	a0 - user source address
+|	a1 - supervisor destination address
+|	d0 - number of bytes to read (maximum count is 12)
+|
+| Like mem_write, mem_read always reads with a supervisor
+| destination address on the supervisor stack.  Also like mem_write,
+| the EXC_SR is checked and a simple memory copy is done if reading
+| from supervisor space is indicated.
+|
+	.global	mem_read
+mem_read:
+	btstb	#5,EXC_SR(%a6)	|check for supervisor state
+	beqs	user_read	|bit clear: source is in user space
+super_read:
+	moveb	(%a0)+,(%a1)+	|plain byte copy, supervisor to supervisor
+	subql	#1,%d0		|count down; a zero count on entry would wrap
+	bnes	super_read	|loop until the count reaches zero
+	rts
+user_read:
+	movel	%d1,-(%sp)	|preserve d1 just in case
+	movel	%d0,-(%sp)	|arg 3: byte count
+	movel	%a1,-(%sp)	|arg 2: supervisor destination
+	movel	%a0,-(%sp)	|arg 1: user source
+	jsr	copyin
+	addw	#12,%sp		|pop the three arguments
+	movel	(%sp)+,%d1	|restore d1
+	rts
+
+|
+| Use these routines if your kernel doesn't have copyout/copyin equivalents.
+| Assumes that D0/D1/A0/A1 are scratch registers. copyout overwrites DFC,
+| and copyin overwrites SFC.
+|
+copyout:
+	movel	4(%sp),%a0	| source
+	movel	8(%sp),%a1	| destination
+	movel	12(%sp),%d0	| count
+	subl	#1,%d0		| dec count by 1 for dbra
+	movel	#1,%d1		| value for the disabled movec; unused now
+
+|	DFC is already set
+|	movec	%d1,%DFC		| set dfc for user data space
+moreout:
+	moveb	(%a0)+,%d1	| fetch supervisor byte
+out_ea:
+	movesb	%d1,(%a1)+	| write user byte (listed in __ex_table)
+	dbf	%d0,moreout	| loop count+1 times on the low word of d0
+	rts
+
+copyin:
+	movel	4(%sp),%a0	| source
+	movel	8(%sp),%a1	| destination
+	movel	12(%sp),%d0	| count
+	subl	#1,%d0		| dec count by 1 for dbra
+	movel	#1,%d1		| value for the disabled movec; unused now
+|	SFC is already set
+|	movec	%d1,%SFC		| set sfc for user space
+morein:
+in_ea:
+	movesb	(%a0)+,%d1	| fetch user byte (listed in __ex_table)
+	moveb	%d1,(%a1)+	| write supervisor byte
+	dbf	%d0,morein	| loop count+1 times on the low word of d0
+	rts
+
+	.section .fixup,#alloc,#execinstr
+	.even
+1:
+	jbra	fpsp040_die	| shared fixup target for both entries below
+
+	.section __ex_table,#alloc
+	.align	4
+| Each entry pairs an access address with a fixup address (presumably used on a fault; confirm).
+	.long	in_ea,1b	| the moves read in copyin
+	.long	out_ea,1b	| the moves write in copyout
+
+	|end
diff --git a/arch/m68k/fpsp040/slog2.S b/arch/m68k/fpsp040/slog2.S
new file mode 100644
index 0000000..517fa45
--- /dev/null
+++ b/arch/m68k/fpsp040/slog2.S
@@ -0,0 +1,188 @@
+|
+|	slog2.sa 3.1 12/10/90
+|
+|       The entry point slog10 computes the base-10
+|	logarithm of an input argument X.
+|	slog10d does the same except the input value is a
+|	denormalized number.
+|	sLog2 and sLog2d are the base-2 analogues.
+|
+|       INPUT:	Double-extended value in memory location pointed to
+|		by address register a0.
+|
+|       OUTPUT: log_10(X) or log_2(X) returned in floating-point
+|		register fp0.
+|
+|       ACCURACY and MONOTONICITY: The returned result is within 1.7
+|		ulps in 64 significant bit, i.e. within 0.5003 ulp
+|		to 53 bits if the result is subsequently rounded
+|		to double precision. The result is provably monotonic
+|		in double precision.
+|
+|       SPEED:	Two timings are measured, both in the copy-back mode.
+|		The first one is measured when the function is invoked
+|		the first time (so the instructions and data are not
+|		in cache), and the second one is measured when the
+|		function is reinvoked at the same input argument.
+|
+|       ALGORITHM and IMPLEMENTATION NOTES:
+|
+|       slog10d:
+|
+|       Step 0.   If X < 0, create a NaN and raise the invalid operation
+|                 flag. Otherwise, save FPCR in D1; set FpCR to default.
+|       Notes:    Default means round-to-nearest mode, no floating-point
+|                 traps, and precision control = double extended.
+|
+|       Step 1.   Call slognd to obtain Y = log(X), the natural log of X.
+|       Notes:    Even if X is denormalized, log(X) is always normalized.
+|
+|       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).
+|            2.1  Restore the user FPCR
+|            2.2  Return ans := Y * INV_L10.
+|
+|
+|       slog10:
+|
+|       Step 0.   If X < 0, create a NaN and raise the invalid operation
+|                 flag. Otherwise, save FPCR in D1; set FpCR to default.
+|       Notes:    Default means round-to-nearest mode, no floating-point
+|                 traps, and precision control = double extended.
+|
+|       Step 1.   Call sLogN to obtain Y = log(X), the natural log of X.
+|
+|       Step 2.   Compute log_10(X) = log(X) * (1/log(10)).
+|            2.1  Restore the user FPCR
+|            2.2  Return ans := Y * INV_L10.
+|
+|
+|       sLog2d:
+|
+|       Step 0.   If X < 0, create a NaN and raise the invalid operation
+|                 flag. Otherwise, save FPCR in D1; set FpCR to default.
+|       Notes:    Default means round-to-nearest mode, no floating-point
+|                 traps, and precision control = double extended.
+|
+|       Step 1.   Call slognd to obtain Y = log(X), the natural log of X.
+|       Notes:    Even if X is denormalized, log(X) is always normalized.
+|
+|       Step 2.   Compute log_2(X) = log(X) * (1/log(2)).
+|            2.1  Restore the user FPCR
+|            2.2  Return ans := Y * INV_L2.
+|
+|
+|       sLog2:
+|
+|       Step 0.   If X < 0, create a NaN and raise the invalid operation
+|                 flag. Otherwise, save FPCR in D1; set FpCR to default.
+|       Notes:    Default means round-to-nearest mode, no floating-point
+|                 traps, and precision control = double extended.
+|
+|       Step 1.   If X is not an integer power of two, i.e., X != 2^k,
+|                 go to Step 3.
+|
+|       Step 2.   Return k.
+|            2.1  Get integer k, X = 2^k.
+|            2.2  Restore the user FPCR.
+|            2.3  Return ans := convert-to-double-extended(k).
+|
+|       Step 3.   Call sLogN to obtain Y = log(X), the natural log of X.
+|
+|       Step 4.   Compute log_2(X) = log(X) * (1/log(2)).
+|            4.1  Restore the user FPCR
+|            4.2  Return ans := Y * INV_L2.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|SLOG2    idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+	|xref	t_frcinx
+	|xref	t_operr
+	|xref	slogn
+	|xref	slognd
+
+INV_L10:  .long 0x3FFD0000,0xDE5BD8A9,0x37287195,0x00000000	| 1/log(10)
+
+INV_L2:   .long 0x3FFF0000,0xB8AA3B29,0x5C17F0BC,0x00000000	| 1/log(2)
+
+	.global	slog10d
+slog10d:
+|--entry point for Log10(X), X is denormalized
+	movel		(%a0),%d0		| first longword holds sign and exp
+	blt		invalid			| sign bit set: X < 0
+	movel		%d1,-(%sp)		| save d1 (user FPCR per file header)
+	clrl		%d1			| d1 = 0 for the slognd call
+	bsr		slognd			| ...log(X), X denorm.
+	fmovel		(%sp)+,%fpcr		| load the saved value into fpcr
+	fmulx		INV_L10,%fp0		| fp0 = log(X) * (1/log(10))
+	bra		t_frcinx
+
+	.global	slog10
+slog10:
+|--entry point for Log10(X), X is normalized
+|--d1 is pushed and later loaded into fpcr (user FPCR per file header)
+	movel		(%a0),%d0		| first longword holds sign and exp
+	blt		invalid			| sign bit set: X < 0
+	movel		%d1,-(%sp)		| save d1
+	clrl		%d1			| d1 = 0 for the slogn call
+	bsr		slogn			| ...log(X), X normal.
+	fmovel		(%sp)+,%fpcr		| load the saved value into fpcr
+	fmulx		INV_L10,%fp0		| fp0 = log(X) * (1/log(10))
+	bra		t_frcinx
+
+
+	.global	slog2d
+slog2d:
+|--entry point for Log2(X), X is denormalized
+|--d1 is pushed and later loaded into fpcr (user FPCR per file header)
+	movel		(%a0),%d0		| first longword holds sign and exp
+	blt		invalid			| sign bit set: X < 0
+	movel		%d1,-(%sp)		| save d1
+	clrl		%d1			| d1 = 0 for the slognd call
+	bsr		slognd			| ...log(X), X denorm.
+	fmovel		(%sp)+,%fpcr		| load the saved value into fpcr
+	fmulx		INV_L2,%fp0		| fp0 = log(X) * (1/log(2))
+	bra		t_frcinx
+
+	.global	slog2
+slog2:
+|--entry point for Log2(X), X is normalized
+	movel		(%a0),%d0		| first longword holds sign and exp
+	blt		invalid			| sign bit set: X < 0
+|--X is an exact power of two only if every mantissa bit below the top one is 0
+	movel		8(%a0),%d0		| low mantissa longword
+	bnes		continue		| ...X is not 2^k
+
+	movel		4(%a0),%d0		| high mantissa longword
+	andl		#0x7FFFFFFF,%d0		| ignore its top bit
+	tstl		%d0
+	bnes		continue		| other bits set: X is not 2^k
+
+|--X = 2^k.
+	movew		(%a0),%d0		| sign/exponent word
+	andl		#0x00007FFF,%d0		| keep the 15-bit biased exponent
+	subl		#0x3FFF,%d0		| remove the bias: d0 = k
+	fmovel		%d1,%fpcr		| load d1 (user FPCR per header)
+	fmovel		%d0,%fp0		| fp0 = k converted from integer
+	bra		t_frcinx
+
+continue:
+	movel		%d1,-(%sp)		| save d1
+	clrl		%d1			| d1 = 0 for the slogn call
+	bsr		slogn			| ...log(X), X normal.
+	fmovel		(%sp)+,%fpcr		| load the saved value into fpcr
+	fmulx		INV_L2,%fp0		| fp0 = log(X) * (1/log(2))
+	bra		t_frcinx
+
+invalid:
+	bra		t_operr			| shared exit for negative X
+
+	|end
diff --git a/arch/m68k/fpsp040/slogn.S b/arch/m68k/fpsp040/slogn.S
new file mode 100644
index 0000000..2aaa072
--- /dev/null
+++ b/arch/m68k/fpsp040/slogn.S
@@ -0,0 +1,592 @@
+|
+|	slogn.sa 3.1 12/10/90
+|
+|	slogn computes the natural logarithm of an
+|	input value. slognd does the same except the input value is a
+|	denormalized number. slognp1 computes log(1+X), and slognp1d
+|	computes log(1+X) for denormalized X.
+|
+|	Input: Double-extended value in memory location pointed to by address
+|		register a0.
+|
+|	Output:	log(X) or log(1+X) returned in floating-point register Fp0.
+|
+|	Accuracy and Monotonicity: The returned result is within 2 ulps in
+|		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+|		result is subsequently rounded to double precision. The
+|		result is provably monotonic in double precision.
+|
+|	Speed: The program slogn takes approximately 190 cycles for input
+|		argument X such that |X-1| >= 1/16, which is the usual
+|		situation. For those arguments, slognp1 takes approximately
+|		 210 cycles. For the less common arguments, the program will
+|		 run no worse than 10% slower.
+|
+|	Algorithm:
+|	LOGN:
+|	Step 1. If |X-1| < 1/16, approximate log(X) by an odd polynomial in
+|		u, where u = 2(X-1)/(X+1). Otherwise, move on to Step 2.
+|
+|	Step 2. X = 2**k * Y where 1 <= Y < 2. Define F to be the first seven
+|		significant bits of Y plus 2**(-7), i.e. F = 1.xxxxxx1 in base
+|		2 where the six "x" match those of Y. Note that |Y-F| <= 2**(-7).
+|
+|	Step 3. Define u = (Y-F)/F. Approximate log(1+u) by a polynomial in u,
+|		log(1+u) = poly.
+|
+|	Step 4. Reconstruct log(X) = log( 2**k * Y ) = k*log(2) + log(F) + log(1+u)
+|		by k*log(2) + (log(F) + poly). The values of log(F) are calculated
+|		beforehand and stored in the program.
+|
+|	lognp1:
+|	Step 1: If |X| < 1/16, approximate log(1+X) by an odd polynomial in
+|		u where u = 2X/(2+X). Otherwise, move on to Step 2.
+|
+|	Step 2: Let 1+X = 2**k * Y, where 1 <= Y < 2. Define F as done in Step 2
+|		of the algorithm for LOGN and compute log(1+X) as
+|		k*log(2) + log(F) + poly where poly approximates log(1+u),
+|		u = (Y-F)/F.
+|
+|	Implementation Notes:
+|	Note 1. There are 64 different possible values for F, thus 64 log(F)'s
+|		need to be tabulated. Moreover, the values of 1/F are also
+|		tabulated so that the division in (Y-F)/F can be performed by a
+|		multiplication.
+|
+|	Note 2. In Step 2 of lognp1, in order to preserve accuracy, the value
+|		Y-F has to be calculated carefully when 1/2 <= X < 3/2.
+|
+|	Note 3. To fully exploit the pipeline, polynomials are usually separated
+|		into two parts evaluated independently before being added up.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|slogn	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+BOUNDS1:  .long 0x3FFEF07D,0x3FFF8841	| ...CMP2 BOUNDS FOR X NEAR 1 (EXPONENT:TOP 16 MANTISSA BITS)
+BOUNDS2:  .long 0x3FFE8000,0x3FFFC000	| ...CMP2 BOUNDS [1/2,3/2], SAME COMPACT FORM
+
+LOGOF2:	.long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000	| ...LOG(2) ~ 0.6931, EXTENDED
+
+one:	.long 0x3F800000	| ...1.0, SINGLE
+zero:	.long 0x00000000	| ...0.0, SINGLE
+infty:	.long 0x7F800000	| ...+INFINITY, SINGLE
+negone:	.long 0xBF800000	| ...-1.0, SINGLE
+
+LOGA6:	.long 0x3FC2499A,0xB5E4040B	| ...A6..A1: DOUBLES FOR THE LOG(1+U) POLYNOMIAL
+LOGA5:	.long 0xBFC555B5,0x848CB7DB
+
+LOGA4:	.long 0x3FC99999,0x987D8730
+LOGA3:	.long 0xBFCFFFFF,0xFF6F7E97
+
+LOGA2:	.long 0x3FD55555,0x555555a4
+LOGA1:	.long 0xBFE00000,0x00000008
+
+LOGB5:	.long 0x3F175496,0xADD7DAD6	| ...B5..B1: DOUBLES FOR THE ODD POLYNOMIAL NEAR X=1
+LOGB4:	.long 0x3F3C71C2,0xFE80C7E0
+
+LOGB3:	.long 0x3F624924,0x928BCCFF
+LOGB2:	.long 0x3F899999,0x999995EC
+
+LOGB1:	.long 0x3FB55555,0x55555555
+TWO:	.long 0x40000000,0x00000000	| ...2.0; LOADED WITH FMOVES, SO ONLY THE FIRST LONGWORD IS READ
+
+LTHOLD:	.long 0x3f990000,0x80000000,0x00000000,0x00000000	| ...2^(-102), EXTENDED: LOGNP1 SMALL-INPUT THRESHOLD
+
+LOGTBL:
+	.long  0x3FFE0000,0xFE03F80F,0xE03F80FE,0x00000000
+	.long  0x3FF70000,0xFF015358,0x833C47E2,0x00000000
+	.long  0x3FFE0000,0xFA232CF2,0x52138AC0,0x00000000
+	.long  0x3FF90000,0xBDC8D83E,0xAD88D549,0x00000000
+	.long  0x3FFE0000,0xF6603D98,0x0F6603DA,0x00000000
+	.long  0x3FFA0000,0x9CF43DCF,0xF5EAFD48,0x00000000
+	.long  0x3FFE0000,0xF2B9D648,0x0F2B9D65,0x00000000
+	.long  0x3FFA0000,0xDA16EB88,0xCB8DF614,0x00000000
+	.long  0x3FFE0000,0xEF2EB71F,0xC4345238,0x00000000
+	.long  0x3FFB0000,0x8B29B775,0x1BD70743,0x00000000
+	.long  0x3FFE0000,0xEBBDB2A5,0xC1619C8C,0x00000000
+	.long  0x3FFB0000,0xA8D839F8,0x30C1FB49,0x00000000
+	.long  0x3FFE0000,0xE865AC7B,0x7603A197,0x00000000
+	.long  0x3FFB0000,0xC61A2EB1,0x8CD907AD,0x00000000
+	.long  0x3FFE0000,0xE525982A,0xF70C880E,0x00000000
+	.long  0x3FFB0000,0xE2F2A47A,0xDE3A18AF,0x00000000
+	.long  0x3FFE0000,0xE1FC780E,0x1FC780E2,0x00000000
+	.long  0x3FFB0000,0xFF64898E,0xDF55D551,0x00000000
+	.long  0x3FFE0000,0xDEE95C4C,0xA037BA57,0x00000000
+	.long  0x3FFC0000,0x8DB956A9,0x7B3D0148,0x00000000
+	.long  0x3FFE0000,0xDBEB61EE,0xD19C5958,0x00000000
+	.long  0x3FFC0000,0x9B8FE100,0xF47BA1DE,0x00000000
+	.long  0x3FFE0000,0xD901B203,0x6406C80E,0x00000000
+	.long  0x3FFC0000,0xA9372F1D,0x0DA1BD17,0x00000000
+	.long  0x3FFE0000,0xD62B80D6,0x2B80D62C,0x00000000
+	.long  0x3FFC0000,0xB6B07F38,0xCE90E46B,0x00000000
+	.long  0x3FFE0000,0xD3680D36,0x80D3680D,0x00000000
+	.long  0x3FFC0000,0xC3FD0329,0x06488481,0x00000000
+	.long  0x3FFE0000,0xD0B69FCB,0xD2580D0B,0x00000000
+	.long  0x3FFC0000,0xD11DE0FF,0x15AB18CA,0x00000000
+	.long  0x3FFE0000,0xCE168A77,0x25080CE1,0x00000000
+	.long  0x3FFC0000,0xDE1433A1,0x6C66B150,0x00000000
+	.long  0x3FFE0000,0xCB8727C0,0x65C393E0,0x00000000
+	.long  0x3FFC0000,0xEAE10B5A,0x7DDC8ADD,0x00000000
+	.long  0x3FFE0000,0xC907DA4E,0x871146AD,0x00000000
+	.long  0x3FFC0000,0xF7856E5E,0xE2C9B291,0x00000000
+	.long  0x3FFE0000,0xC6980C69,0x80C6980C,0x00000000
+	.long  0x3FFD0000,0x82012CA5,0xA68206D7,0x00000000
+	.long  0x3FFE0000,0xC4372F85,0x5D824CA6,0x00000000
+	.long  0x3FFD0000,0x882C5FCD,0x7256A8C5,0x00000000
+	.long  0x3FFE0000,0xC1E4BBD5,0x95F6E947,0x00000000
+	.long  0x3FFD0000,0x8E44C60B,0x4CCFD7DE,0x00000000
+	.long  0x3FFE0000,0xBFA02FE8,0x0BFA02FF,0x00000000
+	.long  0x3FFD0000,0x944AD09E,0xF4351AF6,0x00000000
+	.long  0x3FFE0000,0xBD691047,0x07661AA3,0x00000000
+	.long  0x3FFD0000,0x9A3EECD4,0xC3EAA6B2,0x00000000
+	.long  0x3FFE0000,0xBB3EE721,0xA54D880C,0x00000000
+	.long  0x3FFD0000,0xA0218434,0x353F1DE8,0x00000000
+	.long  0x3FFE0000,0xB92143FA,0x36F5E02E,0x00000000
+	.long  0x3FFD0000,0xA5F2FCAB,0xBBC506DA,0x00000000
+	.long  0x3FFE0000,0xB70FBB5A,0x19BE3659,0x00000000
+	.long  0x3FFD0000,0xABB3B8BA,0x2AD362A5,0x00000000
+	.long  0x3FFE0000,0xB509E68A,0x9B94821F,0x00000000
+	.long  0x3FFD0000,0xB1641795,0xCE3CA97B,0x00000000
+	.long  0x3FFE0000,0xB30F6352,0x8917C80B,0x00000000
+	.long  0x3FFD0000,0xB7047551,0x5D0F1C61,0x00000000
+	.long  0x3FFE0000,0xB11FD3B8,0x0B11FD3C,0x00000000
+	.long  0x3FFD0000,0xBC952AFE,0xEA3D13E1,0x00000000
+	.long  0x3FFE0000,0xAF3ADDC6,0x80AF3ADE,0x00000000
+	.long  0x3FFD0000,0xC2168ED0,0xF458BA4A,0x00000000
+	.long  0x3FFE0000,0xAD602B58,0x0AD602B6,0x00000000
+	.long  0x3FFD0000,0xC788F439,0xB3163BF1,0x00000000
+	.long  0x3FFE0000,0xAB8F69E2,0x8359CD11,0x00000000
+	.long  0x3FFD0000,0xCCECAC08,0xBF04565D,0x00000000
+	.long  0x3FFE0000,0xA9C84A47,0xA07F5638,0x00000000
+	.long  0x3FFD0000,0xD2420487,0x2DD85160,0x00000000
+	.long  0x3FFE0000,0xA80A80A8,0x0A80A80B,0x00000000
+	.long  0x3FFD0000,0xD7894992,0x3BC3588A,0x00000000
+	.long  0x3FFE0000,0xA655C439,0x2D7B73A8,0x00000000
+	.long  0x3FFD0000,0xDCC2C4B4,0x9887DACC,0x00000000
+	.long  0x3FFE0000,0xA4A9CF1D,0x96833751,0x00000000
+	.long  0x3FFD0000,0xE1EEBD3E,0x6D6A6B9E,0x00000000
+	.long  0x3FFE0000,0xA3065E3F,0xAE7CD0E0,0x00000000
+	.long  0x3FFD0000,0xE70D785C,0x2F9F5BDC,0x00000000
+	.long  0x3FFE0000,0xA16B312E,0xA8FC377D,0x00000000
+	.long  0x3FFD0000,0xEC1F392C,0x5179F283,0x00000000
+	.long  0x3FFE0000,0x9FD809FD,0x809FD80A,0x00000000
+	.long  0x3FFD0000,0xF12440D3,0xE36130E6,0x00000000
+	.long  0x3FFE0000,0x9E4CAD23,0xDD5F3A20,0x00000000
+	.long  0x3FFD0000,0xF61CCE92,0x346600BB,0x00000000
+	.long  0x3FFE0000,0x9CC8E160,0xC3FB19B9,0x00000000
+	.long  0x3FFD0000,0xFB091FD3,0x8145630A,0x00000000
+	.long  0x3FFE0000,0x9B4C6F9E,0xF03A3CAA,0x00000000
+	.long  0x3FFD0000,0xFFE97042,0xBFA4C2AD,0x00000000
+	.long  0x3FFE0000,0x99D722DA,0xBDE58F06,0x00000000
+	.long  0x3FFE0000,0x825EFCED,0x49369330,0x00000000
+	.long  0x3FFE0000,0x9868C809,0x868C8098,0x00000000
+	.long  0x3FFE0000,0x84C37A7A,0xB9A905C9,0x00000000
+	.long  0x3FFE0000,0x97012E02,0x5C04B809,0x00000000
+	.long  0x3FFE0000,0x87224C2E,0x8E645FB7,0x00000000
+	.long  0x3FFE0000,0x95A02568,0x095A0257,0x00000000
+	.long  0x3FFE0000,0x897B8CAC,0x9F7DE298,0x00000000
+	.long  0x3FFE0000,0x94458094,0x45809446,0x00000000
+	.long  0x3FFE0000,0x8BCF55DE,0xC4CD05FE,0x00000000
+	.long  0x3FFE0000,0x92F11384,0x0497889C,0x00000000
+	.long  0x3FFE0000,0x8E1DC0FB,0x89E125E5,0x00000000
+	.long  0x3FFE0000,0x91A2B3C4,0xD5E6F809,0x00000000
+	.long  0x3FFE0000,0x9066E68C,0x955B6C9B,0x00000000
+	.long  0x3FFE0000,0x905A3863,0x3E06C43B,0x00000000
+	.long  0x3FFE0000,0x92AADE74,0xC7BE59E0,0x00000000
+	.long  0x3FFE0000,0x8F1779D9,0xFDC3A219,0x00000000
+	.long  0x3FFE0000,0x94E9BFF6,0x15845643,0x00000000
+	.long  0x3FFE0000,0x8DDA5202,0x37694809,0x00000000
+	.long  0x3FFE0000,0x9723A1B7,0x20134203,0x00000000
+	.long  0x3FFE0000,0x8CA29C04,0x6514E023,0x00000000
+	.long  0x3FFE0000,0x995899C8,0x90EB8990,0x00000000
+	.long  0x3FFE0000,0x8B70344A,0x139BC75A,0x00000000
+	.long  0x3FFE0000,0x9B88BDAA,0x3A3DAE2F,0x00000000
+	.long  0x3FFE0000,0x8A42F870,0x5669DB46,0x00000000
+	.long  0x3FFE0000,0x9DB4224F,0xFFE1157C,0x00000000
+	.long  0x3FFE0000,0x891AC73A,0xE9819B50,0x00000000
+	.long  0x3FFE0000,0x9FDADC26,0x8B7A12DA,0x00000000
+	.long  0x3FFE0000,0x87F78087,0xF78087F8,0x00000000
+	.long  0x3FFE0000,0xA1FCFF17,0xCE733BD4,0x00000000
+	.long  0x3FFE0000,0x86D90544,0x7A34ACC6,0x00000000
+	.long  0x3FFE0000,0xA41A9E8F,0x5446FB9F,0x00000000
+	.long  0x3FFE0000,0x85BF3761,0x2CEE3C9B,0x00000000
+	.long  0x3FFE0000,0xA633CD7E,0x6771CD8B,0x00000000
+	.long  0x3FFE0000,0x84A9F9C8,0x084A9F9D,0x00000000
+	.long  0x3FFE0000,0xA8489E60,0x0B435A5E,0x00000000
+	.long  0x3FFE0000,0x83993052,0x3FBE3368,0x00000000
+	.long  0x3FFE0000,0xAA59233C,0xCCA4BD49,0x00000000
+	.long  0x3FFE0000,0x828CBFBE,0xB9A020A3,0x00000000
+	.long  0x3FFE0000,0xAC656DAE,0x6BCC4985,0x00000000
+	.long  0x3FFE0000,0x81848DA8,0xFAF0D277,0x00000000
+	.long  0x3FFE0000,0xAE6D8EE3,0x60BB2468,0x00000000
+	.long  0x3FFE0000,0x80808080,0x80808081,0x00000000
+	.long  0x3FFE0000,0xB07197A2,0x3C46C654,0x00000000
+
+	.set	ADJK,L_SCR1
+
+	.set	X,FP_SCR1
+	.set	XDCARE,X+2
+	.set	XFRAC,X+4
+
+	.set	F,FP_SCR2
+	.set	FFRAC,F+4
+
+	.set	KLOG2,FP_SCR3
+
+	.set	SAVEU,FP_SCR4
+
+	| xref	t_frcinx
+	|xref	t_extdnrm
+	|xref	t_operr
+	|xref	t_dz
+
+	.global	slognd
+slognd:
+|--ENTRY POINT FOR LOG(X) FOR DENORMALIZED INPUT
+|
+|--IN:  A0 POINTS TO THE OPERAND; ITS MANTISSA LONGWORDS AT 4(A0) AND
+|--     8(A0) ARE ASSUMED NOT TO BE BOTH ZERO.
+|--OUT: CONTROL PASSES TO LOGBGN WITH
+|--       X(A6)    = MANTISSA SHIFTED LEFT K BITS SO ITS TOP BIT IS SET,
+|--                  UNDER AN ALL-ZERO SIGN/EXPONENT LONGWORD,
+|--       FP0      = THAT VALUE, A0 = ADDRESS OF X(A6),
+|--       ADJK(A6) = -K, SO THAT INPUT = 2^(ADJK) * FP0.
+|--     K IS THE NUMBER OF LEADING ZERO BITS IN THE 64-BIT MANTISSA.
+|--     D2-D7 ARE SAVED HERE AND RESTORED BEFORE LEAVING.
+|--ADJK IS WRITTEN ONCE, AFTER K IS KNOWN; BOTH NORMALIZATION CASES
+|--SHARE THE STORE/EXIT SEQUENCE AT SLOGND_STORE.
+
+     moveml	%d2-%d7,-(%a7)		| ...save some registers
+     moveq	#0,%d3			| ...D3 is exponent of smallest norm. #
+     movel	4(%a0),%d4
+     movel	8(%a0),%d5		| ...(D4,D5) is (Hi_X,Lo_X)
+     clrl	%d6
+
+     tstl	%d4
+     bnes	HiX_not0
+
+HiX_0:
+|----Hi_X is zero: Lo_X moves up 32 bits, then is shifted by its own
+|----leading-zero count.
+     movel	%d5,%d4
+     clrl	%d5
+     bfffo      %d4{#0:#32},%d6	| ...find first 1
+     lsll      %d6,%d4
+     moveq	#32,%d2
+     addl	%d6,%d2			| ...D2 is k = 32 + shift
+     bras	SLOGND_STORE
+
+HiX_not0:
+|----Hi_X is non-zero: shift the pair (D4,D5) left by its leading-zero count.
+     bfffo	%d4{#0:#32},%d6		| ...find first 1
+     movel	%d6,%d2			| ...get k
+     lsll	%d6,%d4
+     movel	%d5,%d7			| ...a copy of D5
+     lsll	%d6,%d5
+     negl	%d6
+     addil	#32,%d6			| ...D6 is 32-k
+     lsrl	%d6,%d7			| ...bits of Lo_X that cross into Hi_X
+     orl	%d7,%d4			| ...(D3,D4,D5) normalized
+
+SLOGND_STORE:
+|----(D3,D4,D5) is normalized and D2 is k.
+     movel	%d3,X(%a6)
+     movel	%d4,XFRAC(%a6)
+     movel	%d5,XFRAC+4(%a6)
+
+     negl	%d2
+     movel	%d2,ADJK(%a6)		| ...ADJK = -k
+
+     fmovex	X(%a6),%fp0
+     moveml	(%a7)+,%d2-%d7		| ...restore registers
+     lea	X(%a6),%a0
+     bras	LOGBGN			| ...begin regular log(X)
+
+
+	.global	slogn
+slogn:
+|--ENTRY POINT FOR LOG(X) FOR X FINITE, NON-ZERO, NOT NAN'S
+|--A0 POINTS TO X. THE RESULT IS LEFT IN FP0 AND THE EXIT IS VIA T_FRCINX.
+	fmovex		(%a0),%fp0	| ...LOAD INPUT
+	movel		#0x00000000,ADJK(%a6)	| ...NO DENORM ADJUSTMENT: ADJK = 0
+
+LOGBGN:
+|--FPCR SAVED AND CLEARED, INPUT IS 2^(ADJK)*FP0, FP0 CONTAINS
+|--A FINITE, NON-ZERO, NORMALIZED NUMBER.
+|--D0 BECOMES THE COMPACT FORM OF X: SIGN/EXPONENT WORD : TOP 16 MANTISSA BITS.
+	movel	(%a0),%d0
+	movew	4(%a0),%d0
+|--COPY THE THREE LONGWORDS OF THE INPUT TO X
+	movel	(%a0),X(%a6)
+	movel	4(%a0),X+4(%a6)
+	movel	8(%a0),X+8(%a6)
+
+	cmpil	#0,%d0		| ...CHECK IF X IS NEGATIVE
+	blt	LOGNEG		| ...LOG OF NEGATIVE ARGUMENT IS INVALID
+	cmp2l	BOUNDS1,%d0	| ...X IS POSITIVE, CHECK IF X IS NEAR 1
+	bcc	LOGNEAR1	| ...BOUNDS IS ROUGHLY [15/16, 17/16]
+
+LOGMAIN:
+|--THIS SHOULD BE THE USUAL CASE, X NOT VERY CLOSE TO 1
+
+|--X = 2^(K) * Y, 1 <= Y < 2. THUS, Y = 1.XXXXXXXX....XX IN BINARY.
+|--WE DEFINE F = 1.XXXXXX1, I.E. FIRST 7 BITS OF Y AND ATTACH A 1.
+|--THE IDEA IS THAT LOG(X) = K*LOG2 + LOG(Y)
+|--			 = K*LOG2 + LOG(F) + LOG(1 + (Y-F)/F).
+|--NOTE THAT U = (Y-F)/F IS VERY SMALL AND THUS APPROXIMATING
+|--LOG(1+U) CAN BE VERY EFFICIENT.
+|--ALSO NOTE THAT THE VALUE 1/F IS STORED IN A TABLE SO THAT NO
+|--DIVISION IS NEEDED TO CALCULATE (Y-F)/F.
+
+|--GET K, Y, F, AND ADDRESS OF 1/F.
+	asrl	#8,%d0
+	asrl	#8,%d0		| ...SHIFTED 16 BITS, BIASED EXPO. OF X
+	subil	#0x3FFF,%d0	| ...THIS IS K
+	addl	ADJK(%a6),%d0	| ...ADJUST K, ORIGINAL INPUT MAY BE  DENORM.
+	lea	LOGTBL,%a0	| ...BASE ADDRESS OF 1/F AND LOG(F)
+	fmovel	%d0,%fp1		| ...CONVERT K TO FLOATING-POINT FORMAT
+
+|--WHILE THE CONVERSION IS GOING ON, WE GET F AND ADDRESS OF 1/F
+	movel	#0x3FFF0000,X(%a6)	| ...X IS NOW Y, I.E. 2^(-K)*X
+	movel	XFRAC(%a6),FFRAC(%a6)
+	andil	#0xFE000000,FFRAC(%a6) | ...FIRST 7 BITS OF Y
+	oril	#0x01000000,FFRAC(%a6) | ...GET F: ATTACH A 1 AT THE EIGHTH BIT
+	movel	FFRAC(%a6),%d0	| ...READY TO GET ADDRESS OF 1/F
+	andil	#0x7E000000,%d0	| ...THE SIX BITS BELOW THE LEADING 1
+	asrl	#8,%d0
+	asrl	#8,%d0
+	asrl	#4,%d0		| ...SHIFTED 20, D0 IS THE DISPLACEMENT (INDEX * 32)
+	addal	%d0,%a0		| ...A0 IS THE ADDRESS FOR 1/F
+
+	fmovex	X(%a6),%fp0	| ...FP0 IS Y
+	movel	#0x3fff0000,F(%a6)	| ...F GETS THE SAME EXPONENT AS Y
+	clrl	F+8(%a6)	| ...LOW MANTISSA LONGWORD OF F IS ZERO
+	fsubx	F(%a6),%fp0		| ...Y-F
+	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...PUSH FP2 AND FP3 WHILE FP0 IS NOT READY
+|--SUMMARY: FP0 IS Y-F, A0 IS ADDRESS OF 1/F, FP1 IS K
+|--ON THE STACK: FP2 AND FP3 (POPPED AGAIN BEFORE THE FINAL ADDS)
+
+LP1CONT1:
+|--A RE-ENTRY POINT FOR LOGNP1
+	fmulx	(%a0),%fp0	| ...FP0 IS U = (Y-F)/F
+	fmulx	LOGOF2,%fp1	| ...GET K*LOG2 WHILE FP0 IS NOT READY
+	fmovex	%fp0,%fp2
+	fmulx	%fp2,%fp2		| ...FP2 IS V=U*U
+	fmovex	%fp1,KLOG2(%a6)	| ...PUT K*LOG2 IN MEMORY, FREE FP1
+
+|--LOG(1+U) IS APPROXIMATED BY
+|--U + V*(A1+U*(A2+U*(A3+U*(A4+U*(A5+U*A6))))) WHICH IS
+|--[U + V*(A1+V*(A3+V*A5))]  +  [U*V*(A2+V*(A4+V*A6))]
+
+	fmovex	%fp2,%fp3	| ...FP3 KEEPS V
+	fmovex	%fp2,%fp1	| ...FP1 AND FP2 EACH START ONE HALF OF THE POLYNOMIAL
+
+	fmuld	LOGA6,%fp1	| ...V*A6
+	fmuld	LOGA5,%fp2	| ...V*A5
+
+	faddd	LOGA4,%fp1	| ...A4+V*A6
+	faddd	LOGA3,%fp2	| ...A3+V*A5
+
+	fmulx	%fp3,%fp1		| ...V*(A4+V*A6)
+	fmulx	%fp3,%fp2		| ...V*(A3+V*A5)
+
+	faddd	LOGA2,%fp1	| ...A2+V*(A4+V*A6)
+	faddd	LOGA1,%fp2	| ...A1+V*(A3+V*A5)
+
+	fmulx	%fp3,%fp1		| ...V*(A2+V*(A4+V*A6))
+	addal	#16,%a0		| ...ADDRESS OF LOG(F)
+	fmulx	%fp3,%fp2		| ...V*(A1+V*(A3+V*A5)), FP3 RELEASED
+
+	fmulx	%fp0,%fp1		| ...U*V*(A2+V*(A4+V*A6))
+	faddx	%fp2,%fp0		| ...U+V*(A1+V*(A3+V*A5)), FP2 RELEASED
+
+	faddx	(%a0),%fp1	| ...LOG(F)+U*V*(A2+V*(A4+V*A6))
+	fmovemx  (%sp)+,%fp2-%fp2/%fp3	| ...POP FP2 AND FP3
+	faddx	%fp1,%fp0		| ...FP0 IS LOG(F) + LOG(1+U)
+
+	fmovel	%d1,%fpcr	| ...D1 GOES TO FPCR BEFORE THE LAST OPERATION
+	faddx	KLOG2(%a6),%fp0	| ...FINAL ADD
+	bra	t_frcinx
+
+
+LOGNEAR1:
+|--REGISTERS SAVED: FPCR, FP1. FP0 CONTAINS THE INPUT.
+	fmovex	%fp0,%fp1
+	fsubs	one,%fp1		| ...FP1 IS X-1
+	fadds	one,%fp0		| ...FP0 IS X+1
+	faddx	%fp1,%fp1		| ...FP1 IS 2(X-1)
+|--LOG(X) = LOG(1+U/2)-LOG(1-U/2) WHICH IS AN ODD POLYNOMIAL
+|--IN U, U = 2(X-1)/(X+1) = FP1/FP0
+
+LP1CONT2:
+|--THIS IS A RE-ENTRY POINT FOR LOGNP1 (FP1 = NUMERATOR, FP0 = DENOMINATOR)
+	fdivx	%fp0,%fp1		| ...FP1 IS U
+	fmovemx %fp2-%fp2/%fp3,-(%sp)	 | ...PUSH FP2 AND FP3
+|--FP2 AND FP3 ARE NOW ON THE STACK AND ARE POPPED BELOW
+|--LET V=U*U, W=V*V, CALCULATE
+|--U + U*V*(B1 + V*(B2 + V*(B3 + V*(B4 + V*B5)))) BY
+|--U + U*V*(  [B1 + W*(B3 + W*B5)]  +  [V*(B2 + W*B4)]  )
+	fmovex	%fp1,%fp0
+	fmulx	%fp0,%fp0	| ...FP0 IS V
+	fmovex	%fp1,SAVEU(%a6) | ...STORE U IN MEMORY, FREE FP1
+	fmovex	%fp0,%fp1
+	fmulx	%fp1,%fp1	| ...FP1 IS W
+
+	fmoved	LOGB5,%fp3
+	fmoved	LOGB4,%fp2
+
+	fmulx	%fp1,%fp3	| ...W*B5
+	fmulx	%fp1,%fp2	| ...W*B4
+
+	faddd	LOGB3,%fp3 | ...B3+W*B5
+	faddd	LOGB2,%fp2 | ...B2+W*B4
+
+	fmulx	%fp3,%fp1	| ...W*(B3+W*B5), FP3 RELEASED
+
+	fmulx	%fp0,%fp2	| ...V*(B2+W*B4)
+
+	faddd	LOGB1,%fp1 | ...B1+W*(B3+W*B5)
+	fmulx	SAVEU(%a6),%fp0 | ...FP0 IS U*V
+
+	faddx	%fp2,%fp1	| ...B1+W*(B3+W*B5) + V*(B2+W*B4), FP2 RELEASED
+	fmovemx (%sp)+,%fp2-%fp2/%fp3 | ...POP FP2 AND FP3
+
+	fmulx	%fp1,%fp0	| ...U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] )
+
+	fmovel	%d1,%fpcr	| ...D1 GOES TO FPCR BEFORE THE LAST OPERATION
+	faddx	SAVEU(%a6),%fp0	| ...FINAL ADD OF U
+	bra	t_frcinx
+|--THE BRA ABOVE IS UNCONDITIONAL; NOTHING FALLS THROUGH PAST THIS POINT.
+
+LOGNEG:
+|--REACHED FROM LOGBGN WHEN THE COMPACT FORM OF X IS NEGATIVE. LOG(-VE) IS INVALID
+	bra	t_operr
+
+	.global	slognp1d
+slognp1d:
+|--ENTRY POINT FOR LOG(1+Z) FOR DENORMALIZED INPUT
+| Simply return the denorm: nothing is computed here,
+| control goes straight to t_extdnrm.
+	bra	t_extdnrm
+
+	.global	slognp1
+slognp1:
+|--ENTRY POINT FOR LOG(1+X) FOR X FINITE, NON-ZERO, NOT NAN'S
+|--WHEN THE MAGNITUDE OF THE INPUT IS NOT ABOVE LTHOLD, THE INPUT ITSELF IS RETURNED.
+	fmovex	(%a0),%fp0	| ...LOAD INPUT
+	fabsx	%fp0		|test magnitude
+	fcmpx	LTHOLD,%fp0	|compare with min threshold
+	fbgt	LP1REAL		|if greater, continue
+	fmovel	#0,%fpsr		|clr N flag from compare
+	fmovel	%d1,%fpcr
+	fmovex	(%a0),%fp0	|return signed argument
+	bra	t_frcinx
+
+LP1REAL:
+	fmovex	(%a0),%fp0	| ...LOAD INPUT
+	movel	#0x00000000,ADJK(%a6)
+	fmovex	%fp0,%fp1	| ...FP1 IS INPUT Z
+	fadds	one,%fp0	| ...X := ROUND(1+Z)
+	fmovex	%fp0,X(%a6)
+	movew	XFRAC(%a6),XDCARE(%a6)	| ...TOP 16 MANTISSA BITS NEXT TO THE EXPONENT
+	movel	X(%a6),%d0	| ...D0 IS X IN COMPACT FORM
+	cmpil	#0,%d0
+	ble	LP1NEG0	| ...LOG OF ZERO OR -VE
+	cmp2l	BOUNDS2,%d0
+	bcs	LOGMAIN	| ...BOUNDS2 IS [1/2,3/2]
+|--IF 1+Z > 3/2 OR 1+Z < 1/2, THEN X, WHICH IS ROUNDING 1+Z,
+|--CONTAINS AT LEAST 63 BITS OF INFORMATION OF Z. IN THAT CASE,
+|--SIMPLY INVOKE LOG(X) FOR LOG(1+Z).
+
+LP1NEAR1:
+|--NEXT SEE IF EXP(-1/16) < X < EXP(1/16)
+	cmp2l	BOUNDS1,%d0
+	bcss	LP1CARE		| ...OUTSIDE BOUNDS1: USE THE TABLE-DRIVEN PATH
+
+LP1ONE16:
+|--EXP(-1/16) < X < EXP(1/16). LOG(1+Z) = LOG(1+U/2) - LOG(1-U/2)
+|--WHERE U = 2Z/(2+Z) = 2Z/(1+X).
+	faddx	%fp1,%fp1	| ...FP1 IS 2Z
+	fadds	one,%fp0	| ...FP0 IS 1+X
+|--U = FP1/FP0
+	bra	LP1CONT2
+
+LP1CARE:
+|--HERE WE USE THE USUAL TABLE DRIVEN APPROACH. CARE HAS TO BE
+|--TAKEN BECAUSE 1+Z CAN HAVE 67 BITS OF INFORMATION AND WE MUST
+|--PRESERVE ALL THE INFORMATION. BECAUSE 1+Z IS IN [1/2,3/2],
+|--THERE ARE ONLY TWO CASES.
+|--CASE 1: 1+Z < 1, THEN K = -1 AND Y-F = (2-F) + 2Z
+|--CASE 2: 1+Z > 1, THEN K = 0  AND Y-F = (1-F) + Z
+|--ON RETURNING TO LP1CONT1, WE MUST HAVE K IN FP1, ADDRESS OF
+|--(1/F) IN A0, Y-F IN FP0, AND FP2 SAVED.
+
+	movel	XFRAC(%a6),FFRAC(%a6)
+	andil	#0xFE000000,FFRAC(%a6)
+	oril	#0x01000000,FFRAC(%a6)	| ...F OBTAINED
+	cmpil	#0x3FFF8000,%d0	| ...SEE IF 1+Z > 1
+	bges	KISZERO
+
+KISNEG1:
+	fmoves	TWO,%fp0
+	movel	#0x3fff0000,F(%a6)
+	clrl	F+8(%a6)
+	fsubx	F(%a6),%fp0	| ...2-F
+	movel	FFRAC(%a6),%d0
+	andil	#0x7E000000,%d0
+	asrl	#8,%d0
+	asrl	#8,%d0
+	asrl	#4,%d0		| ...D0 CONTAINS DISPLACEMENT FOR 1/F
+	faddx	%fp1,%fp1		| ...GET 2Z
+	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...PUSH FP2 AND FP3
+	faddx	%fp1,%fp0		| ...FP0 IS Y-F = (2-F)+2Z
+	lea	LOGTBL,%a0	| ...A0 IS ADDRESS OF 1/F
+	addal	%d0,%a0
+	fmoves	negone,%fp1	| ...FP1 IS K = -1
+	bra	LP1CONT1
+
+KISZERO:
+	fmoves	one,%fp0
+	movel	#0x3fff0000,F(%a6)
+	clrl	F+8(%a6)
+	fsubx	F(%a6),%fp0		| ...1-F
+	movel	FFRAC(%a6),%d0
+	andil	#0x7E000000,%d0
+	asrl	#8,%d0
+	asrl	#8,%d0
+	asrl	#4,%d0		| ...D0 CONTAINS DISPLACEMENT FOR 1/F
+	faddx	%fp1,%fp0		| ...FP0 IS Y-F
+	fmovemx %fp2-%fp2/%fp3,-(%sp)	| ...PUSH FP2 AND FP3
+	lea	LOGTBL,%a0
+	addal	%d0,%a0		| ...A0 IS ADDRESS OF 1/F
+	fmoves	zero,%fp1	| ...FP1 IS K = 0
+	bra	LP1CONT1
+
+LP1NEG0:
+|--FPCR SAVED. D0 IS X IN COMPACT FORM.
+	cmpil	#0,%d0
+	blts	LP1NEG		| ...NEGATIVE 1+Z; OTHERWISE IT IS ZERO
+LP1ZERO:
+	fmoves	negone,%fp0
+
+	fmovel	%d1,%fpcr
+	bra t_dz
+
+LP1NEG:
+	fmoves	zero,%fp0
+
+	fmovel	%d1,%fpcr
+	bra	t_operr
+
+	|end
diff --git a/arch/m68k/fpsp040/smovecr.S b/arch/m68k/fpsp040/smovecr.S
new file mode 100644
index 0000000..a0127fa
--- /dev/null
+++ b/arch/m68k/fpsp040/smovecr.S
@@ -0,0 +1,162 @@
+|
+|	smovecr.sa 3.1 12/10/90
+|
+|	The entry point sMOVECR returns the constant at the
+|	offset given in the instruction field.
+|
+|	Input: An offset in the instruction word.
+|
+|	Output:	The constant rounded to the user's rounding
+|		mode unchecked for overflow.
+|
+|	Modified: fp0.
+|
+|
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|SMOVECR	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section 8
+
+#include "fpsp.h"
+
+	|xref	nrm_set
+	|xref	round
+	|xref	PIRN
+	|xref	PIRZRM
+	|xref	PIRP
+	|xref	SMALRN
+	|xref	SMALRZRM
+	|xref	SMALRP
+	|xref	BIGRN
+	|xref	BIGRZRM
+	|xref	BIGRP
+
+FZERO:	.long	00000000		|single-precision 0.0 loaded at Z_VAL
+|
+|	FMOVECR
+|
+	.global	smovcr
+smovcr:
+	bfextu	CMDREG1B(%a6){#9:#7},%d0 |get offset
+	bfextu	USER_FPCR(%a6){#26:#2},%d1 |get rmode
+|
+| check range of offset
+|
+	tstb	%d0		|if zero, offset is to pi
+	beqs	PI_TBL		|it is pi
+	cmpib	#0x0a,%d0		|check range $01 - $0a
+	bles	Z_VAL		|if in this range, return zero
+	cmpib	#0x0e,%d0		|check range $0b - $0e
+	bles	SM_TBL		|valid constants in this range
+	cmpib	#0x2f,%d0		|check range $0f - $2f
+	bles	Z_VAL		|if in this range, return zero
+	cmpib	#0x3f,%d0		|check range $30 - $3f
+	ble	BG_TBL		|valid constants in this range
+Z_VAL:				|also reached by fall-through for offsets above $3f
+	fmoves	FZERO,%fp0
+	rts
+PI_TBL:
+	tstb	%d1		|offset is zero, check for rmode
+	beqs	PI_RN		|if zero, rn mode
+	cmpib	#0x3,%d1		|check for rp
+	beqs	PI_RP		|if 3, rp mode
+PI_RZRM:
+	leal	PIRZRM,%a0	|rmode is rz or rm, load PIRZRM in a0
+	bra	set_finx
+PI_RN:
+	leal	PIRN,%a0		|rmode is rn, load PIRN in a0
+	bra	set_finx
+PI_RP:
+	leal	PIRP,%a0		|rmode is rp, load PIRP in a0
+	bra	set_finx
+SM_TBL:
+	subil	#0xb,%d0		|make offset in 0 - 3 range
+	tstb	%d1		|check for rmode
+	beqs	SM_RN		|if zero, rn mode
+	cmpib	#0x3,%d1		|check for rp
+	beqs	SM_RP		|if 3, rp mode
+SM_RZRM:
+	leal	SMALRZRM,%a0	|rmode is rz or rm, load SMALRZRM in a0
+	cmpib	#0x2,%d0		|check if result is inex
+	ble	set_finx	|if 0 - 2, it is inexact
+	bra	no_finx		|if 3, it is exact
+SM_RN:
+	leal	SMALRN,%a0	|rmode is rn, load SMALRN in a0
+	cmpib	#0x2,%d0		|check if result is inex
+	ble	set_finx	|if 0 - 2, it is inexact
+	bra	no_finx		|if 3, it is exact
+SM_RP:
+	leal	SMALRP,%a0	|rmode is rp, load SMALRP in a0
+	cmpib	#0x2,%d0		|check if result is inex
+	ble	set_finx	|if 0 - 2, it is inexact
+	bra	no_finx		|if 3, it is exact
+BG_TBL:
+	subil	#0x30,%d0		|make offset in 0 - f range
+	tstb	%d1		|check for rmode
+	beqs	BG_RN		|if zero, rn mode
+	cmpib	#0x3,%d1		|check for rp
+	beqs	BG_RP		|if 3, rp mode
+BG_RZRM:
+	leal	BIGRZRM,%a0	|rmode is rz or rm, load BIGRZRM in a0
+	cmpib	#0x1,%d0		|check if result is inex
+	ble	set_finx	|if 0 - 1, it is inexact
+	cmpib	#0x7,%d0		|second check
+	ble	no_finx		|if 2 - 7, it is exact
+	bra	set_finx	|if 8 - f, it is inexact
+BG_RN:
+	leal	BIGRN,%a0	|rmode is rn, load BIGRN in a0
+	cmpib	#0x1,%d0		|check if result is inex
+	ble	set_finx	|if 0 - 1, it is inexact
+	cmpib	#0x7,%d0		|second check
+	ble	no_finx		|if 2 - 7, it is exact
+	bra	set_finx	|if 8 - f, it is inexact
+BG_RP:
+	leal	BIGRP,%a0	|rmode is rp, load BIGRP in a0
+	cmpib	#0x1,%d0		|check if result is inex
+	ble	set_finx	|if 0 - 1, it is inexact
+	cmpib	#0x7,%d0		|second check
+	ble	no_finx		|if 2 - 7, it is exact
+|	(no bra needed here: 8 - f is inexact and falls through to set_finx)
+set_finx:
+	orl	#inx2a_mask,USER_FPSR(%a6) |set inex2/ainex
+no_finx:
+	mulul	#12,%d0			|use offset to point into tables (12 bytes per entry)
+	movel	%d1,L_SCR1(%a6)		|load mode for round call
+	bfextu	USER_FPCR(%a6){#24:#2},%d1	|get precision
+	tstl	%d1			|check if extended precision
+|
+| Precision is extended when the field is zero
+|
+	bnes	not_ext			|non-zero: go round; extended falls through, no round call
+	fmovemx (%a0,%d0),%fp0-%fp0		|return result in fp0
+	rts
+|
+| Precision is single or double
+|
+not_ext:
+	swap	%d1			|rnd prec in upper word of d1
+	addl	L_SCR1(%a6),%d1		|merge rmode in low word of d1
+	movel	(%a0,%d0),FP_SCR1(%a6)	|load first word to temp storage
+	movel	4(%a0,%d0),FP_SCR1+4(%a6)	|load second word
+	movel	8(%a0,%d0),FP_SCR1+8(%a6)	|load third word
+	clrl	%d0			|clear g,r,s
+	lea	FP_SCR1(%a6),%a0
+	btstb	#sign_bit,LOCAL_EX(%a0)
+	sne	LOCAL_SGN(%a0)		|convert to internal ext. format
+
+	bsr	round			|go round the mantissa
+
+	bfclr	LOCAL_SGN(%a0){#0:#8}	|convert back to IEEE ext format
+	beqs	fin_fcr			|sign byte was clear: leave the sign bit alone
+	bsetb	#sign_bit,LOCAL_EX(%a0)
+fin_fcr:
+	fmovemx (%a0),%fp0-%fp0		|return rounded result in fp0
+	rts
+
+	|end
diff --git a/arch/m68k/fpsp040/srem_mod.S b/arch/m68k/fpsp040/srem_mod.S
new file mode 100644
index 0000000..8c8d7f5
--- /dev/null
+++ b/arch/m68k/fpsp040/srem_mod.S
@@ -0,0 +1,422 @@
+|
+|	srem_mod.sa 3.1 12/10/90
+|
+|      The entry point sMOD computes the floating point MOD of the
+|      input values X and Y. The entry point sREM computes the floating
+|      point (IEEE) REM of the input values X and Y.
+|
+|      INPUT
+|      -----
+|      Double-extended value Y is pointed to by address in register
+|      A0. Double-extended value X is located in -12(A0). The values
+|      of X and Y are both nonzero and finite; although either or both
+|      of them can be denormalized. The special cases of zeros, NaNs,
+|      and infinities are handled elsewhere.
+|
+|      OUTPUT
+|      ------
+|      FREM(X,Y) or FMOD(X,Y), depending on entry point.
+|
+|       ALGORITHM
+|       ---------
+|
+|       Step 1.  Save and strip signs of X and Y: signX := sign(X),
+|                signY := sign(Y), X := |X|, Y := |Y|,
+|                signQ := signX EOR signY. Record whether MOD or REM
+|                is requested.
+|
+|       Step 2.  Set L := expo(X)-expo(Y), k := 0, Q := 0.
+|                If (L < 0) then
+|                   R := X, go to Step 4.
+|                else
+|                   R := 2^(-L)X, j := L.
+|                endif
+|
+|       Step 3.  Perform MOD(X,Y)
+|            3.1 If R = Y, go to Step 9.
+|            3.2 If R > Y, then { R := R - Y, Q := Q + 1}
+|            3.3 If j = 0, go to Step 4.
+|            3.4 k := k + 1, j := j - 1, Q := 2Q, R := 2R. Go to
+|                Step 3.1.
+|
+|       Step 4.  At this point, R = X - QY = MOD(X,Y). Set
+|                Last_Subtract := false (used in Step 7 below). If
+|                MOD is requested, go to Step 6.
+|
+|       Step 5.  R = MOD(X,Y), but REM(X,Y) is requested.
+|            5.1 If R < Y/2, then R = MOD(X,Y) = REM(X,Y). Go to
+|                Step 6.
+|            5.2 If R > Y/2, then { set Last_Subtract := true,
+|                Q := Q + 1, Y := signY*Y }. Go to Step 6.
+|            5.3 This is the tricky case of R = Y/2. If Q is odd,
+|                then { Q := Q + 1, signX := -signX }.
+|
+|       Step 6.  R := signX*R.
+|
+|       Step 7.  If Last_Subtract = true, R := R - Y.
+|
+|       Step 8.  Return signQ, last 7 bits of Q, and R as required.
+|
+|       Step 9.  At this point, R = 2^(-j)*X - Q Y = Y. Thus,
+|                X = 2^(j)*(Q+1)Y. set Q := 2^(j)*(Q+1),
+|                R := 0. Return signQ, last 7 bits of Q, and R.
+|
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+SREM_MOD:    |idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section    8
+
+#include "fpsp.h"
+
+	.set	Mod_Flag,L_SCR3
+	.set	SignY,FP_SCR3+4
+	.set	SignX,FP_SCR3+8
+	.set	SignQ,FP_SCR3+12
+	.set	Sc_Flag,FP_SCR4
+
+	.set	Y,FP_SCR1
+	.set	Y_Hi,Y+4
+	.set	Y_Lo,Y+8
+
+	.set	R,FP_SCR2
+	.set	R_Hi,R+4
+	.set	R_Lo,R+8
+
+
+Scale:     .long	0x00010000,0x80000000,0x00000000,0x00000000
+
+	|xref	t_avoid_unsupp
+
+        .globl         smod
+smod:
+|..MOD entry point: record Mod_Flag = 0, then join the shared code
+   clrl                Mod_Flag(%a6)
+   bras                Mod_Rem
+
+        .global        srem
+srem:
+|..REM entry point: record Mod_Flag = 1; execution continues into Mod_Rem
+   movel               #1,Mod_Flag(%a6)
+
+Mod_Rem:
+|..Save sign of X and Y
+   moveml              %d2-%d7,-(%a7)     | ...save data registers
+   movew               (%a0),%d3
+   movew               %d3,SignY(%a6)
+   andil               #0x00007FFF,%d3   | ...Y := |Y|
+
+|
+   movel               4(%a0),%d4
+   movel               8(%a0),%d5        | ...(D3,D4,D5) is |Y|
+
+   tstl                %d3
+   bnes                Y_Normal
+
+   movel               #0x00003FFE,%d3	| ...$3FFD + 1
+   tstl                %d4
+   bnes                HiY_not0
+
+HiY_0:
+   movel               %d5,%d4
+   clrl                %d5
+   subil               #32,%d3
+   clrl                %d6
+   bfffo                %d4{#0:#32},%d6
+   lsll                %d6,%d4
+   subl                %d6,%d3           | ...(D3,D4,D5) is normalized
+|                                       ...with bias $7FFD
+   bras                Chk_X
+
+HiY_not0:
+   clrl                %d6
+   bfffo                %d4{#0:#32},%d6
+   subl                %d6,%d3
+   lsll                %d6,%d4
+   movel               %d5,%d7           | ...a copy of D5
+   lsll                %d6,%d5
+   negl                %d6
+   addil               #32,%d6
+   lsrl                %d6,%d7
+   orl                 %d7,%d4           | ...(D3,D4,D5) normalized
+|                                       ...with bias $7FFD
+   bras                Chk_X
+
+Y_Normal:
+   addil               #0x00003FFE,%d3   | ...(D3,D4,D5) normalized
+|                                       ...with bias $7FFD
+
+Chk_X:
+   movew               -12(%a0),%d0
+   movew               %d0,SignX(%a6)
+   movew               SignY(%a6),%d1
+   eorl                %d0,%d1
+   andil               #0x00008000,%d1
+   movew               %d1,SignQ(%a6)	| ...sign(Q) obtained
+   andil               #0x00007FFF,%d0
+   movel               -8(%a0),%d1
+   movel               -4(%a0),%d2       | ...(D0,D1,D2) is |X|
+   tstl                %d0
+   bnes                X_Normal
+   movel               #0x00003FFE,%d0
+   tstl                %d1
+   bnes                HiX_not0
+
+HiX_0:
+   movel               %d2,%d1
+   clrl                %d2
+   subil               #32,%d0
+   clrl                %d6
+   bfffo                %d1{#0:#32},%d6
+   lsll                %d6,%d1
+   subl                %d6,%d0           | ...(D0,D1,D2) is normalized
+|                                       ...with bias $7FFD
+   bras                Init
+
+HiX_not0:
+   clrl                %d6
+   bfffo                %d1{#0:#32},%d6
+   subl                %d6,%d0
+   lsll                %d6,%d1
+   movel               %d2,%d7           | ...a copy of D2
+   lsll                %d6,%d2
+   negl                %d6
+   addil               #32,%d6
+   lsrl                %d6,%d7
+   orl                 %d7,%d1           | ...(D0,D1,D2) normalized
+|                                       ...with bias $7FFD
+   bras                Init
+
+X_Normal:
+   addil               #0x00003FFE,%d0   | ...(D0,D1,D2) normalized
+|                                       ...with bias $7FFD
+
+Init:
+|
+   movel               %d3,L_SCR1(%a6)   | ...save biased expo(Y)
+   movel		%d0,L_SCR2(%a6)	|save d0
+   subl                %d3,%d0           | ...L := expo(X)-expo(Y)
+|   Move.L               D0,L            ...D0 is j
+   clrl                %d6              | ...D6 := carry <- 0
+   clrl                %d3              | ...D3 is Q
+   moveal              #0,%a1           | ...A1 is k; j+k=L, Q=0
+
+|..(Carry,D1,D2) is R
+   tstl                %d0
+   bges                Mod_Loop
+
+|..expo(X) < expo(Y). Thus X = mod(X,Y)
+|
+   movel		L_SCR2(%a6),%d0	|restore d0
+   bra                Get_Mod
+
+|..At this point  R = 2^(-L)X; Q = 0; k = 0; and  k+j = L
+
+
+Mod_Loop:
+   tstl                %d6              | ...test carry bit
+   bgts                R_GT_Y
+
+|..At this point carry = 0, R = (D1,D2), Y = (D4,D5)
+   cmpl                %d4,%d1           | ...compare hi(R) and hi(Y)
+   bnes                R_NE_Y
+   cmpl                %d5,%d2           | ...compare lo(R) and lo(Y)
+   bnes                R_NE_Y
+
+|..At this point, R = Y
+   bra                Rem_is_0
+
+R_NE_Y:
+|..use the borrow of the previous compare
+   bcss                R_LT_Y          | ...borrow is set iff R < Y
+
+R_GT_Y:
+|..If Carry is set, then Y < (Carry,D1,D2) < 2Y. Otherwise, Carry = 0
+|..and Y < (D1,D2) < 2Y. Either way, perform R - Y
+   subl                %d5,%d2           | ...lo(R) - lo(Y)
+   subxl               %d4,%d1           | ...hi(R) - hi(Y)
+   clrl                %d6              | ...clear carry
+   addql               #1,%d3           | ...Q := Q + 1
+
+R_LT_Y:
+|..At this point, Carry=0, R < Y. R = 2^(k-L)X - QY; k+j = L; j >= 0.
+   tstl                %d0              | ...see if j = 0.
+   beqs                PostLoop
+
+   addl                %d3,%d3           | ...Q := 2Q
+   addl                %d2,%d2           | ...lo(R) = 2lo(R)
+   roxll               #1,%d1           | ...hi(R) = 2hi(R) + carry
+   scs                  %d6              | ...set Carry if 2(R) overflows
+   addql               #1,%a1           | ...k := k+1
+   subql               #1,%d0           | ...j := j - 1
+|..At this point, R=(Carry,D1,D2) = 2^(k-L)X - QY, j+k=L, j >= 0, R < 2Y.
+
+   bras                Mod_Loop
+
+PostLoop:
+|..k = L, j = 0, Carry = 0, R = (D1,D2) = X - QY, R < Y.
+
+|..normalize R.
+   movel               L_SCR1(%a6),%d0           | ...new biased expo of R
+   tstl                %d1
+   bnes                HiR_not0
+
+HiR_0:
+   movel               %d2,%d1
+   clrl                %d2
+   subil               #32,%d0
+   clrl                %d6
+   bfffo                %d1{#0:#32},%d6
+   lsll                %d6,%d1
+   subl                %d6,%d0           | ...(D0,D1,D2) is normalized
+|                                       ...with bias $7FFD
+   bras                Get_Mod
+
+HiR_not0:
+   clrl                %d6
+   bfffo                %d1{#0:#32},%d6
+   bmis                Get_Mod         | ...already normalized
+   subl                %d6,%d0
+   lsll                %d6,%d1
+   movel               %d2,%d7           | ...a copy of D2
+   lsll                %d6,%d2
+   negl                %d6
+   addil               #32,%d6
+   lsrl                %d6,%d7
+   orl                 %d7,%d1           | ...(D0,D1,D2) normalized
+
+|
+Get_Mod:
+   cmpil		#0x000041FE,%d0
+   bges		No_Scale
+Do_Scale:
+   movew		%d0,R(%a6)
+   clrw		R+2(%a6)
+   movel		%d1,R_Hi(%a6)
+   movel		%d2,R_Lo(%a6)
+   movel		L_SCR1(%a6),%d6
+   movew		%d6,Y(%a6)
+   clrw		Y+2(%a6)
+   movel		%d4,Y_Hi(%a6)
+   movel		%d5,Y_Lo(%a6)
+   fmovex		R(%a6),%fp0		| ...no exception
+   movel		#1,Sc_Flag(%a6)
+   bras		ModOrRem
+No_Scale:
+   movel		%d1,R_Hi(%a6)
+   movel		%d2,R_Lo(%a6)
+   subil		#0x3FFE,%d0
+   movew		%d0,R(%a6)
+   clrw		R+2(%a6)
+   movel		L_SCR1(%a6),%d6
+   subil		#0x3FFE,%d6
+   movel		%d6,L_SCR1(%a6)
+   fmovex		R(%a6),%fp0
+   movew		%d6,Y(%a6)
+   movel		%d4,Y_Hi(%a6)
+   movel		%d5,Y_Lo(%a6)
+   movel		#0,Sc_Flag(%a6)
+
+|
+
+
+ModOrRem:
+   movel               Mod_Flag(%a6),%d6
+   beqs                Fix_Sign
+
+   movel               L_SCR1(%a6),%d6           | ...new biased expo(Y)
+   subql               #1,%d6           | ...biased expo(Y/2)
+   cmpl                %d6,%d0
+   blts                Fix_Sign
+   bgts                Last_Sub
+
+   cmpl                %d4,%d1
+   bnes                Not_EQ
+   cmpl                %d5,%d2
+   bnes                Not_EQ
+   bra                Tie_Case
+
+Not_EQ:
+   bcss                Fix_Sign
+
+Last_Sub:
+|
+   fsubx		Y(%a6),%fp0		| ...no exceptions
+   addql               #1,%d3           | ...Q := Q + 1
+
+|
+
+Fix_Sign:
+|..Get sign of X
+   movew               SignX(%a6),%d6
+   bges		Get_Q
+   fnegx		%fp0
+
+|..Get Q
+|
+Get_Q:
+   clrl		%d6
+   movew               SignQ(%a6),%d6        | ...D6 is sign(Q)
+   movel               #8,%d7
+   lsrl                %d7,%d6
+   andil               #0x0000007F,%d3   | ...7 bits of Q
+   orl                 %d6,%d3           | ...sign and bits of Q
+   swap                 %d3
+   fmovel              %fpsr,%d6
+   andil               #0xFF00FFFF,%d6
+   orl                 %d3,%d6
+   fmovel              %d6,%fpsr         | ...put Q in fpsr
+
+|
+Restore:
+   moveml              (%a7)+,%d2-%d7
+   fmovel              USER_FPCR(%a6),%fpcr
+   movel               Sc_Flag(%a6),%d0
+   beqs                Finish
+   fmulx		Scale(%pc),%fp0	| ...may cause underflow
+   bra			t_avoid_unsupp	|check for denorm as a
+|					;result of the scaling
+
+Finish:
+	fmovex		%fp0,%fp0		|capture exceptions & round
+	rts
+
+Rem_is_0:
+|..R = 2^(-j)X - Q Y = Y, thus R = 0 and quotient = 2^j (Q+1)
+   addql               #1,%d3
+   cmpil               #8,%d0           | ...D0 is j
+   bges                Q_Big
+
+   lsll                %d0,%d3
+   bras                Set_R_0
+
+Q_Big:
+   clrl                %d3
+
+Set_R_0:
+   fmoves		#0x00000000,%fp0
+   movel		#0,Sc_Flag(%a6)
+   bra                Fix_Sign
+
+Tie_Case:
+|..Check parity of Q
+   movel               %d3,%d6
+   andil               #0x00000001,%d6
+   tstl                %d6
+   beq                Fix_Sign	| ...Q is even
+
+|..Q is odd, Q := Q + 1, signX := -signX
+   addql               #1,%d3
+   movew               SignX(%a6),%d6
+   eoril               #0x00008000,%d6
+   movew               %d6,SignX(%a6)
+   bra                Fix_Sign
+
+   |end
diff --git a/arch/m68k/fpsp040/ssin.S b/arch/m68k/fpsp040/ssin.S
new file mode 100644
index 0000000..043c91c
--- /dev/null
+++ b/arch/m68k/fpsp040/ssin.S
@@ -0,0 +1,746 @@
+|
+|	ssin.sa 3.3 7/29/91
+|
+|	The entry point sSIN computes the sine of an input argument,
+|	sCOS computes the cosine, and sSINCOS computes both. The
+|	corresponding entry points with a "d" suffix compute the same
+|	function values for denormalized inputs.
+|
+|	Input: Double-extended number X in location pointed to
+|		by address register a0.
+|
+|	Output: The function value sin(X) or cos(X) returned in Fp0 if SIN or
+|		COS is requested. Otherwise, for SINCOS, sin(X) is returned
+|		in Fp0, and cos(X) is returned in Fp1.
+|
+|	Modifies: Fp0 for SIN or COS; both Fp0 and Fp1 for SINCOS.
+|
+|	Accuracy and Monotonicity: The returned result is within 1 ulp in
+|		64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+|		result is subsequently rounded to double precision. The
+|		result is provably monotonic in double precision.
+|
+|	Speed: The programs sSIN and sCOS take approximately 150 cycles for
+|		input argument X such that |X| < 15Pi, which is the usual
+|		situation. The speed for sSINCOS is approximately 190 cycles.
+|
+|	Algorithm:
+|
+|	SIN and COS:
+|	1. If SIN is invoked, set AdjN := 0; otherwise, set AdjN := 1.
+|
+|	2. If |X| >= 15Pi or |X| < 2**(-40), go to 7.
+|
+|	3. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
+|		k = N mod 4, so in particular, k = 0,1,2,or 3. Overwrite
+|		k by k := k + AdjN.
+|
+|	4. If k is even, go to 6.
+|
+|	5. (k is odd) Set j := (k-1)/2, sgn := (-1)**j. Return sgn*cos(r)
+|		where cos(r) is approximated by an even polynomial in r,
+|		1 + r*r*(B1+s*(B2+ ... + s*B8)),	s = r*r.
+|		Exit.
+|
+|	6. (k is even) Set j := k/2, sgn := (-1)**j. Return sgn*sin(r)
+|		where sin(r) is approximated by an odd polynomial in r
+|		r + r*s*(A1+s*(A2+ ... + s*A7)),	s = r*r.
+|		Exit.
+|
+|	7. If |X| > 1, go to 9.
+|
+|	8. (|X|<2**(-40)) If SIN is invoked, return X; otherwise return 1.
+|
+|	9. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 3.
+|
+|	SINCOS:
+|	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
+|
+|	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
+|		k = N mod 4, so in particular, k = 0,1,2,or 3.
+|
+|	3. If k is even, go to 5.
+|
+|	4. (k is odd) Set j1 := (k-1)/2, j2 := j1 (EOR) (k mod 2), i.e.
+|		j1 exclusive or with the l.s.b. of k.
+|		sgn1 := (-1)**j1, sgn2 := (-1)**j2.
+|		SIN(X) = sgn1 * cos(r) and COS(X) = sgn2*sin(r) where
+|		sin(r) and cos(r) are computed as odd and even polynomials
+|		in r, respectively. Exit
+|
+|	5. (k is even) Set j1 := k/2, sgn1 := (-1)**j1.
+|		SIN(X) = sgn1 * sin(r) and COS(X) = sgn1*cos(r) where
+|		sin(r) and cos(r) are computed as odd and even polynomials
+|		in r, respectively. Exit
+|
+|	6. If |X| > 1, go to 8.
+|
+|	7. (|X|<2**(-40)) SIN(X) = X and COS(X) = 1. Exit.
+|
+|	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|SSIN	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+BOUNDS1:	.long 0x3FD78000,0x4004BC7E	| ...SAME VALUES AS THE 2**(-40) AND 15 PI CHECKS; LABEL UNREFERENCED IN THIS FILE
+TWOBYPI:	.long 0x3FE45F30,0x6DC9C883	| ...2/PI, READ AS A DOUBLE (FMULD)
+
+SINA7:	.long 0xBD6AAA77,0xCCC994F5	| ...A7, DOUBLE
+SINA6:	.long 0x3DE61209,0x7AAE8DA1	| ...A6, DOUBLE
+
+SINA5:	.long 0xBE5AE645,0x2A118AE4	| ...A5, DOUBLE
+SINA4:	.long 0x3EC71DE3,0xA5341531	| ...A4, DOUBLE
+
+SINA3:	.long 0xBF2A01A0,0x1A018B59,0x00000000,0x00000000	| ...A3, READ AS A DOUBLE (FADDD)
+
+SINA2:	.long 0x3FF80000,0x88888888,0x888859AF,0x00000000	| ...A2, EXTENDED
+
+SINA1:	.long 0xBFFC0000,0xAAAAAAAA,0xAAAAAA99,0x00000000	| ...A1, EXTENDED
+
+COSB8:	.long 0x3D2AC4D0,0xD6011EE3	| ...B8, DOUBLE
+COSB7:	.long 0xBDA9396F,0x9F45AC19	| ...B7, DOUBLE
+
+COSB6:	.long 0x3E21EED9,0x0612C972	| ...B6, DOUBLE
+COSB5:	.long 0xBE927E4F,0xB79D9FCF	| ...B5, DOUBLE
+
+COSB4:	.long 0x3EFA01A0,0x1A01D423,0x00000000,0x00000000	| ...B4, READ AS A DOUBLE (FADDD)
+
+COSB3:	.long 0xBFF50000,0xB60B60B6,0x0B61D438,0x00000000	| ...B3, EXTENDED
+
+COSB2:	.long 0x3FFA0000,0xAAAAAAAA,0xAAAAAB5E	| ...B2, EXTENDED
+COSB1:	.long 0xBF000000	| ...B1 = -1/2, SINGLE
+
+INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A	| ...2/PI FRACTION WITH EXPO LOWERED BY 2; UNREFERENCED IN THIS FILE
+
+TWOPI1:	.long 0x40010000,0xC90FDAA2,0x00000000,0x00000000	| ...LEADING PIECE OF 2*PI; UNREFERENCED IN THIS FILE
+TWOPI2:	.long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000	| ...TRAILING PIECE OF 2*PI; UNREFERENCED IN THIS FILE
+
+	|xref	PITBL
+
+	.set	INARG,FP_SCR4		| ...REDUCEX: COPY OF THE RUNNING REMAINDER
+
+	.set	X,FP_SCR5		| ...SAVED ARGUMENT; LATER HOLDS R, R' OR S'
+	.set	XDCARE,X+2		| ...WORD AT OFFSET 2 OF X, CLEARED BY SINTINY/SCSM
+	.set	XFRAC,X+4		| ...UNREFERENCED IN THIS FILE
+
+	.set	RPRIME,FP_SCR1		| ...SINCOS: SIGNED COPY OF R
+	.set	SPRIME,FP_SCR2		| ...SINCOS: SIGNED COPY OF S = R*R
+
+	.set	POSNEG1,L_SCR1		| ...+1.0 OR -1.0 IN SINGLE
+	.set	TWOTO63,L_SCR1		| ...REDUCEX ONLY; SHARES THE SLOT WITH POSNEG1
+
+	.set	ENDFLAG,L_SCR2		| ...REDUCEX: 1 ON THE FINAL PASS
+	.set	N,L_SCR2		| ...INTEGER QUOTIENT; SHARES THE SLOT WITH ENDFLAG
+
+	.set	ADJN,L_SCR3		| ...0 = SIN, 1 = COS, 4 = SINCOS
+
+	| xref	t_frcinx
+	|xref	t_extdnrm
+	|xref	sto_cos
+
+	.global	ssind
+ssind:
+|--SIN(X) = X FOR DENORMALIZED X
+	bra		t_extdnrm	| ...EXTERNAL HANDLER (SEE THE XREF LIST)
+
+	.global	scosd
+scosd:
+|--COS(X) = 1 FOR DENORMALIZED X
+
+	fmoves		#0x3F800000,%fp0	| ...FP0 := 1.0 (SINGLE IMMEDIATE)
+|
+|	9D25B Fix: Sometimes the previous fmove.s sets fpsr bits
+|
+	fmovel		#0,%fpsr		| ...SO CLEAR THE WHOLE FPSR
+|
+	bra		t_frcinx
+
+	.global	ssin
+ssin:
+|--SET ADJN TO 0
+	movel		#0,ADJN(%a6)
+	bras		SINBGN
+
+	.global	scos
+scos:
+|--SET ADJN TO 1
+	movel		#1,ADJN(%a6)
+
+SINBGN:
+|--LOAD X, FORM ITS COMPACT KEY IN D0, CHECK IF |X| IS TOO SMALL OR LARGE
+
+	fmovex		(%a0),%fp0	| ...LOAD INPUT
+
+	movel		(%a0),%d0	| ...D0 = FIRST LONGWORD OF X
+	movew		4(%a0),%d0	| ...LOW WORD OF D0 = WORD AT OFFSET 4 OF X
+	fmovex		%fp0,X(%a6)	| ...KEEP A COPY OF X (READ BACK BY SINTINY)
+	andil		#0x7FFFFFFF,%d0		| ...COMPACTIFY X
+
+	cmpil		#0x3FD78000,%d0		| ...|X| >= 2**(-40)?
+	bges		SOK1
+	bra		SINSM		| ...NO, TINY ARGUMENT
+
+SOK1:
+	cmpil		#0x4004BC7E,%d0		| ...|X| < 15 PI?
+	blts		SINMAIN
+	bra		REDUCEX		| ...NO, GENERAL ARGUMENT REDUCTION
+
+SINMAIN:
+|--THIS IS THE USUAL CASE, |X| <= 15 PI.
+|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
+	fmovex		%fp0,%fp1
+	fmuld		TWOBYPI,%fp1	| ...X*2/PI
+
+|--HIDE THE NEXT INSTRUCTION
+	lea		PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
+
+
+|--FP1 IS NOW READY
+	fmovel		%fp1,N(%a6)		| ...CONVERT TO INTEGER
+
+	movel		N(%a6),%d0
+	asll		#4,%d0		| ...N*16, THE BYTE OFFSET USED TO INDEX THE TABLE
+	addal		%d0,%a1	| ...A1 IS THE ADDRESS OF N*PIBY2
+|				...WHICH IS IN TWO PIECES Y1 & Y2
+
+	fsubx		(%a1)+,%fp0	| ...X-Y1
+|--HIDE THE NEXT ONE
+	fsubs		(%a1),%fp0	| ...FP0 IS R = (X-Y1)-Y2
+
+SINCONT:
+|--continuation from REDUCEX
+
+|--GET N+ADJN AND SEE IF SIN(R) OR COS(R) IS NEEDED
+	movel		N(%a6),%d0
+	addl		ADJN(%a6),%d0	| ...SEE IF D0 IS ODD OR EVEN
+	rorl		#1,%d0	| ...D0 WAS ODD IFF D0 IS NEGATIVE
+	cmpil		#0,%d0
+	blt		COSPOLY		| ...ODD, USE THE COSINE POLYNOMIAL
+
+SINPOLY:
+|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
+|--THEN WE RETURN	SGN*SIN(R). SGN*SIN(R) IS COMPUTED BY
+|--R' + R'*S*(A1 + S(A2 + S(A3 + S(A4 + ... + SA7)))), WHERE
+|--R' = SGN*R, S=R*R. THIS CAN BE REWRITTEN AS
+|--R' + R'*S*( [A1+T(A3+T(A5+TA7))] + [S(A2+T(A4+TA6))])
+|--WHERE T=S*S.
+|--NOTE THAT A3 THROUGH A7 ARE STORED IN DOUBLE PRECISION
+|--WHILE A1 AND A2 ARE IN DOUBLE-EXTENDED FORMAT.
+	fmovex		%fp0,X(%a6)	| ...X IS R
+	fmulx		%fp0,%fp0	| ...FP0 IS S
+|---HIDE THE NEXT TWO WHILE WAITING FOR FP0
+	fmoved		SINA7,%fp3
+	fmoved		SINA6,%fp2
+|--FP0 IS NOW READY
+	fmovex		%fp0,%fp1
+	fmulx		%fp1,%fp1	| ...FP1 IS T
+|--HIDE THE NEXT TWO WHILE WAITING FOR FP1
+
+	rorl		#1,%d0		| ...SECOND ROTATE: J MOVES INTO BIT 31
+	andil		#0x80000000,%d0
+|				...LEAST SIG. BIT OF D0 IN SIGN POSITION
+	eorl		%d0,X(%a6)	| ...X IS NOW R'= SGN*R
+
+	fmulx		%fp1,%fp3	| ...TA7
+	fmulx		%fp1,%fp2	| ...TA6
+
+	faddd		SINA5,%fp3 | ...A5+TA7
+	faddd		SINA4,%fp2 | ...A4+TA6
+
+	fmulx		%fp1,%fp3	| ...T(A5+TA7)
+	fmulx		%fp1,%fp2	| ...T(A4+TA6)
+
+	faddd		SINA3,%fp3 | ...A3+T(A5+TA7)
+	faddx		SINA2,%fp2 | ...A2+T(A4+TA6)
+
+	fmulx		%fp3,%fp1	| ...T(A3+T(A5+TA7))
+
+	fmulx		%fp0,%fp2	| ...S(A2+T(A4+TA6))
+	faddx		SINA1,%fp1 | ...A1+T(A3+T(A5+TA7))
+	fmulx		X(%a6),%fp0	| ...R'*S
+
+	faddx		%fp2,%fp1	| ...[A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))]
+|--FP3 IS NOT USED AGAIN IN THIS PATH (NOTHING IS RESTORED HERE)
+|--FP2 IS NOT USED AGAIN IN THIS PATH (NOTHING IS RESTORED HERE)
+
+
+	fmulx		%fp1,%fp0		| ...SIN(R')-R'
+|--FP1 RELEASED.
+
+	fmovel		%d1,%FPCR		|restore users exceptions
+	faddx		X(%a6),%fp0		|last inst - possible exception set
+	bra		t_frcinx
+
+
+COSPOLY:
+|--LET J BE THE LEAST SIG. BIT OF D0, LET SGN := (-1)**J.
+|--THEN WE RETURN	SGN*COS(R). SGN*COS(R) IS COMPUTED BY
+|--SGN + S'*(B1 + S(B2 + S(B3 + S(B4 + ... + SB8)))), WHERE
+|--S=R*R AND S'=SGN*S. THIS CAN BE REWRITTEN AS
+|--SGN + S'*([B1+T(B3+T(B5+TB7))] + [S(B2+T(B4+T(B6+TB8)))])
+|--WHERE T=S*S.
+|--NOTE THAT B4 THROUGH B8 ARE STORED IN DOUBLE PRECISION
+|--WHILE B2 AND B3 ARE IN DOUBLE-EXTENDED FORMAT, B1 IS -1/2
+|--AND IS THEREFORE STORED AS SINGLE PRECISION.
+
+	fmulx		%fp0,%fp0	| ...FP0 IS S
+|---HIDE THE NEXT TWO WHILE WAITING FOR FP0
+	fmoved		COSB8,%fp2
+	fmoved		COSB7,%fp3
+|--FP0 IS NOW READY
+	fmovex		%fp0,%fp1
+	fmulx		%fp1,%fp1	| ...FP1 IS T
+|--HIDE THE NEXT TWO WHILE WAITING FOR FP1
+	fmovex		%fp0,X(%a6)	| ...X IS S
+	rorl		#1,%d0		| ...SECOND ROTATE: J MOVES INTO BIT 31
+	andil		#0x80000000,%d0
+|			...LEAST SIG. BIT OF D0 IN SIGN POSITION
+
+	fmulx		%fp1,%fp2	| ...TB8
+|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
+	eorl		%d0,X(%a6)	| ...X IS NOW S'= SGN*S
+	andil		#0x80000000,%d0	| ...(REPEATS THE MASK ALREADY APPLIED ABOVE)
+
+	fmulx		%fp1,%fp3	| ...TB7
+|--HIDE THE NEXT TWO WHILE WAITING FOR THE XU
+	oril		#0x3F800000,%d0	| ...D0 IS SGN IN SINGLE
+	movel		%d0,POSNEG1(%a6)
+
+	faddd		COSB6,%fp2 | ...B6+TB8
+	faddd		COSB5,%fp3 | ...B5+TB7
+
+	fmulx		%fp1,%fp2	| ...T(B6+TB8)
+	fmulx		%fp1,%fp3	| ...T(B5+TB7)
+
+	faddd		COSB4,%fp2 | ...B4+T(B6+TB8)
+	faddx		COSB3,%fp3 | ...B3+T(B5+TB7)
+
+	fmulx		%fp1,%fp2	| ...T(B4+T(B6+TB8))
+	fmulx		%fp3,%fp1	| ...T(B3+T(B5+TB7))
+
+	faddx		COSB2,%fp2 | ...B2+T(B4+T(B6+TB8))
+	fadds		COSB1,%fp1 | ...B1+T(B3+T(B5+TB7))
+
+	fmulx		%fp2,%fp0	| ...S(B2+T(B4+T(B6+TB8)))
+|--FP3 IS NOT USED AGAIN IN THIS PATH (NOTHING IS RESTORED HERE)
+|--FP2 RELEASED.
+
+
+	faddx		%fp1,%fp0	| ...[B1+T(B3+...)] + [S(B2+...)]
+|--FP1 RELEASED
+
+	fmulx		X(%a6),%fp0	| ...TIMES S'
+
+	fmovel		%d1,%FPCR		|restore users exceptions
+	fadds		POSNEG1(%a6),%fp0	|last inst - possible exception set
+	bra		t_frcinx
+
+
+SINBORS:
+|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
+|--IF |X| < 2**(-40), RETURN X OR 1. (NO BRANCH IN THIS FILE TARGETS SINBORS.)
+	cmpil		#0x3FFF8000,%d0
+	bgts		REDUCEX
+
+
+SINSM:
+	movel		ADJN(%a6),%d0
+	cmpil		#0,%d0
+	bgts		COSTINY		| ...ADJN > 0 MEANS COS WAS REQUESTED
+
+SINTINY:
+	movew		#0x0000,XDCARE(%a6)	| ...JUST IN CASE
+	fmovel		%d1,%FPCR		|restore users exceptions
+	fmovex		X(%a6),%fp0		|last inst - possible exception set
+	bra		t_frcinx
+
+
+COSTINY:
+	fmoves		#0x3F800000,%fp0	| ...FP0 := 1.0
+
+	fmovel		%d1,%FPCR		|restore users exceptions
+	fsubs		#0x00800000,%fp0	|last inst - possible exception set
+	bra		t_frcinx
+
+
+REDUCEX:
+|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
+|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
+|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
+
+	fmovemx	%fp2-%fp5,-(%a7)	| ...save FP2 through FP5
+	movel		%d2,-(%a7)	| ...save D2
+        fmoves         #0x00000000,%fp1	| ...r := 0, the low part of (R,r)
+|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
+|--there is a danger of unwanted overflow in first LOOP iteration.  In this
+|--case, reduce argument by one remainder step to make subsequent reduction
+|--safe.
+	cmpil	#0x7ffeffff,%d0		|is argument dangerously large?
+	bnes	LOOP
+	movel	#0x7ffe0000,FP_SCR2(%a6)	|yes
+|					;create 2**16383*PI/2
+	movel	#0xc90fdaa2,FP_SCR2+4(%a6)
+	clrl	FP_SCR2+8(%a6)
+	ftstx	%fp0			|test sign of argument
+	movel	#0x7fdc0000,FP_SCR3(%a6)	|create low half of 2**16383*
+|					;PI/2 at FP_SCR3
+	movel	#0x85a308d3,FP_SCR3+4(%a6)
+	clrl   FP_SCR3+8(%a6)
+	fblt	red_neg
+	orw	#0x8000,FP_SCR2(%a6)	|positive arg: set the sign bits so the faddx below subtract
+	orw	#0x8000,FP_SCR3(%a6)
+red_neg:
+	faddx  FP_SCR2(%a6),%fp0		|high part of reduction is exact
+	fmovex  %fp0,%fp1		|save high result in fp1
+	faddx  FP_SCR3(%a6),%fp0		|low part of reduction
+	fsubx  %fp0,%fp1			|determine low component of result
+	faddx  FP_SCR3(%a6),%fp1		|fp0/fp1 are reduced argument.
+
+|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
+|--integer quotient will be stored in N
+|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
+
+LOOP:
+	fmovex		%fp0,INARG(%a6)	| ...+-2**K * F, 1 <= F < 2
+	movew		INARG(%a6),%d0
+        movel          %d0,%a1		| ...save a copy of D0
+	andil		#0x00007FFF,%d0
+	subil		#0x00003FFF,%d0	| ...D0 IS K
+	cmpil		#28,%d0
+	bles		LASTLOOP	| ...K <= 28: FINAL PASS WITH L = 0
+CONTLOOP:
+	subil		#27,%d0	 | ...D0 IS L := K-27
+	movel		#0,ENDFLAG(%a6)
+	bras		WORK
+LASTLOOP:
+	clrl		%d0		| ...D0 IS L := 0
+	movel		#1,ENDFLAG(%a6)
+
+WORK:
+|--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
+|--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
+
+|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
+|--2**L * (PIby2_1), 2**L * (PIby2_2)
+
+	movel		#0x00003FFE,%d2	| ...BIASED EXPO OF 2/PI
+	subl		%d0,%d2		| ...BIASED EXPO OF 2**(-L)*(2/PI)
+
+	movel		#0xA2F9836E,FP_SCR1+4(%a6)
+	movel		#0x4E44152A,FP_SCR1+8(%a6)
+	movew		%d2,FP_SCR1(%a6)	| ...FP_SCR1 is 2**(-L)*(2/PI)
+
+	fmovex		%fp0,%fp2
+	fmulx		FP_SCR1(%a6),%fp2
+|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
+|--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
+|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
+|--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
+|--US THE DESIRED VALUE IN FLOATING POINT.
+
+|--HIDE SIX CYCLES OF INSTRUCTION
+        movel		%a1,%d2
+        swap		%d2		| ...SIGN/EXPO WORD OF INARG TO THE TOP HALF
+	andil		#0x80000000,%d2
+	oril		#0x5F000000,%d2	| ...D2 IS SIGN(INARG)*2**63 IN SGL
+	movel		%d2,TWOTO63(%a6)
+
+	movel		%d0,%d2
+	addil		#0x00003FFF,%d2	| ...BIASED EXPO OF 2**L * (PI/2)
+
+|--FP2 IS READY
+	fadds		TWOTO63(%a6),%fp2	| ...THE FRACTIONAL PART OF FP2 IS ROUNDED
+
+|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
+        movew		%d2,FP_SCR2(%a6)
+	clrw           FP_SCR2+2(%a6)
+	movel		#0xC90FDAA2,FP_SCR2+4(%a6)
+	clrl		FP_SCR2+8(%a6)		| ...FP_SCR2 is  2**(L) * Piby2_1
+
+|--FP2 IS READY
+	fsubs		TWOTO63(%a6),%fp2		| ...FP2 is N
+
+	addil		#0x00003FDD,%d0	| ...BIASED EXPO OF 2**L * Piby2_2
+        movew		%d0,FP_SCR3(%a6)
+	clrw           FP_SCR3+2(%a6)
+	movel		#0x85A308D3,FP_SCR3+4(%a6)
+	clrl		FP_SCR3+8(%a6)		| ...FP_SCR3 is 2**(L) * Piby2_2
+
+	movel		ENDFLAG(%a6),%d0	| ...D0 = ENDFLAG (SAME L_SCR2 SLOT THAT RESTORE REUSES FOR N)
+
+|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
+|--P2 = 2**(L) * Piby2_2
+	fmovex		%fp2,%fp4
+	fmulx		FP_SCR2(%a6),%fp4		| ...W = N*P1
+	fmovex		%fp2,%fp5
+	fmulx		FP_SCR3(%a6),%fp5		| ...w = N*P2
+	fmovex		%fp4,%fp3
+|--we want P+p = W+w  but  |p| <= half ulp of P
+|--Then, we need to compute  A := R-P   and  a := r-p
+	faddx		%fp5,%fp3			| ...FP3 is P
+	fsubx		%fp3,%fp4			| ...W-P
+
+	fsubx		%fp3,%fp0			| ...FP0 is A := R - P
+        faddx		%fp5,%fp4			| ...FP4 is p = (W-P)+w
+
+	fmovex		%fp0,%fp3			| ...FP3 A
+	fsubx		%fp4,%fp1			| ...FP1 is a := r - p
+
+|--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
+|--|r| <= half ulp of R.
+	faddx		%fp1,%fp0			| ...FP0 is R := A+a
+|--No need to calculate r if this is the last loop
+	cmpil		#0,%d0
+	bgt		RESTORE		| ...ENDFLAG SET: REDUCTION IS COMPLETE
+
+|--Need to calculate r
+	fsubx		%fp0,%fp3			| ...A-R
+	faddx		%fp3,%fp1			| ...FP1 is r := (A-R)+a
+	bra		LOOP
+
+RESTORE:
+        fmovel		%fp2,N(%a6)	| ...N := QUOTIENT OF THE FINAL PASS, AS AN INTEGER
+	movel		(%a7)+,%d2	| ...restore D2
+	fmovemx	(%a7)+,%fp2-%fp5	| ...restore FP2 through FP5
+
+
+	movel		ADJN(%a6),%d0
+	cmpil		#4,%d0		| ...ADJN IS 4 ONLY WHEN ENTERED FROM SSINCOS
+
+	blt		SINCONT		| ...SIN OR COS REQUEST
+	bras		SCCONT		| ...SINCOS REQUEST
+
+	.global	ssincosd
+ssincosd:
+|--SIN AND COS OF X FOR DENORMALIZED X
+
+	fmoves		#0x3F800000,%fp1	| ...COS(X) := 1.0
+	bsr		sto_cos		|store cosine result
+	bra		t_extdnrm	| ...SAME EXIT AS SSIND
+
+	.global	ssincos
+ssincos:
+|--SET ADJN TO 4
+	movel		#4,ADJN(%a6)	| ...REDUCEX USES THIS VALUE TO COME BACK TO SCCONT
+
+	fmovex		(%a0),%fp0	| ...LOAD INPUT
+
+	movel		(%a0),%d0	| ...D0 = FIRST LONGWORD OF X
+	movew		4(%a0),%d0	| ...LOW WORD OF D0 = WORD AT OFFSET 4 OF X
+	fmovex		%fp0,X(%a6)	| ...KEEP A COPY OF X (READ BACK BY SCSM)
+	andil		#0x7FFFFFFF,%d0		| ...COMPACTIFY X
+
+	cmpil		#0x3FD78000,%d0		| ...|X| >= 2**(-40)?
+	bges		SCOK1
+	bra		SCSM		| ...NO, TINY ARGUMENT
+
+SCOK1:
+	cmpil		#0x4004BC7E,%d0		| ...|X| < 15 PI?
+	blts		SCMAIN
+	bra		REDUCEX		| ...NO, GENERAL ARGUMENT REDUCTION
+
+
+SCMAIN:
+|--THIS IS THE USUAL CASE, |X| <= 15 PI.
+|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
+	fmovex		%fp0,%fp1
+	fmuld		TWOBYPI,%fp1	| ...X*2/PI
+
+|--HIDE THE NEXT INSTRUCTION
+	lea		PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
+
+
+|--FP1 IS NOW READY
+	fmovel		%fp1,N(%a6)		| ...CONVERT TO INTEGER
+
+	movel		N(%a6),%d0
+	asll		#4,%d0		| ...N*16, THE BYTE OFFSET USED TO INDEX THE TABLE
+	addal		%d0,%a1		| ...ADDRESS OF N*PIBY2, IN Y1, Y2
+
+	fsubx		(%a1)+,%fp0	| ...X-Y1
+        fsubs		(%a1),%fp0	| ...FP0 IS R = (X-Y1)-Y2
+
+SCCONT:
+|--continuation point from REDUCEX
+
+|--HIDE THE NEXT TWO
+	movel		N(%a6),%d0
+	rorl		#1,%d0		| ...BIT 0 OF N TO BIT 31, BIT 1 OF N TO BIT 0
+
+	cmpil		#0,%d0		| ...D0 < 0 IFF N IS ODD
+	bge		NEVEN
+
+NODD:
+|--REGISTERS SAVED SO FAR: D0, A0, FP2.
+
+	fmovex		%fp0,RPRIME(%a6)
+	fmulx		%fp0,%fp0	 | ...FP0 IS S = R*R
+	fmoved		SINA7,%fp1	| ...A7
+	fmoved		COSB8,%fp2	| ...B8
+	fmulx		%fp0,%fp1	 | ...SA7
+	movel		%d2,-(%a7)	| ...SAVE D2, USED AS SCRATCH BELOW
+	movel		%d0,%d2
+	fmulx		%fp0,%fp2	 | ...SB8
+	rorl		#1,%d2		| ...BIT 31 OF D2 = BIT 1 OF N (J1)
+	andil		#0x80000000,%d2
+
+	faddd		SINA6,%fp1	| ...A6+SA7
+	eorl		%d0,%d2		| ...BIT 31 OF D2 = J1 EOR (BIT 0 OF N) = J2
+	andil		#0x80000000,%d2
+	faddd		COSB7,%fp2	| ...B7+SB8
+
+	fmulx		%fp0,%fp1	 | ...S(A6+SA7)
+	eorl		%d2,RPRIME(%a6)	| ...R' = SGN2*R
+	movel		(%a7)+,%d2	| ...RESTORE D2
+	fmulx		%fp0,%fp2	 | ...S(B7+SB8)
+	rorl		#1,%d0		| ...BIT 31 OF D0 = BIT 1 OF N (J1)
+	andil		#0x80000000,%d0
+
+	faddd		SINA5,%fp1	| ...A5+S(A6+SA7)
+	movel		#0x3F800000,POSNEG1(%a6)
+	eorl		%d0,POSNEG1(%a6)	| ...POSNEG1 = SGN1*1.0 IN SINGLE
+	faddd		COSB6,%fp2	| ...B6+S(B7+SB8)
+
+	fmulx		%fp0,%fp1	 | ...S(A5+S(A6+SA7))
+	fmulx		%fp0,%fp2	 | ...S(B6+S(B7+SB8))
+	fmovex		%fp0,SPRIME(%a6)
+
+	faddd		SINA4,%fp1	| ...A4+S(A5+S(A6+SA7))
+	eorl		%d0,SPRIME(%a6)	| ...S' = SGN1*S
+	faddd		COSB5,%fp2	| ...B5+S(B6+S(B7+SB8))
+
+	fmulx		%fp0,%fp1	 | ...S(A4+...)
+	fmulx		%fp0,%fp2	 | ...S(B5+...)
+
+	faddd		SINA3,%fp1	| ...A3+S(A4+...)
+	faddd		COSB4,%fp2	| ...B4+S(B5+...)
+
+	fmulx		%fp0,%fp1	 | ...S(A3+...)
+	fmulx		%fp0,%fp2	 | ...S(B4+...)
+
+	faddx		SINA2,%fp1	| ...A2+S(A3+...)
+	faddx		COSB3,%fp2	| ...B3+S(B4+...)
+
+	fmulx		%fp0,%fp1	 | ...S(A2+...)
+	fmulx		%fp0,%fp2	 | ...S(B3+...)
+
+	faddx		SINA1,%fp1	| ...A1+S(A2+...)
+	faddx		COSB2,%fp2	| ...B2+S(B3+...)
+
+	fmulx		%fp0,%fp1	 | ...S(A1+...)
+	fmulx		%fp2,%fp0	 | ...S(B2+...)
+
+
+
+	fmulx		RPRIME(%a6),%fp1	| ...R'S(A1+...)
+	fadds		COSB1,%fp0	| ...B1+S(B2...)
+	fmulx		SPRIME(%a6),%fp0	| ...S'(B1+S(B2+...))
+
+	movel		%d1,-(%sp)	|save users mode & precision
+	andil		#0xff,%d1		|mask off all exceptions
+	fmovel		%d1,%FPCR
+	faddx		RPRIME(%a6),%fp1	| ...COS(X)
+	bsr		sto_cos		|store cosine result
+	fmovel		(%sp)+,%FPCR	|restore users exceptions
+	fadds		POSNEG1(%a6),%fp0	| ...SIN(X)
+
+	bra		t_frcinx
+
+
+NEVEN:
+|--REGISTERS SAVED SO FAR: FP2.
+
+	fmovex		%fp0,RPRIME(%a6)
+	fmulx		%fp0,%fp0	 | ...FP0 IS S = R*R
+	fmoved		COSB8,%fp1			| ...B8
+	fmoved		SINA7,%fp2			| ...A7
+	fmulx		%fp0,%fp1	 | ...SB8
+	fmovex		%fp0,SPRIME(%a6)
+	fmulx		%fp0,%fp2	 | ...SA7
+	rorl		#1,%d0		| ...BIT 31 OF D0 = BIT 1 OF N (J1)
+	andil		#0x80000000,%d0
+	faddd		COSB7,%fp1	| ...B7+SB8
+	faddd		SINA6,%fp2	| ...A6+SA7
+	eorl		%d0,RPRIME(%a6)	| ...R' = SGN1*R
+	eorl		%d0,SPRIME(%a6)	| ...S' = SGN1*S
+	fmulx		%fp0,%fp1	 | ...S(B7+SB8)
+	oril		#0x3F800000,%d0	| ...D0 IS SGN1*1.0 IN SINGLE
+	movel		%d0,POSNEG1(%a6)
+	fmulx		%fp0,%fp2	 | ...S(A6+SA7)
+
+	faddd		COSB6,%fp1	| ...B6+S(B7+SB8)
+	faddd		SINA5,%fp2	| ...A5+S(A6+SA7)
+
+	fmulx		%fp0,%fp1	 | ...S(B6+S(B7+SB8))
+	fmulx		%fp0,%fp2	 | ...S(A5+S(A6+SA7))
+
+	faddd		COSB5,%fp1	| ...B5+S(B6+S(B7+SB8))
+	faddd		SINA4,%fp2	| ...A4+S(A5+S(A6+SA7))
+
+	fmulx		%fp0,%fp1	 | ...S(B5+...)
+	fmulx		%fp0,%fp2	 | ...S(A4+...)
+
+	faddd		COSB4,%fp1	| ...B4+S(B5+...)
+	faddd		SINA3,%fp2	| ...A3+S(A4+...)
+
+	fmulx		%fp0,%fp1	 | ...S(B4+...)
+	fmulx		%fp0,%fp2	 | ...S(A3+...)
+
+	faddx		COSB3,%fp1	| ...B3+S(B4+...)
+	faddx		SINA2,%fp2	| ...A2+S(A3+...)
+
+	fmulx		%fp0,%fp1	 | ...S(B3+...)
+	fmulx		%fp0,%fp2	 | ...S(A2+...)
+
+	faddx		COSB2,%fp1	| ...B2+S(B3+...)
+	faddx		SINA1,%fp2	| ...A1+S(A2+...)
+
+	fmulx		%fp0,%fp1	 | ...S(B2+...)
+	fmulx		%fp2,%fp0	 | ...S(A1+...)
+
+
+
+	fadds		COSB1,%fp1	| ...B1+S(B2...)
+	fmulx		RPRIME(%a6),%fp0	| ...R'S(A1+...)
+	fmulx		SPRIME(%a6),%fp1	| ...S'(B1+S(B2+...))
+
+	movel		%d1,-(%sp)	|save users mode & precision
+	andil		#0xff,%d1		|mask off all exceptions
+	fmovel		%d1,%FPCR
+	fadds		POSNEG1(%a6),%fp1	| ...COS(X)
+	bsr		sto_cos		|store cosine result
+	fmovel		(%sp)+,%FPCR	|restore users exceptions
+	faddx		RPRIME(%a6),%fp0	| ...SIN(X)
+
+	bra		t_frcinx
+
+SCBORS:
+	cmpil		#0x3FFF8000,%d0	| ...(NO BRANCH IN THIS FILE TARGETS SCBORS)
+	bgt		REDUCEX
+
+
+SCSM:
+	movew		#0x0000,XDCARE(%a6)	| ...CLEAR THE WORD AT OFFSET 2 OF X
+	fmoves		#0x3F800000,%fp1	| ...FP1 := 1.0
+
+	movel		%d1,-(%sp)	|save users mode & precision
+	andil		#0xff,%d1		|mask off all exceptions
+	fmovel		%d1,%FPCR
+	fsubs		#0x00800000,%fp1	| ...COS(X) = 1 MINUS A TINY SINGLE
+	bsr		sto_cos		|store cosine result
+	fmovel		(%sp)+,%FPCR	|restore users exceptions
+	fmovex		X(%a6),%fp0	| ...SIN(X) = X
+	bra		t_frcinx
+
+	|end
diff --git a/arch/m68k/fpsp040/ssinh.S b/arch/m68k/fpsp040/ssinh.S
new file mode 100644
index 0000000..c8b3308
--- /dev/null
+++ b/arch/m68k/fpsp040/ssinh.S
@@ -0,0 +1,135 @@
+|
+|	ssinh.sa 3.1 12/10/90
+|
+|       The entry point sSinh computes the hyperbolic sine of
+|       an input argument; sSinhd does the same except for denormalized
+|       input.
+|
+|       Input: Double-extended number X in location pointed to
+|		by address register a0.
+|
+|       Output: The value sinh(X) returned in floating-point register Fp0.
+|
+|       Accuracy and Monotonicity: The returned result is within 3 ulps in
+|               64 significant bits, i.e. within 0.5001 ulp to 53 bits if the
+|               result is subsequently rounded to double precision. The
+|               result is provably monotonic in double precision.
+|
+|       Speed: The program sSINH takes approximately 280 cycles.
+|
+|       Algorithm:
+|
+|       SINH
+|       1. If |X| > 16380 log2, go to 3.
+|
+|       2. (|X| <= 16380 log2) Sinh(X) is obtained by the formulae
+|               Y = |X|, sgn = sign(X), and Z = expm1(Y),
+|               sinh(X) = sgn*(1/2)*( Z + Z/(1+Z) ).
+|          Exit.
+|
+|       3. If |X| > 16480 log2, go to 5.
+|
+|       4. (16380 log2 < |X| <= 16480 log2)
+|               sinh(X) = sign(X) * exp(|X|)/2.
+|          However, invoking exp(|X|) may cause premature overflow.
+|          Thus, we calculate sinh(X) as follows:
+|             Y       := |X|
+|             sgn     := sign(X)
+|             sgnFact := sgn * 2**(16380)
+|             Y'      := Y - 16381 log2
+|             sinh(X) := sgnFact * exp(Y').
+|          Exit.
+|
+|       5. (|X| > 16480 log2) sinh(X) must overflow. Return
+|          sign(X)*Huge*Huge to generate overflow and an infinity with
+|          the appropriate sign. Huge is the largest finite number in
+|          extended format. Exit.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|SSINH	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+T1:	.long 0x40C62D38,0xD3D64634 | ... 16381 LOG2 LEAD (read as a double by fsubd)
+T2:	.long 0x3D6F90AE,0xB1E75CC7 | ... 16381 LOG2 TRAIL (read as a double by fsubd)
+
+	|xref	t_frcinx
+	|xref	t_ovfl
+	|xref	t_extdnrm
+	|xref	setox
+	|xref	setoxm1
+
+	.global	ssinhd
+ssinhd:
+|--SINH(X) = X FOR DENORMALIZED X
+
+	bra	t_extdnrm	| ...external handler (see the xref list)
+
+	.global	ssinh
+ssinh:
+	fmovex	(%a0),%fp0	| ...LOAD INPUT
+
+	movel	(%a0),%d0	| ...D0 = first longword of X
+	movew	4(%a0),%d0	| ...low word of D0 = word at offset 4 of X
+	movel	%d0,%a1		| save a copy of original (compacted) operand
+	andl	#0x7FFFFFFF,%d0	| ...drop the sign bit
+	cmpl	#0x400CB167,%d0	| ...threshold for the 16380 LOG2 test
+	bgts	SINHBIG
+
+|--THIS IS THE USUAL CASE, |X| < 16380 LOG2
+|--Y = |X|, Z = EXPM1(Y), SINH(X) = SIGN(X)*(1/2)*( Z + Z/(1+Z) )
+
+	fabsx	%fp0		| ...Y = |X|
+
+	moveml	%a1/%d1,-(%sp)	| ...keep A1 and D1 across the call
+	fmovemx %fp0-%fp0,(%a0)	| ...write Y to (A0); presumably the operand setoxm1 reads
+	clrl	%d1		| ...D1 := 0 while setoxm1 runs (restored below)
+	bsr	setoxm1		| ...FP0 IS Z = EXPM1(Y)
+	fmovel	#0,%fpcr	| ...FPCR := 0 for the intermediate arithmetic
+	moveml	(%sp)+,%a1/%d1
+
+	fmovex	%fp0,%fp1
+	fadds	#0x3F800000,%fp1	| ...1+Z
+	fmovex	%fp0,-(%sp)	| ...push Z
+	fdivx	%fp1,%fp0		| ...Z/(1+Z)
+	movel	%a1,%d0
+	andl	#0x80000000,%d0	| ...sign of the original X
+	orl	#0x3F000000,%d0	| ...D0 = SIGN(X)*(1/2) in single
+	faddx	(%sp)+,%fp0	| ...Z + Z/(1+Z), popping Z
+	movel	%d0,-(%sp)	| ...push the signed 1/2
+
+	fmovel	%d1,%fpcr	| ...FPCR := D1 before the final multiply
+	fmuls	(%sp)+,%fp0	|last fp inst - possible exceptions set
+
+	bra	t_frcinx
+
+SINHBIG:
+	cmpl	#0x400CB2B3,%d0	| ...threshold for the 16480 LOG2 test
+	bgt	t_ovfl
+	fabsx	%fp0
+	fsubd	T1(%pc),%fp0	| ...(|X|-16381LOG2_LEAD)
+	movel	#0,-(%sp)	| ...low fraction longword
+	movel	#0x80000000,-(%sp)	| ...high fraction longword
+	movel	%a1,%d0
+	andl	#0x80000000,%d0	| ...sign of the original X
+	orl	#0x7FFB0000,%d0	| ...exponent word for sgnFact = sgn * 2**(16380)
+	movel	%d0,-(%sp)	| ...EXTENDED FMT
+	fsubd	T2(%pc),%fp0	| ...|X| - 16381 LOG2, ACCURATE
+
+	movel	%d1,-(%sp)	| ...push D1; popped into FPCR after setox
+	clrl	%d1		| ...D1 := 0 while setox runs
+	fmovemx %fp0-%fp0,(%a0)	| ...write Y' to (A0); presumably the operand setox reads
+	bsr	setox
+	fmovel	(%sp)+,%fpcr	| ...FPCR := the D1 value pushed above
+
+	fmulx	(%sp)+,%fp0	|possible exception
+	bra	t_frcinx
+
+	|end
diff --git a/arch/m68k/fpsp040/stan.S b/arch/m68k/fpsp040/stan.S
new file mode 100644
index 0000000..b5c2a19
--- /dev/null
+++ b/arch/m68k/fpsp040/stan.S
@@ -0,0 +1,455 @@
+|
+|	stan.sa 3.3 7/29/91
+|
+|	The entry point stan computes the tangent of
+|	an input argument;
+|	stand does the same except for denormalized input.
+|
+|	Input: Double-extended number X in location pointed to
+|		by address register a0.
+|
+|	Output: The value tan(X) returned in floating-point register Fp0.
+|
+|	Accuracy and Monotonicity: The returned result is within 3 ulp in
+|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+|		result is subsequently rounded to double precision. The
+|		result is provably monotonic in double precision.
+|
+|	Speed: The program sTAN takes approximately 170 cycles for
+|		input argument X such that |X| < 15Pi, which is the usual
+|		situation.
+|
+|	Algorithm:
+|
+|	1. If |X| >= 15Pi or |X| < 2**(-40), go to 6.
+|
+|	2. Decompose X as X = N(Pi/2) + r where |r| <= Pi/4. Let
+|		k = N mod 2, so in particular, k = 0 or 1.
+|
+|	3. If k is odd, go to 5.
+|
+|	4. (k is even) Tan(X) = tan(r) and tan(r) is approximated by a
+|		rational function U/V where
+|		U = r + r*s*(P1 + s*(P2 + s*P3)), and
+|		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))),  s = r*r.
+|		Exit.
+|
+|	5. (k is odd) Tan(X) = -cot(r). Since tan(r) is approximated by a
+|		rational function U/V where
+|		U = r + r*s*(P1 + s*(P2 + s*P3)), and
+|		V = 1 + s*(Q1 + s*(Q2 + s*(Q3 + s*Q4))), s = r*r,
+|		-Cot(r) = -V/U. Exit.
+|
+|	6. If |X| > 1, go to 8.
+|
+|	7. (|X|<2**(-40)) Tan(X) = X. Exit.
+|
+|	8. Overwrite X by X := X rem 2Pi. Now that |X| <= Pi, go back to 2.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|STAN	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+BOUNDS1:	.long 0x3FD78000,0x4004BC7E	| same two bounds stan tests with immediates
+TWOBYPI:	.long 0x3FE45F30,0x6DC9C883	| 2/PI, read as a double by fmuld in TANMAIN
+|--Rational-function coefficients; the operand size used to read each is noted
+TANQ4:	.long 0x3EA0B759,0xF50F8688	| read as a double (fmoved)
+TANP3:	.long 0xBEF2BAA5,0xA8924F04	| read as a double (fmoved)
+
+TANQ3:	.long 0xBF346F59,0xB39BA65F,0x00000000,0x00000000	| read as a double (faddd); padded to 16 bytes
+
+TANP2:	.long 0x3FF60000,0xE073D3FC,0x199C4A00,0x00000000	| read as extended (faddx)
+
+TANQ2:	.long 0x3FF90000,0xD23CD684,0x15D95FA1,0x00000000	| read as extended (faddx)
+
+TANP1:	.long 0xBFFC0000,0x8895A6C5,0xFB423BCA,0x00000000	| read as extended (faddx)
+
+TANQ1:	.long 0xBFFD0000,0xEEF57E0D,0xA84BC8CE,0x00000000	| read as extended (faddx)
+|--The three constants below are not referenced by the code in stan.S
+INVTWOPI: .long 0x3FFC0000,0xA2F9836E,0x4E44152A,0x00000000
+
+TWOPI1:	.long 0x40010000,0xC90FDAA2,0x00000000,0x00000000
+TWOPI2:	.long 0x3FDF0000,0x85A308D4,0x00000000,0x00000000
+
+|--N*PI/2, -32 <= N <= 32, IN A LEADING TERM IN EXT. AND TRAILING
+|--TERM IN SGL. NOTE THAT PI IS 64-BIT LONG, THUS N*PI/2 IS AT
+|--MOST 69 BITS LONG.
+	.global	PITBL
+PITBL:
+  .long  0xC0040000,0xC90FDAA2,0x2168C235,0x21800000
+  .long  0xC0040000,0xC2C75BCD,0x105D7C23,0xA0D00000
+  .long  0xC0040000,0xBC7EDCF7,0xFF523611,0xA1E80000
+  .long  0xC0040000,0xB6365E22,0xEE46F000,0x21480000
+  .long  0xC0040000,0xAFEDDF4D,0xDD3BA9EE,0xA1200000
+  .long  0xC0040000,0xA9A56078,0xCC3063DD,0x21FC0000
+  .long  0xC0040000,0xA35CE1A3,0xBB251DCB,0x21100000
+  .long  0xC0040000,0x9D1462CE,0xAA19D7B9,0xA1580000
+  .long  0xC0040000,0x96CBE3F9,0x990E91A8,0x21E00000
+  .long  0xC0040000,0x90836524,0x88034B96,0x20B00000
+  .long  0xC0040000,0x8A3AE64F,0x76F80584,0xA1880000
+  .long  0xC0040000,0x83F2677A,0x65ECBF73,0x21C40000
+  .long  0xC0030000,0xFB53D14A,0xA9C2F2C2,0x20000000
+  .long  0xC0030000,0xEEC2D3A0,0x87AC669F,0x21380000
+  .long  0xC0030000,0xE231D5F6,0x6595DA7B,0xA1300000
+  .long  0xC0030000,0xD5A0D84C,0x437F4E58,0x9FC00000
+  .long  0xC0030000,0xC90FDAA2,0x2168C235,0x21000000
+  .long  0xC0030000,0xBC7EDCF7,0xFF523611,0xA1680000
+  .long  0xC0030000,0xAFEDDF4D,0xDD3BA9EE,0xA0A00000
+  .long  0xC0030000,0xA35CE1A3,0xBB251DCB,0x20900000
+  .long  0xC0030000,0x96CBE3F9,0x990E91A8,0x21600000
+  .long  0xC0030000,0x8A3AE64F,0x76F80584,0xA1080000
+  .long  0xC0020000,0xFB53D14A,0xA9C2F2C2,0x1F800000
+  .long  0xC0020000,0xE231D5F6,0x6595DA7B,0xA0B00000
+  .long  0xC0020000,0xC90FDAA2,0x2168C235,0x20800000
+  .long  0xC0020000,0xAFEDDF4D,0xDD3BA9EE,0xA0200000
+  .long  0xC0020000,0x96CBE3F9,0x990E91A8,0x20E00000
+  .long  0xC0010000,0xFB53D14A,0xA9C2F2C2,0x1F000000
+  .long  0xC0010000,0xC90FDAA2,0x2168C235,0x20000000
+  .long  0xC0010000,0x96CBE3F9,0x990E91A8,0x20600000
+  .long  0xC0000000,0xC90FDAA2,0x2168C235,0x1F800000
+  .long  0xBFFF0000,0xC90FDAA2,0x2168C235,0x1F000000
+  .long  0x00000000,0x00000000,0x00000000,0x00000000
+  .long  0x3FFF0000,0xC90FDAA2,0x2168C235,0x9F000000
+  .long  0x40000000,0xC90FDAA2,0x2168C235,0x9F800000
+  .long  0x40010000,0x96CBE3F9,0x990E91A8,0xA0600000
+  .long  0x40010000,0xC90FDAA2,0x2168C235,0xA0000000
+  .long  0x40010000,0xFB53D14A,0xA9C2F2C2,0x9F000000
+  .long  0x40020000,0x96CBE3F9,0x990E91A8,0xA0E00000
+  .long  0x40020000,0xAFEDDF4D,0xDD3BA9EE,0x20200000
+  .long  0x40020000,0xC90FDAA2,0x2168C235,0xA0800000
+  .long  0x40020000,0xE231D5F6,0x6595DA7B,0x20B00000
+  .long  0x40020000,0xFB53D14A,0xA9C2F2C2,0x9F800000
+  .long  0x40030000,0x8A3AE64F,0x76F80584,0x21080000
+  .long  0x40030000,0x96CBE3F9,0x990E91A8,0xA1600000
+  .long  0x40030000,0xA35CE1A3,0xBB251DCB,0xA0900000
+  .long  0x40030000,0xAFEDDF4D,0xDD3BA9EE,0x20A00000
+  .long  0x40030000,0xBC7EDCF7,0xFF523611,0x21680000
+  .long  0x40030000,0xC90FDAA2,0x2168C235,0xA1000000
+  .long  0x40030000,0xD5A0D84C,0x437F4E58,0x1FC00000
+  .long  0x40030000,0xE231D5F6,0x6595DA7B,0x21300000
+  .long  0x40030000,0xEEC2D3A0,0x87AC669F,0xA1380000
+  .long  0x40030000,0xFB53D14A,0xA9C2F2C2,0xA0000000
+  .long  0x40040000,0x83F2677A,0x65ECBF73,0xA1C40000
+  .long  0x40040000,0x8A3AE64F,0x76F80584,0x21880000
+  .long  0x40040000,0x90836524,0x88034B96,0xA0B00000
+  .long  0x40040000,0x96CBE3F9,0x990E91A8,0xA1E00000
+  .long  0x40040000,0x9D1462CE,0xAA19D7B9,0x21580000
+  .long  0x40040000,0xA35CE1A3,0xBB251DCB,0xA1100000
+  .long  0x40040000,0xA9A56078,0xCC3063DD,0xA1FC0000
+  .long  0x40040000,0xAFEDDF4D,0xDD3BA9EE,0x21200000
+  .long  0x40040000,0xB6365E22,0xEE46F000,0xA1480000
+  .long  0x40040000,0xBC7EDCF7,0xFF523611,0x21E80000
+  .long  0x40040000,0xC2C75BCD,0x105D7C23,0x20D00000
+  .long  0x40040000,0xC90FDAA2,0x2168C235,0xA1800000
+
+	.set	INARG,FP_SCR4		| scratch copy of the current remainder, written by LOOP
+
+	.set	TWOTO63,L_SCR1		| SIGN(INARG)*2**63 as a single, built in WORK
+	.set	ENDFLAG,L_SCR2		| 1 on the last reduction pass, otherwise 0
+	.set	N,L_SCR3		| integer quotient stored by RESTORE
+
+	| xref	t_frcinx
+	|xref	t_extdnrm
+
+	.global	stand		| entry point used when the input X is denormalized
+stand:
+|--TAN(X) = X FOR DENORMALIZED X
+|--No arithmetic is done here; control is handed to the external t_extdnrm.
+	bra		t_extdnrm
+
+	.global	stan		| entry: tan(X); X is read from (a0), result left in fp0
+stan:
+	fmovex		(%a0),%fp0	| ...LOAD INPUT
+|--d0 := compact |X| (exponent word, then the top 16 mantissa bits)
+	movel		(%a0),%d0
+	movew		4(%a0),%d0
+	andil		#0x7FFFFFFF,%d0
+
+	cmpil		#0x3FD78000,%d0		| ...|X| >= 2**(-40)?
+	bges		TANOK1
+	bra		TANSM		| tiny argument: the result is X itself
+TANOK1:
+	cmpil		#0x4004BC7E,%d0		| ...|X| < 15 PI?
+	blts		TANMAIN
+	bra		REDUCEX		| large argument: general reduction first
+
+
+TANMAIN:
+|--THIS IS THE USUAL CASE, |X| <= 15 PI.
+|--THE ARGUMENT REDUCTION IS DONE BY TABLE LOOK UP.
+	fmovex		%fp0,%fp1
+	fmuld		TWOBYPI,%fp1	| ...X*2/PI
+
+|--HIDE THE NEXT TWO INSTRUCTIONS
+	leal		PITBL+0x200,%a1 | ...TABLE OF N*PI/2, N = -32,...,32
+|--PITBL+0x200 is the N = 0 entry (32 entries of 16 bytes precede it)
+|--FP1 IS NOW READY
+	fmovel		%fp1,%d0		| ...CONVERT TO INTEGER
+|--Each table entry is 16 bytes, so the byte offset of entry N is N*16
+	asll		#4,%d0
+	addal		%d0,%a1		| ...ADDRESS N*PIBY2 IN Y1, Y2
+
+	fsubx		(%a1)+,%fp0	| ...X-Y1
+|--HIDE THE NEXT ONE
+|--Y1 is the extended leading term; a1 now points at the single trailing term Y2
+	fsubs		(%a1),%fp0	| ...FP0 IS R = (X-Y1)-Y2
+|--D0 holds N*16; rotating right by 5 moves the low bit of N into bit 31
+	rorl		#5,%d0
+	andil		#0x80000000,%d0	| ...D0 WAS ODD IFF D0 < 0
+
+TANCONT:
+|--Also entered from RESTORE with the low bit of N already rotated into bit 31
+	cmpil		#0,%d0
+	blt		NODD		| N odd: compute -V/U instead
+|--N even: TAN(X) = U/V with U in FP0 and V in FP1
+	fmovex		%fp0,%fp1
+	fmulx		%fp1,%fp1		| ...S = R*R
+
+	fmoved		TANQ4,%fp3
+	fmoved		TANP3,%fp2
+
+	fmulx		%fp1,%fp3		| ...SQ4
+	fmulx		%fp1,%fp2		| ...SP3
+
+	faddd		TANQ3,%fp3	| ...Q3+SQ4
+	faddx		TANP2,%fp2	| ...P2+SP3
+
+	fmulx		%fp1,%fp3		| ...S(Q3+SQ4)
+	fmulx		%fp1,%fp2		| ...S(P2+SP3)
+
+	faddx		TANQ2,%fp3	| ...Q2+S(Q3+SQ4)
+	faddx		TANP1,%fp2	| ...P1+S(P2+SP3)
+
+	fmulx		%fp1,%fp3		| ...S(Q2+S(Q3+SQ4))
+	fmulx		%fp1,%fp2		| ...S(P1+S(P2+SP3))
+
+	faddx		TANQ1,%fp3	| ...Q1+S(Q2+S(Q3+SQ4))
+	fmulx		%fp0,%fp2		| ...RS(P1+S(P2+SP3))
+
+	fmulx		%fp3,%fp1		| ...S(Q1+S(Q2+S(Q3+SQ4)))
+
+
+	faddx		%fp2,%fp0		| ...R+RS(P1+S(P2+SP3))
+
+
+	fadds		#0x3F800000,%fp1	| ...1+S(Q1+...)
+
+	fmovel		%d1,%fpcr		|restore users exceptions
+	fdivx		%fp1,%fp0		|last inst - possible exception set
+
+	bra		t_frcinx
+
+NODD:
+	fmovex		%fp0,%fp1	| keep R in FP1; FP0 becomes S
+	fmulx		%fp0,%fp0		| ...S = R*R
+
+	fmoved		TANQ4,%fp3
+	fmoved		TANP3,%fp2
+
+	fmulx		%fp0,%fp3		| ...SQ4
+	fmulx		%fp0,%fp2		| ...SP3
+
+	faddd		TANQ3,%fp3	| ...Q3+SQ4
+	faddx		TANP2,%fp2	| ...P2+SP3
+
+	fmulx		%fp0,%fp3		| ...S(Q3+SQ4)
+	fmulx		%fp0,%fp2		| ...S(P2+SP3)
+
+	faddx		TANQ2,%fp3	| ...Q2+S(Q3+SQ4)
+	faddx		TANP1,%fp2	| ...P1+S(P2+SP3)
+
+	fmulx		%fp0,%fp3		| ...S(Q2+S(Q3+SQ4))
+	fmulx		%fp0,%fp2		| ...S(P1+S(P2+SP3))
+
+	faddx		TANQ1,%fp3	| ...Q1+S(Q2+S(Q3+SQ4))
+	fmulx		%fp1,%fp2		| ...RS(P1+S(P2+SP3))
+
+	fmulx		%fp3,%fp0		| ...S(Q1+S(Q2+S(Q3+SQ4)))
+
+
+	faddx		%fp2,%fp1		| ...R+RS(P1+S(P2+SP3))
+	fadds		#0x3F800000,%fp0	| ...1+S(Q1+...)
+
+|--FP0 is V and FP1 is U; push U and flip its sign bit so the divide gives -V/U
+	fmovex		%fp1,-(%sp)
+	eoril		#0x80000000,(%sp)	| toggle the sign bit of the pushed U
+
+	fmovel		%d1,%fpcr		|restore users exceptions
+	fdivx		(%sp)+,%fp0	|last inst - possible exception set
+
+	bra		t_frcinx
+
+TANBORS:
+|--IF |X| > 15PI, WE USE THE GENERAL ARGUMENT REDUCTION.
+|--IF |X| < 2**(-40), RETURN X. (No branch in stan.S targets TANBORS.)
+	cmpil		#0x3FFF8000,%d0	| compact |X| above 1.0?
+	bgts		REDUCEX
+
+TANSM:
+|--Tiny argument: X goes through the stack so the final load runs with FPCR = d1
+	fmovex		%fp0,-(%sp)
+	fmovel		%d1,%fpcr		 |restore users exceptions
+	fmovex		(%sp)+,%fp0	|last inst - possible exception set
+
+	bra		t_frcinx
+
+
+REDUCEX:
+|--WHEN REDUCEX IS USED, THE CODE WILL INEVITABLY BE SLOW.
+|--THIS REDUCTION METHOD, HOWEVER, IS MUCH FASTER THAN USING
+|--THE REMAINDER INSTRUCTION WHICH IS NOW IN SOFTWARE.
+|--FP2-FP5 and D2 are saved here and restored at RESTORE.
+	fmovemx	%fp2-%fp5,-(%a7)	| ...save FP2 through FP5
+	movel		%d2,-(%a7)
+        fmoves         #0x00000000,%fp1	| low part r of the remainder starts at 0
+
+|--If compact form of abs(arg) in d0=$7ffeffff, argument is so large that
+|--there is a danger of unwanted overflow in first LOOP iteration.  In this
+|--case, reduce argument by one remainder step to make subsequent reduction
+|--safe.
+	cmpil	#0x7ffeffff,%d0		|is argument dangerously large?
+	bnes	LOOP
+	movel	#0x7ffe0000,FP_SCR2(%a6)	|yes
+|					;create 2**16383*PI/2
+	movel	#0xc90fdaa2,FP_SCR2+4(%a6)
+	clrl	FP_SCR2+8(%a6)
+	ftstx	%fp0			|test sign of argument
+	movel	#0x7fdc0000,FP_SCR3(%a6)	|create low half of 2**16383*
+|					;PI/2 at FP_SCR3
+	movel	#0x85a308d3,FP_SCR3+4(%a6)
+	clrl   FP_SCR3+8(%a6)
+	fblt	red_neg			|negative arg: add the constants as built
+	orw	#0x8000,FP_SCR2(%a6)	|positive arg
+	orw	#0x8000,FP_SCR3(%a6)	|sign bit set on both parts so the adds subtract
+red_neg:
+	faddx  FP_SCR2(%a6),%fp0		|high part of reduction is exact
+	fmovex  %fp0,%fp1		|save high result in fp1
+	faddx  FP_SCR3(%a6),%fp0		|low part of reduction
+	fsubx  %fp0,%fp1			|determine low component of result
+	faddx  FP_SCR3(%a6),%fp1		|fp0/fp1 are reduced argument.
+
+|--ON ENTRY, FP0 IS X, ON RETURN, FP0 IS X REM PI/2, |X| <= PI/4.
+|--integer quotient will be stored in N
+|--Intermediate remainder is 66-bit long; (R,r) in (FP0,FP1)
+
+LOOP:
+	fmovex		%fp0,INARG(%a6)	| ...+-2**K * F, 1 <= F < 2
+	movew		INARG(%a6),%d0	| sign/exponent word into the low word of D0
+        movel          %d0,%a1		| ...save a copy of D0
+	andil		#0x00007FFF,%d0	| keep only the biased exponent
+	subil		#0x00003FFF,%d0	| ...D0 IS K
+	cmpil		#28,%d0
+	bles		LASTLOOP	| K <= 28: one final pass with L = 0
+CONTLOOP:
+	subil		#27,%d0	 | ...D0 IS L := K-27
+	movel		#0,ENDFLAG(%a6)	| more passes will follow
+	bras		WORK
+LASTLOOP:
+	clrl		%d0		| ...D0 IS L := 0
+	movel		#1,ENDFLAG(%a6)	| mark this as the last pass
+
+WORK:
+|--FIND THE REMAINDER OF (R,r) W.R.T.	2**L * (PI/2). L IS SO CHOSEN
+|--THAT	INT( X * (2/PI) / 2**(L) ) < 2**29.
+
+|--CREATE 2**(-L) * (2/PI), SIGN(INARG)*2**(63),
+|--2**L * (PIby2_1), 2**L * (PIby2_2)
+
+	movel		#0x00003FFE,%d2	| ...BIASED EXPO OF 2/PI
+	subl		%d0,%d2		| ...BIASED EXPO OF 2**(-L)*(2/PI)
+
+	movel		#0xA2F9836E,FP_SCR1+4(%a6)
+	movel		#0x4E44152A,FP_SCR1+8(%a6)
+	movew		%d2,FP_SCR1(%a6)	| ...FP_SCR1 is 2**(-L)*(2/PI)
+
+	fmovex		%fp0,%fp2
+	fmulx		FP_SCR1(%a6),%fp2
+|--WE MUST NOW FIND INT(FP2). SINCE WE NEED THIS VALUE IN
+|--FLOATING POINT FORMAT, THE TWO FMOVE'S	FMOVE.L FP <--> N
+|--WILL BE TOO INEFFICIENT. THE WAY AROUND IT IS THAT
+|--(SIGN(INARG)*2**63	+	FP2) - SIGN(INARG)*2**63 WILL GIVE
+|--US THE DESIRED VALUE IN FLOATING POINT.
+
+|--HIDE SIX CYCLES OF INSTRUCTION
+        movel		%a1,%d2
+        swap		%d2
+	andil		#0x80000000,%d2
+	oril		#0x5F000000,%d2	| ...D2 IS SIGN(INARG)*2**63 IN SGL
+	movel		%d2,TWOTO63(%a6)
+
+	movel		%d0,%d2
+	addil		#0x00003FFF,%d2	| ...BIASED EXPO OF 2**L * (PI/2)
+
+|--FP2 IS READY
+	fadds		TWOTO63(%a6),%fp2	| ...THE FRACTIONAL PART OF FP2 IS ROUNDED
+
+|--HIDE 4 CYCLES OF INSTRUCTION; creating 2**(L)*Piby2_1  and  2**(L)*Piby2_2
+        movew		%d2,FP_SCR2(%a6)
+	clrw           FP_SCR2+2(%a6)
+	movel		#0xC90FDAA2,FP_SCR2+4(%a6)
+	clrl		FP_SCR2+8(%a6)		| ...FP_SCR2 is  2**(L) * Piby2_1
+
+|--FP2 IS READY
+	fsubs		TWOTO63(%a6),%fp2		| ...FP2 is N
+
+	addil		#0x00003FDD,%d0	| biased exponent for the low part: 0x3FDD + L
+        movew		%d0,FP_SCR3(%a6)
+	clrw           FP_SCR3+2(%a6)
+	movel		#0x85A308D3,FP_SCR3+4(%a6)
+	clrl		FP_SCR3+8(%a6)		| ...FP_SCR3 is 2**(L) * Piby2_2
+
+	movel		ENDFLAG(%a6),%d0	| D0 is reused as the last-pass flag
+
+|--We are now ready to perform (R+r) - N*P1 - N*P2, P1 = 2**(L) * Piby2_1 and
+|--P2 = 2**(L) * Piby2_2
+	fmovex		%fp2,%fp4
+	fmulx		FP_SCR2(%a6),%fp4		| ...W = N*P1
+	fmovex		%fp2,%fp5
+	fmulx		FP_SCR3(%a6),%fp5		| ...w = N*P2
+	fmovex		%fp4,%fp3
+|--we want P+p = W+w  but  |p| <= half ulp of P
+|--Then, we need to compute  A := R-P   and  a := r-p
+	faddx		%fp5,%fp3			| ...FP3 is P
+	fsubx		%fp3,%fp4			| ...W-P
+
+	fsubx		%fp3,%fp0			| ...FP0 is A := R - P
+        faddx		%fp5,%fp4			| ...FP4 is p = (W-P)+w
+
+	fmovex		%fp0,%fp3			| ...FP3 A
+	fsubx		%fp4,%fp1			| ...FP1 is a := r - p
+
+|--Now we need to normalize (A,a) to  "new (R,r)" where R+r = A+a but
+|--|r| <= half ulp of R.
+	faddx		%fp1,%fp0			| ...FP0 is R := A+a
+|--No need to calculate r if this is the last loop
+	cmpil		#0,%d0
+	bgt		RESTORE
+
+|--Need to calculate r
+	fsubx		%fp0,%fp3			| ...A-R
+	faddx		%fp3,%fp1			| ...FP1 is r := (A-R)+a
+	bra		LOOP
+
+RESTORE:
+        fmovel		%fp2,N(%a6)	| store the last quotient as an integer
+	movel		(%a7)+,%d2	| undo the saves made at REDUCEX
+	fmovemx	(%a7)+,%fp2-%fp5
+
+|--Rotate the low bit of N into bit 31, the form TANCONT tests for odd N
+	movel		N(%a6),%d0
+        rorl		#1,%d0
+
+
+	bra		TANCONT
+
+	|end
diff --git a/arch/m68k/fpsp040/stanh.S b/arch/m68k/fpsp040/stanh.S
new file mode 100644
index 0000000..33b0098
--- /dev/null
+++ b/arch/m68k/fpsp040/stanh.S
@@ -0,0 +1,185 @@
+|
+|	stanh.sa 3.1 12/10/90
+|
+|	The entry point sTanh computes the hyperbolic tangent of
+|	an input argument; sTanhd does the same except for denormalized
+|	input.
+|
+|	Input: Double-extended number X in location pointed to
+|		by address register a0.
+|
+|	Output: The value tanh(X) returned in floating-point register Fp0.
+|
+|	Accuracy and Monotonicity: The returned result is within 3 ulps in
+|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+|		result is subsequently rounded to double precision. The
+|		result is provably monotonic in double precision.
+|
+|	Speed: The program stanh takes approximately 270 cycles.
+|
+|	Algorithm:
+|
+|	TANH
+|	1. If |X| >= (5/2) log2 or |X| <= 2**(-40), go to 3.
+|
+|	2. (2**(-40) < |X| < (5/2) log2) Calculate tanh(X) by
+|		sgn := sign(X), y := 2|X|, z := expm1(Y), and
+|		tanh(X) = sgn*( z/(2+z) ).
+|		Exit.
+|
+|	3. (|X| <= 2**(-40) or |X| >= (5/2) log2). If |X| < 1,
+|		go to 7.
+|
+|	4. (|X| >= (5/2) log2) If |X| >= 50 log2, go to 6.
+|
+|	5. ((5/2) log2 <= |X| < 50 log2) Calculate tanh(X) by
+|		sgn := sign(X), y := 2|X|, z := exp(Y),
+|		tanh(X) = sgn - [ sgn*2/(1+z) ].
+|		Exit.
+|
+|	6. (|X| >= 50 log2) Tanh(X) = +-1 (round to nearest). Thus, we
+|		calculate Tanh(X) by
+|		sgn := sign(X), Tiny := 2**(-126),
+|		tanh(X) := sgn - sgn*Tiny.
+|		Exit.
+|
+|	7. (|X| < 2**(-40)). Tanh(X) = X.	Exit.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|STANH	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	.set	X,FP_SCR5		| scratch extended slot: X on entry, later Y = 2|X|
+	.set	XDCARE,X+2		| word after the sign/exponent word; cleared in TANHSM
+	.set	XFRAC,X+4		| mantissa of X (not referenced by name in stanh.S)
+
+	.set	SGN,L_SCR3		| holds only the sign bit of X once masked
+
+	.set	V,FP_SCR6		| scratch extended slot for the divisor Z+2
+
+BOUNDS1:	.long 0x3FD78000,0x3FFFDDCE | ... 2^(-40), (5/2)LOG2
+
+	|xref	t_frcinx
+	|xref	t_extdnrm
+	|xref	setox
+	|xref	setoxm1
+
+	.global	stanhd		| entry point used when the input X is denormalized
+stanhd:
+|--TANH(X) = X FOR DENORMALIZED X
+|--No arithmetic is done here; control is handed to the external t_extdnrm.
+	bra		t_extdnrm
+
+	.global	stanh		| entry: tanh(X); X is read from (a0), result left in fp0
+stanh:
+	fmovex		(%a0),%fp0	| ...LOAD INPUT
+|--Keep X at X(a6), then overwrite its first long with the compact form of X
+	fmovex		%fp0,X(%a6)
+	movel		(%a0),%d0
+	movew		4(%a0),%d0
+	movel		%d0,X(%a6)	| compact X, sign included
+	andl		#0x7FFFFFFF,%d0	| d0 is compact |X|
+	cmp2l		BOUNDS1(%pc),%d0	| ...2**(-40) < |X| < (5/2)LOG2 ?
+	bcss		TANHBORS	| carry set: |X| is outside the bounds
+
+|--THIS IS THE USUAL CASE
+|--Y = 2|X|, Z = EXPM1(Y), TANH(X) = SIGN(X) * Z / (Z+2).
+
+	movel		X(%a6),%d0
+	movel		%d0,SGN(%a6)
+	andl		#0x7FFF0000,%d0	| keep the exponent; drop sign and low word
+	addl		#0x00010000,%d0	| ...EXPONENT OF 2|X|
+	movel		%d0,X(%a6)	| mantissa longs at X+4 and X+8 are unchanged
+	andl		#0x80000000,SGN(%a6)	| SGN keeps only the sign bit
+	fmovex		X(%a6),%fp0		| ...FP0 IS Y = 2|X|
+
+	movel		%d1,-(%a7)	| save d1 across the call
+	clrl		%d1		| setoxm1 is called with d1 = 0
+	fmovemx	%fp0-%fp0,(%a0)	| overwrite the operand at (a0) with Y
+	bsr		setoxm1		| ...FP0 IS Z = EXPM1(Y)
+	movel		(%a7)+,%d1
+
+	fmovex		%fp0,%fp1
+	fadds		#0x40000000,%fp1	| ...Z+2
+	movel		SGN(%a6),%d0
+	fmovex		%fp1,V(%a6)
+	eorl		%d0,V(%a6)	| give the divisor the sign of X
+
+	fmovel		%d1,%FPCR		|restore users exceptions
+	fdivx		V(%a6),%fp0	| SIGN(X) * Z/(Z+2)
+	bra		t_frcinx
+
+TANHBORS:
+	cmpl		#0x3FFF8000,%d0	| compact |X| below 1.0: this is the tiny case
+	blt		TANHSM
+
+	cmpl		#0x40048AA1,%d0	| upper bound of the middle range described below
+	bgt		TANHHUGE
+
+|-- (5/2) LOG2 < |X| < 50 LOG2,
+|--TANH(X) = 1 - (2/[EXP(2X)+1]). LET Y = 2|X|, SGN = SIGN(X),
+|--TANH(X) = SGN -	SGN*2/[EXP(Y)+1].
+
+	movel		X(%a6),%d0
+	movel		%d0,SGN(%a6)
+	andl		#0x7FFF0000,%d0	| keep the exponent; drop sign and low word
+	addl		#0x00010000,%d0	| ...EXPO OF 2|X|
+	movel		%d0,X(%a6)		| ...Y = 2|X|
+	andl		#0x80000000,SGN(%a6)	| SGN keeps only the sign bit
+	movel		SGN(%a6),%d0	| d0 is loaded from SGN again after the call
+	fmovex		X(%a6),%fp0		| ...Y = 2|X|
+
+	movel		%d1,-(%a7)	| save d1 across the call
+	clrl		%d1		| setox is called with d1 = 0
+	fmovemx	%fp0-%fp0,(%a0)	| overwrite the operand at (a0) with Y
+	bsr		setox		| ...FP0 IS EXP(Y)
+	movel		(%a7)+,%d1
+	movel		SGN(%a6),%d0
+	fadds		#0x3F800000,%fp0	| ...EXP(Y)+1
+
+	eorl		#0xC0000000,%d0	| ...-SIGN(X)*2
+	fmoves		%d0,%fp1		| ...-SIGN(X)*2 IN SGL FMT
+	fdivx		%fp0,%fp1		| ...-SIGN(X)2 / [EXP(Y)+1 ]
+
+	movel		SGN(%a6),%d0
+	orl		#0x3F800000,%d0	| ...SGN
+	fmoves		%d0,%fp0		| ...SGN IN SGL FMT
+
+	fmovel		%d1,%FPCR		|restore users exceptions
+	faddx		%fp1,%fp0	| SGN - SGN*2/[EXP(Y)+1]
+
+	bra		t_frcinx
+
+TANHSM:
+	movew		#0x0000,XDCARE(%a6)	| clear the mantissa bits the compact form left at X+2
+
+	fmovel		%d1,%FPCR		|restore users exceptions
+	fmovex		X(%a6),%fp0		|last inst - possible exception set
+
+	bra		t_frcinx
+
+TANHHUGE:
+|---RETURN SGN(X) - SGN(X)EPS
+	movel		X(%a6),%d0
+	andl		#0x80000000,%d0	| isolate the sign of X
+	orl		#0x3F800000,%d0	| d0 = SIGN(X)*1.0 as a single
+	fmoves		%d0,%fp0
+	andl		#0x80000000,%d0	| back to the sign bit alone
+	eorl		#0x80800000,%d0	| ...-SIGN(X)*EPS
+|--EPS is the single 0x00800000, i.e. 2**(-126), with the opposite sign of X
+	fmovel		%d1,%FPCR		|restore users exceptions
+	fadds		%d0,%fp0
+
+	bra		t_frcinx
+
+	|end
diff --git a/arch/m68k/fpsp040/sto_res.S b/arch/m68k/fpsp040/sto_res.S
new file mode 100644
index 0000000..0cdca3b
--- /dev/null
+++ b/arch/m68k/fpsp040/sto_res.S
@@ -0,0 +1,98 @@
+|
+|	sto_res.sa 3.1 12/10/90
+|
+|	Takes the result and puts it in where the user expects it.
+|	Library functions return result in fp0.	If fp0 is not the
+|	users destination register then fp0 is moved to the
+|	correct floating-point destination register.  fp0 and fp1
+|	are then restored to the original contents.
+|
+|	Input:	result in fp0,fp1
+|
+|		d2 & a0 should be kept unmodified
+|
+|	Output:	moves the result to the true destination reg or mem
+|
+|	Modifies: destination floating point register
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+STO_RES:	|idnt	2,1 | Motorola 040 Floating Point Software Package
+
+
+	|section	8
+
+#include "fpsp.h"
+
+	.global	sto_cos		| store fp1 into the FP register named by CMDREG1B{13:3}
+sto_cos:
+	bfextu		CMDREG1B(%a6){#13:#3},%d0	|extract cos destination
+	cmpib		#3,%d0		|check for fp0-fp3 cases
+	bles		c_fp0123
+	fmovemx	%fp1-%fp1,-(%a7)	|fp4-fp7: push fp1, then pop it into the target
+	moveql		#7,%d1
+	subl		%d0,%d1		|d1 = 7- (dest. reg. no.)
+	clrl		%d0
+	bsetl		%d1,%d0		|d0 is dynamic register mask
+	fmovemx	(%a7)+,%d0	|pop into the register selected by the mask
+	rts
+c_fp0123:
+	cmpib		#0,%d0	|fp0-fp3 are written to their USER_FPn slots instead
+	beqs		c_is_fp0
+	cmpib		#1,%d0
+	beqs		c_is_fp1
+	cmpib		#2,%d0
+	beqs		c_is_fp2
+c_is_fp3:
+	fmovemx	%fp1-%fp1,USER_FP3(%a6)	|remaining case: destination is fp3
+	rts
+c_is_fp2:
+	fmovemx	%fp1-%fp1,USER_FP2(%a6)
+	rts
+c_is_fp1:
+	fmovemx	%fp1-%fp1,USER_FP1(%a6)
+	rts
+c_is_fp0:
+	fmovemx	%fp1-%fp1,USER_FP0(%a6)
+	rts
+
+
+	.global	sto_res		| store fp0 into the FP register named by CMDREG1B{6:3}
+sto_res:
+	bfextu		CMDREG1B(%a6){#6:#3},%d0	|extract destination register
+	cmpib		#3,%d0		|check for fp0-fp3 cases
+	bles		fp0123
+	fmovemx	%fp0-%fp0,-(%a7)	|fp4-fp7: push fp0, then pop it into the target
+	moveql		#7,%d1
+	subl		%d0,%d1		|d1 = 7- (dest. reg. no.)
+	clrl		%d0
+	bsetl		%d1,%d0		|d0 is dynamic register mask
+	fmovemx	(%a7)+,%d0	|pop into the register selected by the mask
+	rts
+fp0123:
+	cmpib		#0,%d0	|fp0-fp3 are written to their USER_FPn slots instead
+	beqs		is_fp0
+	cmpib		#1,%d0
+	beqs		is_fp1
+	cmpib		#2,%d0
+	beqs		is_fp2
+is_fp3:
+	fmovemx	%fp0-%fp0,USER_FP3(%a6)	|remaining case: destination is fp3
+	rts
+is_fp2:
+	fmovemx	%fp0-%fp0,USER_FP2(%a6)
+	rts
+is_fp1:
+	fmovemx	%fp0-%fp0,USER_FP1(%a6)
+	rts
+is_fp0:
+	fmovemx	%fp0-%fp0,USER_FP0(%a6)
+	rts
+
+	|end
diff --git a/arch/m68k/fpsp040/stwotox.S b/arch/m68k/fpsp040/stwotox.S
new file mode 100644
index 0000000..4e3c140
--- /dev/null
+++ b/arch/m68k/fpsp040/stwotox.S
@@ -0,0 +1,427 @@
+|
+|	stwotox.sa 3.1 12/10/90
+|
+|	stwotox  --- 2**X
+|	stwotoxd --- 2**X for denormalized X
+|	stentox  --- 10**X
+|	stentoxd --- 10**X for denormalized X
+|
+|	Input: Double-extended number X in location pointed to
+|		by address register a0.
+|
+|	Output: The function values are returned in Fp0.
+|
+|	Accuracy and Monotonicity: The returned result is within 2 ulps in
+|		64 significant bit, i.e. within 0.5001 ulp to 53 bits if the
+|		result is subsequently rounded to double precision. The
+|		result is provably monotonic in double precision.
+|
+|	Speed: The program stwotox takes approximately 190 cycles and the
+|		program stentox takes approximately 200 cycles.
+|
+|	Algorithm:
+|
+|	twotox
+|	1. If |X| > 16480, go to ExpBig.
+|
+|	2. If |X| < 2**(-70), go to ExpSm.
+|
+|	3. Decompose X as X = N/64 + r where |r| <= 1/128. Furthermore
+|		decompose N as
+|		 N = 64(M + M') + j,  j = 0,1,2,...,63.
+|
+|	4. Overwrite r := r * log2. Then
+|		2**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
+|		Go to expr to compute that expression.
+|
+|	tentox
+|	1. If |X| > 16480*log_10(2) (base 10 log of 2), go to ExpBig.
+|
+|	2. If |X| < 2**(-70), go to ExpSm.
+|
+|	3. Set y := X*log_2(10)*64 (base 2 log of 10). Set
+|		N := round-to-int(y). Decompose N as
+|		 N = 64(M + M') + j,  j = 0,1,2,...,63.
+|
+|	4. Define r as
+|		r := ((X - N*L1)-N*L2) * L10
+|		where L1, L2 are the leading and trailing parts of log_10(2)/64
+|		and L10 is the natural log of 10. Then
+|		10**X = 2**(M') * 2**(M) * 2**(j/64) * exp(r).
+|		Go to expr to compute that expression.
+|
+|	expr
+|	1. Fetch 2**(j/64) from table as Fact1 and Fact2.
+|
+|	2. Overwrite Fact1 and Fact2 by
+|		Fact1 := 2**(M) * Fact1
+|		Fact2 := 2**(M) * Fact2
+|		Thus Fact1 + Fact2 = 2**(M) * 2**(j/64).
+|
+|	3. Calculate P where 1 + P approximates exp(r):
+|		P = r + r*r*(A1+r*(A2+...+r*A5)).
+|
+|	4. Let AdjFact := 2**(M'). Return
+|		AdjFact * ( Fact1 + ((Fact1*P) + Fact2) ).
+|		Exit.
+|
+|	ExpBig
+|	1. Generate overflow by Huge * Huge if X > 0; otherwise, generate
+|		underflow by Tiny * Tiny.
+|
+|	ExpSm
+|	1. Return 1 + X.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|STWOTOX	idnt	2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+BOUNDS1:	.long 0x3FB98000,0x400D80C0 | ... 2^(-70),16480
+BOUNDS2:	.long 0x3FB98000,0x400B9B07 | ... 2^(-70),16480 LOG2/LOG10
+|--The code in stwotox.S tests these bounds with immediates, not via the labels
+L2TEN64:	.long 0x406A934F,0x0979A371 | ... 64LOG10/LOG2
+L10TWO1:	.long 0x3F734413,0x509F8000 | ... LOG2/64LOG10
+
+L10TWO2:	.long 0xBFCD0000,0xC0219DC1,0xDA994FD2,0x00000000	| trailing part, read as extended (fmulx)
+
+LOG10:	.long 0x40000000,0x935D8DDD,0xAAA8AC17,0x00000000	| read as extended (fmulx) in TENMAIN
+
+LOG2:	.long 0x3FFE0000,0xB17217F7,0xD1CF79AC,0x00000000	| read as extended (fmulx) in TWOMAIN
+|--Polynomial coefficients for expr; all five are read as doubles
+EXPA5:	.long 0x3F56C16D,0x6F7BD0B2
+EXPA4:	.long 0x3F811112,0x302C712C
+EXPA3:	.long 0x3FA55555,0x55554CC1
+EXPA2:	.long 0x3FC55555,0x55554A54
+EXPA1:	.long 0x3FE00000,0x00000000,0x00000000,0x00000000
+|--HUGE and TINY are not referenced by the code in stwotox.S
+HUGE:	.long 0x7FFE0000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
+TINY:	.long 0x00010000,0xFFFFFFFF,0xFFFFFFFF,0x00000000
+
+EXPTBL:
+	.long  0x3FFF0000,0x80000000,0x00000000,0x3F738000
+	.long  0x3FFF0000,0x8164D1F3,0xBC030773,0x3FBEF7CA
+	.long  0x3FFF0000,0x82CD8698,0xAC2BA1D7,0x3FBDF8A9
+	.long  0x3FFF0000,0x843A28C3,0xACDE4046,0x3FBCD7C9
+	.long  0x3FFF0000,0x85AAC367,0xCC487B15,0xBFBDE8DA
+	.long  0x3FFF0000,0x871F6196,0x9E8D1010,0x3FBDE85C
+	.long  0x3FFF0000,0x88980E80,0x92DA8527,0x3FBEBBF1
+	.long  0x3FFF0000,0x8A14D575,0x496EFD9A,0x3FBB80CA
+	.long  0x3FFF0000,0x8B95C1E3,0xEA8BD6E7,0xBFBA8373
+	.long  0x3FFF0000,0x8D1ADF5B,0x7E5BA9E6,0xBFBE9670
+	.long  0x3FFF0000,0x8EA4398B,0x45CD53C0,0x3FBDB700
+	.long  0x3FFF0000,0x9031DC43,0x1466B1DC,0x3FBEEEB0
+	.long  0x3FFF0000,0x91C3D373,0xAB11C336,0x3FBBFD6D
+	.long  0x3FFF0000,0x935A2B2F,0x13E6E92C,0xBFBDB319
+	.long  0x3FFF0000,0x94F4EFA8,0xFEF70961,0x3FBDBA2B
+	.long  0x3FFF0000,0x96942D37,0x20185A00,0x3FBE91D5
+	.long  0x3FFF0000,0x9837F051,0x8DB8A96F,0x3FBE8D5A
+	.long  0x3FFF0000,0x99E04593,0x20B7FA65,0xBFBCDE7B
+	.long  0x3FFF0000,0x9B8D39B9,0xD54E5539,0xBFBEBAAF
+	.long  0x3FFF0000,0x9D3ED9A7,0x2CFFB751,0xBFBD86DA
+	.long  0x3FFF0000,0x9EF53260,0x91A111AE,0xBFBEBEDD
+	.long  0x3FFF0000,0xA0B0510F,0xB9714FC2,0x3FBCC96E
+	.long  0x3FFF0000,0xA2704303,0x0C496819,0xBFBEC90B
+	.long  0x3FFF0000,0xA43515AE,0x09E6809E,0x3FBBD1DB
+	.long  0x3FFF0000,0xA5FED6A9,0xB15138EA,0x3FBCE5EB
+	.long  0x3FFF0000,0xA7CD93B4,0xE965356A,0xBFBEC274
+	.long  0x3FFF0000,0xA9A15AB4,0xEA7C0EF8,0x3FBEA83C
+	.long  0x3FFF0000,0xAB7A39B5,0xA93ED337,0x3FBECB00
+	.long  0x3FFF0000,0xAD583EEA,0x42A14AC6,0x3FBE9301
+	.long  0x3FFF0000,0xAF3B78AD,0x690A4375,0xBFBD8367
+	.long  0x3FFF0000,0xB123F581,0xD2AC2590,0xBFBEF05F
+	.long  0x3FFF0000,0xB311C412,0xA9112489,0x3FBDFB3C
+	.long  0x3FFF0000,0xB504F333,0xF9DE6484,0x3FBEB2FB
+	.long  0x3FFF0000,0xB6FD91E3,0x28D17791,0x3FBAE2CB
+	.long  0x3FFF0000,0xB8FBAF47,0x62FB9EE9,0x3FBCDC3C
+	.long  0x3FFF0000,0xBAFF5AB2,0x133E45FB,0x3FBEE9AA
+	.long  0x3FFF0000,0xBD08A39F,0x580C36BF,0xBFBEAEFD
+	.long  0x3FFF0000,0xBF1799B6,0x7A731083,0xBFBCBF51
+	.long  0x3FFF0000,0xC12C4CCA,0x66709456,0x3FBEF88A
+	.long  0x3FFF0000,0xC346CCDA,0x24976407,0x3FBD83B2
+	.long  0x3FFF0000,0xC5672A11,0x5506DADD,0x3FBDF8AB
+	.long  0x3FFF0000,0xC78D74C8,0xABB9B15D,0xBFBDFB17
+	.long  0x3FFF0000,0xC9B9BD86,0x6E2F27A3,0xBFBEFE3C
+	.long  0x3FFF0000,0xCBEC14FE,0xF2727C5D,0xBFBBB6F8
+	.long  0x3FFF0000,0xCE248C15,0x1F8480E4,0xBFBCEE53
+	.long  0x3FFF0000,0xD06333DA,0xEF2B2595,0xBFBDA4AE
+	.long  0x3FFF0000,0xD2A81D91,0xF12AE45A,0x3FBC9124
+	.long  0x3FFF0000,0xD4F35AAB,0xCFEDFA1F,0x3FBEB243
+	.long  0x3FFF0000,0xD744FCCA,0xD69D6AF4,0x3FBDE69A
+	.long  0x3FFF0000,0xD99D15C2,0x78AFD7B6,0xBFB8BC61
+	.long  0x3FFF0000,0xDBFBB797,0xDAF23755,0x3FBDF610
+	.long  0x3FFF0000,0xDE60F482,0x5E0E9124,0xBFBD8BE1
+	.long  0x3FFF0000,0xE0CCDEEC,0x2A94E111,0x3FBACB12
+	.long  0x3FFF0000,0xE33F8972,0xBE8A5A51,0x3FBB9BFE
+	.long  0x3FFF0000,0xE5B906E7,0x7C8348A8,0x3FBCF2F4
+	.long  0x3FFF0000,0xE8396A50,0x3C4BDC68,0x3FBEF22F
+	.long  0x3FFF0000,0xEAC0C6E7,0xDD24392F,0xBFBDBF4A
+	.long  0x3FFF0000,0xED4F301E,0xD9942B84,0x3FBEC01A
+	.long  0x3FFF0000,0xEFE4B99B,0xDCDAF5CB,0x3FBE8CAC
+	.long  0x3FFF0000,0xF281773C,0x59FFB13A,0xBFBCBB3F
+	.long  0x3FFF0000,0xF5257D15,0x2486CC2C,0x3FBEF73A
+	.long  0x3FFF0000,0xF7D0DF73,0x0AD13BB9,0xBFB8B795
+	.long  0x3FFF0000,0xFA83B2DB,0x722A033A,0x3FBEF84B
+	.long  0x3FFF0000,0xFD3E0C0C,0xF486C175,0xBFBEF581
+
+	.set	N,L_SCR1		| integer N produced by the round-to-int step
+
+	.set	X,FP_SCR1		| copy of the input X; its sign is read in EXPBIG
+	.set	XDCARE,X+2		| not referenced by name in stwotox.S
+	.set	XFRAC,X+4		| not referenced by name in stwotox.S
+
+	.set	ADJFACT,FP_SCR2		| extended 2**(M'), completed just before the last multiply
+
+	.set	FACT1,FP_SCR3		| leading part of 2**(M) * 2**(J/64)
+	.set	FACT1HI,FACT1+4		| its high mantissa long
+	.set	FACT1LOW,FACT1+8		| its low mantissa long
+
+	.set	FACT2,FP_SCR4		| trailing part of 2**(M) * 2**(J/64)
+	.set	FACT2HI,FACT2+4		| its high mantissa long
+	.set	FACT2LOW,FACT2+8		| its low mantissa long
+
+	| xref	t_unfl
+	|xref	t_ovfl
+	|xref	t_frcinx
+
+	.global	stwotoxd
+stwotoxd:
+|--ENTRY POINT FOR 2**(X) FOR DENORMALIZED ARGUMENT
+|--The sum below is 1 plus a tiny single carrying the sign bit of X.
+	fmovel		%d1,%fpcr		| ...set user's rounding mode/precision
+	fmoves		#0x3F800000,%fp0  | ...RETURN 1 + X
+	movel		(%a0),%d0	| first long of X: the sign bit is its top bit
+	orl		#0x00800001,%d0	| set bit 23 and bit 0 so the single is nonzero
+	fadds		%d0,%fp0	| d0 is added as a single-precision operand
+	bra		t_frcinx
+
+	.global	stwotox
+stwotox:
+|--ENTRY POINT FOR 2**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
+	fmovemx	(%a0),%fp0-%fp0	| ...LOAD INPUT, do not set cc's
+|--d0 := compact |X|; a full copy of X is kept at X(a6) for EXPBIG
+	movel		(%a0),%d0
+	movew		4(%a0),%d0
+	fmovex		%fp0,X(%a6)
+	andil		#0x7FFFFFFF,%d0
+
+	cmpil		#0x3FB98000,%d0		| ...|X| >= 2**(-70)?
+	bges		TWOOK1
+	bra		EXPBORS
+
+TWOOK1:
+	cmpil		#0x400D80C0,%d0		| ...|X| > 16480?
+	bles		TWOMAIN
+	bra		EXPBORS
+
+
+TWOMAIN:
+|--USUAL CASE, 2^(-70) <= |X| <= 16480
+
+	fmovex		%fp0,%fp1
+	fmuls		#0x42800000,%fp1  | ...64 * X
+
+	fmovel		%fp1,N(%a6)		| ...N = ROUND-TO-INT(64 X)
+	movel		%d2,-(%sp)	| d2 is scratch below and is popped again
+	lea		EXPTBL,%a1	| ...LOAD ADDRESS OF TABLE OF 2^(J/64)
+	fmovel		N(%a6),%fp1		| ...N --> FLOATING FMT
+	movel		N(%a6),%d0
+	movel		%d0,%d2
+	andil		#0x3F,%d0		| ...D0 IS J
+	asll		#4,%d0		| ...DISPLACEMENT FOR 2^(J/64)
+	addal		%d0,%a1		| ...ADDRESS FOR 2^(J/64)
+	asrl		#6,%d2		| ...d2 IS L, N = 64L + J
+	movel		%d2,%d0
+	asrl		#1,%d0		| ...D0 IS M
+	subl		%d0,%d2		| ...d2 IS M', N = 64(M+M') + J
+	addil		#0x3FFF,%d2	| add the extended exponent bias
+	movew		%d2,ADJFACT(%a6)	| ...ADJFACT IS 2^(M')
+	movel		(%sp)+,%d2
+|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
+|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
+|--ADJFACT = 2^(M').
+|--NOTE: THE ONLY STACK SAVE ABOVE IS D2, WHICH HAS ALREADY BEEN RESTORED.
+
+	fmuls		#0x3C800000,%fp1  | ...(1/64)*N
+	movel		(%a1)+,FACT1(%a6)	| copy the 12-byte leading term
+	movel		(%a1)+,FACT1HI(%a6)
+	movel		(%a1)+,FACT1LOW(%a6)
+	movew		(%a1)+,FACT2(%a6)	| high word of the 4th long: exponent word of FACT2
+	clrw		FACT2+2(%a6)
+
+	fsubx		%fp1,%fp0		| ...X - (1/64)*INT(64 X)
+
+	movew		(%a1)+,FACT2HI(%a6)	| low word of the 4th long: top 16 mantissa bits
+	clrw		FACT2HI+2(%a6)
+	clrl		FACT2LOW(%a6)
+	addw		%d0,FACT1(%a6)	| add M to the exponent word of FACT1
+
+	fmulx		LOG2,%fp0	| ...FP0 IS R
+	addw		%d0,FACT2(%a6)	| add M to the exponent word of FACT2
+
+	bra		expr
+
+EXPBORS:
+|--SHARED BY stwotox AND stentox; D0 IS COMPACT |X| ON ENTRY
+	cmpil		#0x3FFF8000,%d0	| compact |X| above 1.0: the large case
+	bgts		EXPBIG
+
+EXPSM:
+|--|X| IS SMALL, RETURN 1 + X
+
+	fmovel		%d1,%FPCR		|restore users exceptions
+	fadds		#0x3F800000,%fp0  | ...RETURN 1 + X
+
+	bra		t_frcinx
+
+EXPBIG:
+|--|X| IS LARGE, GENERATE OVERFLOW IF X > 0; ELSE GENERATE UNDERFLOW
+|--THE SIGN IS TAKEN FROM THE COPY OF X STORED AT X(a6) ON ENTRY
+	movel		X(%a6),%d0
+	cmpil		#0,%d0
+	blts		EXPNEG		| top bit set: X is negative
+
+	bclrb		#7,(%a0)		|t_ovfl expects positive value
+	bra		t_ovfl
+
+EXPNEG:
+	bclrb		#7,(%a0)		|t_unfl expects positive value
+	bra		t_unfl
+
+	.global	stentoxd
+stentoxd:
+|--ENTRY POINT FOR 10**(X) FOR DENORMALIZED ARGUMENT
+|--The sum below is 1 plus a tiny single carrying the sign bit of X.
+	fmovel		%d1,%fpcr		| ...set user's rounding mode/precision
+	fmoves		#0x3F800000,%fp0  | ...RETURN 1 + X
+	movel		(%a0),%d0	| first long of X: the sign bit is its top bit
+	orl		#0x00800001,%d0	| set bit 23 and bit 0 so the single is nonzero
+	fadds		%d0,%fp0	| d0 is added as a single-precision operand
+	bra		t_frcinx
+
+	.global	stentox
+stentox:
+|--ENTRY POINT FOR 10**(X), HERE X IS FINITE, NON-ZERO, AND NOT NAN'S
+	fmovemx	(%a0),%fp0-%fp0	| ...LOAD INPUT, do not set cc's
+|--d0 := compact |X|; a full copy of X is kept at X(a6) for EXPBIG
+	movel		(%a0),%d0
+	movew		4(%a0),%d0
+	fmovex		%fp0,X(%a6)
+	andil		#0x7FFFFFFF,%d0
+
+	cmpil		#0x3FB98000,%d0		| ...|X| >= 2**(-70)?
+	bges		TENOK1
+	bra		EXPBORS
+
+TENOK1:
+	cmpil		#0x400B9B07,%d0		| ...|X| <= 16480*log2/log10 ?
+	bles		TENMAIN
+	bra		EXPBORS
+
+TENMAIN:
+|--USUAL CASE, 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10
+
+	fmovex		%fp0,%fp1
+	fmuld		L2TEN64,%fp1	| ...X*64*LOG10/LOG2
+
+	fmovel		%fp1,N(%a6)		| ...N=INT(X*64*LOG10/LOG2)
+	movel		%d2,-(%sp)	| d2 is scratch below and is popped again
+	lea		EXPTBL,%a1	| ...LOAD ADDRESS OF TABLE OF 2^(J/64)
+	fmovel		N(%a6),%fp1		| ...N --> FLOATING FMT
+	movel		N(%a6),%d0
+	movel		%d0,%d2
+	andil		#0x3F,%d0		| ...D0 IS J
+	asll		#4,%d0		| ...DISPLACEMENT FOR 2^(J/64)
+	addal		%d0,%a1		| ...ADDRESS FOR 2^(J/64)
+	asrl		#6,%d2		| ...d2 IS L, N = 64L + J
+	movel		%d2,%d0
+	asrl		#1,%d0		| ...D0 IS M
+	subl		%d0,%d2		| ...d2 IS M', N = 64(M+M') + J
+	addil		#0x3FFF,%d2	| add the extended exponent bias
+	movew		%d2,ADJFACT(%a6)	| ...ADJFACT IS 2^(M')
+	movel		(%sp)+,%d2
+
+|--SUMMARY: a1 IS ADDRESS FOR THE LEADING PORTION OF 2^(J/64),
+|--D0 IS M WHERE N = 64(M+M') + J. NOTE THAT |M| <= 16140 BY DESIGN.
+|--ADJFACT = 2^(M').
+|--NOTE: THE ONLY STACK SAVE ABOVE IS D2, WHICH HAS ALREADY BEEN RESTORED.
+
+	fmovex		%fp1,%fp2	| second copy of N for the trailing product
+
+	fmuld		L10TWO1,%fp1	| ...N*(LOG2/64LOG10)_LEAD
+	movel		(%a1)+,FACT1(%a6)	| copy the 12-byte leading term (3 longs)
+
+	fmulx		L10TWO2,%fp2	| ...N*(LOG2/64LOG10)_TRAIL
+
+	movel		(%a1)+,FACT1HI(%a6)
+	movel		(%a1)+,FACT1LOW(%a6)
+	fsubx		%fp1,%fp0		| ...X - N L_LEAD
+	movew		(%a1)+,FACT2(%a6)	| high word of the 4th long: exponent word of FACT2
+
+	fsubx		%fp2,%fp0		| ...X - N L_TRAIL
+
+	clrw		FACT2+2(%a6)
+	movew		(%a1)+,FACT2HI(%a6)	| low word of the 4th long: top 16 mantissa bits
+	clrw		FACT2HI+2(%a6)
+	clrl		FACT2LOW(%a6)
+
+	fmulx		LOG10,%fp0	| ...FP0 IS R
+
+	addw		%d0,FACT1(%a6)	| add M to the exponent word of FACT1
+	addw		%d0,FACT2(%a6)	| add M to the exponent word of FACT2
+
+expr:
+|--FP2 AND FP3 ARE SCRATCH HERE; NO SAVE OF THEM IS VISIBLE IN THIS FILE.
+|--ADJFACT CONTAINS 2**(M'), FACT1 + FACT2 = 2**(M) * 2**(J/64).
+|--FP0 IS R. THE FOLLOWING CODE COMPUTES
+|--	2**(M'+M) * 2**(J/64) * EXP(R)
+
+	fmovex		%fp0,%fp1
+	fmulx		%fp1,%fp1		| ...FP1 IS S = R*R
+
+	fmoved		EXPA5,%fp2	| ...FP2 IS A5
+	fmoved		EXPA4,%fp3	| ...FP3 IS A4
+
+	fmulx		%fp1,%fp2		| ...FP2 IS S*A5
+	fmulx		%fp1,%fp3		| ...FP3 IS S*A4
+
+	faddd		EXPA3,%fp2	| ...FP2 IS A3+S*A5
+	faddd		EXPA2,%fp3	| ...FP3 IS A2+S*A4
+
+	fmulx		%fp1,%fp2		| ...FP2 IS S*(A3+S*A5)
+	fmulx		%fp1,%fp3		| ...FP3 IS S*(A2+S*A4)
+
+	faddd		EXPA1,%fp2	| ...FP2 IS A1+S*(A3+S*A5)
+	fmulx		%fp0,%fp3		| ...FP3 IS R*S*(A2+S*A4)
+
+	fmulx		%fp1,%fp2		| ...FP2 IS S*(A1+S*(A3+S*A5))
+	faddx		%fp3,%fp0		| ...FP0 IS R+R*S*(A2+S*A4)
+
+	faddx		%fp2,%fp0		| ...FP0 IS EXP(R) - 1
+
+
+|--FINAL RECONSTRUCTION PROCESS
+|--EXP(X) = 2^M*2^(J/64) + 2^M*2^(J/64)*(EXP(R)-1)  -  (1 OR 0)
+
+	fmulx		FACT1(%a6),%fp0	| FACT1*(EXP(R)-1)
+	faddx		FACT2(%a6),%fp0	| add the trailing part FACT2
+	faddx		FACT1(%a6),%fp0	| add the leading part FACT1 last
+
+	fmovel		%d1,%FPCR		|restore users exceptions
+	clrw		ADJFACT+2(%a6)	| finish ADJFACT: zero pad word,
+	movel		#0x80000000,ADJFACT+4(%a6)	| mantissa 1.0,
+	clrl		ADJFACT+8(%a6)	| and zero low mantissa long
+	fmulx		ADJFACT(%a6),%fp0	| ...FINAL ADJUSTMENT
+
+	bra		t_frcinx
+
+	|end
diff --git a/arch/m68k/fpsp040/tbldo.S b/arch/m68k/fpsp040/tbldo.S
new file mode 100644
index 0000000..fe60cf4
--- /dev/null
+++ b/arch/m68k/fpsp040/tbldo.S
@@ -0,0 +1,554 @@
+|
+|	tbldo.sa 3.1 12/10/90
+|
+| Modified:
+|	8/16/90	chinds	The table was constructed to use only one level
+|			of indirection in do_func for monadic
+|			functions.  Dyadic functions require two
+|			levels, and the tables are still contained
+|			in do_func.  The table is arranged to be
+|			indexed with a 10-bit index, with the first
+|			7 bits the opcode, and the remaining 3
+|			the stag.  For dyadic functions, all
+|			valid addresses are to the generic entry
+|			point.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|TBLDO	idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+	|xref	ld_pinf,ld_pone,ld_ppi2
+	|xref	t_dz2,t_operr
+	|xref	serror,sone,szero,sinf,snzrinx
+	|xref	sopr_inf,spi_2,src_nan,szr_inf
+
+	|xref	smovcr
+	|xref	pmod,prem,pscale
+	|xref	satanh,satanhd
+	|xref	sacos,sacosd,sasin,sasind,satan,satand
+	|xref	setox,setoxd,setoxm1,setoxm1d,setoxm1i
+	|xref	sgetexp,sgetexpd,sgetman,sgetmand
+	|xref	sint,sintd,sintrz
+	|xref	ssincos,ssincosd,ssincosi,ssincosnan,ssincosz
+	|xref	scos,scosd,ssin,ssind,stan,stand
+	|xref	scosh,scoshd,ssinh,ssinhd,stanh,stanhd
+	|xref	sslog10,sslog2,sslogn,sslognp1
+	|xref	sslog10d,sslog2d,sslognd,slognp1d
+	|xref	stentox,stentoxd,stwotox,stwotoxd
+
+|	entry = tblpre[opcode*8 + stag]; stag 0 norm, 1 zero, 2 inf, 3 nan, 4 denorm, 5-7 invalid
+	.global	tblpre
+tblpre:
+	.long	smovcr		|$00-0 fmovecr all
+	.long	smovcr		|$00-1 fmovecr all
+	.long	smovcr		|$00-2 fmovecr all
+	.long	smovcr		|$00-3 fmovecr all
+	.long	smovcr		|$00-4 fmovecr all
+	.long	smovcr		|$00-5 fmovecr all
+	.long	smovcr		|$00-6 fmovecr all
+	.long	smovcr		|$00-7 fmovecr all
+
+	.long	sint		|$01-0 fint norm
+	.long	szero		|$01-1 fint zero
+	.long	sinf		|$01-2 fint inf
+	.long	src_nan		|$01-3 fint nan
+	.long	sintd		|$01-4 fint denorm inx
+	.long	serror		|$01-5 fint ERROR
+	.long	serror		|$01-6 fint ERROR
+	.long	serror		|$01-7 fint ERROR
+
+	.long	ssinh		|$02-0 fsinh norm
+	.long	szero		|$02-1 fsinh zero
+	.long	sinf		|$02-2 fsinh inf
+	.long	src_nan		|$02-3 fsinh nan
+	.long	ssinhd		|$02-4 fsinh denorm
+	.long	serror		|$02-5 fsinh ERROR
+	.long	serror		|$02-6 fsinh ERROR
+	.long	serror		|$02-7 fsinh ERROR
+
+	.long	sintrz		|$03-0 fintrz norm
+	.long	szero		|$03-1 fintrz zero
+	.long	sinf		|$03-2 fintrz inf
+	.long	src_nan		|$03-3 fintrz nan
+	.long	snzrinx		|$03-4 fintrz denorm inx
+	.long	serror		|$03-5 fintrz ERROR
+	.long	serror		|$03-6 fintrz ERROR
+	.long	serror		|$03-7 fintrz ERROR
+
+	.long	serror		|$04-0 ERROR - illegal extension
+	.long	serror		|$04-1 ERROR - illegal extension
+	.long	serror		|$04-2 ERROR - illegal extension
+	.long	serror		|$04-3 ERROR - illegal extension
+	.long	serror		|$04-4 ERROR - illegal extension
+	.long	serror		|$04-5 ERROR - illegal extension
+	.long	serror		|$04-6 ERROR - illegal extension
+	.long	serror		|$04-7 ERROR - illegal extension
+
+	.long	serror		|$05-0 ERROR - illegal extension
+	.long	serror		|$05-1 ERROR - illegal extension
+	.long	serror		|$05-2 ERROR - illegal extension
+	.long	serror		|$05-3 ERROR - illegal extension
+	.long	serror		|$05-4 ERROR - illegal extension
+	.long	serror		|$05-5 ERROR - illegal extension
+	.long	serror		|$05-6 ERROR - illegal extension
+	.long	serror		|$05-7 ERROR - illegal extension
+
+	.long	sslognp1	|$06-0 flognp1 norm
+	.long	szero		|$06-1 flognp1 zero
+	.long	sopr_inf	|$06-2 flognp1 inf
+	.long	src_nan		|$06-3 flognp1 nan
+	.long	slognp1d	|$06-4 flognp1 denorm
+	.long	serror		|$06-5 flognp1 ERROR
+	.long	serror		|$06-6 flognp1 ERROR
+	.long	serror		|$06-7 flognp1 ERROR
+
+	.long	serror		|$07-0 ERROR - illegal extension
+	.long	serror		|$07-1 ERROR - illegal extension
+	.long	serror		|$07-2 ERROR - illegal extension
+	.long	serror		|$07-3 ERROR - illegal extension
+	.long	serror		|$07-4 ERROR - illegal extension
+	.long	serror		|$07-5 ERROR - illegal extension
+	.long	serror		|$07-6 ERROR - illegal extension
+	.long	serror		|$07-7 ERROR - illegal extension
+
+	.long	setoxm1		|$08-0 fetoxm1 norm
+	.long	szero		|$08-1 fetoxm1 zero
+	.long	setoxm1i	|$08-2 fetoxm1 inf
+	.long	src_nan		|$08-3 fetoxm1 nan
+	.long	setoxm1d	|$08-4 fetoxm1 denorm
+	.long	serror		|$08-5 fetoxm1 ERROR
+	.long	serror		|$08-6 fetoxm1 ERROR
+	.long	serror		|$08-7 fetoxm1 ERROR
+
+	.long	stanh		|$09-0 ftanh norm
+	.long	szero		|$09-1 ftanh zero
+	.long	sone		|$09-2 ftanh inf
+	.long	src_nan		|$09-3 ftanh nan
+	.long	stanhd		|$09-4 ftanh denorm
+	.long	serror		|$09-5 ftanh ERROR
+	.long	serror		|$09-6 ftanh ERROR
+	.long	serror		|$09-7 ftanh ERROR
+
+	.long	satan		|$0a-0 fatan norm
+	.long	szero		|$0a-1 fatan zero
+	.long	spi_2		|$0a-2 fatan inf
+	.long	src_nan		|$0a-3 fatan nan
+	.long	satand		|$0a-4 fatan denorm
+	.long	serror		|$0a-5 fatan ERROR
+	.long	serror		|$0a-6 fatan ERROR
+	.long	serror		|$0a-7 fatan ERROR
+
+	.long	serror		|$0b-0 ERROR - illegal extension
+	.long	serror		|$0b-1 ERROR - illegal extension
+	.long	serror		|$0b-2 ERROR - illegal extension
+	.long	serror		|$0b-3 ERROR - illegal extension
+	.long	serror		|$0b-4 ERROR - illegal extension
+	.long	serror		|$0b-5 ERROR - illegal extension
+	.long	serror		|$0b-6 ERROR - illegal extension
+	.long	serror		|$0b-7 ERROR - illegal extension
+
+	.long	sasin		|$0c-0 fasin norm
+	.long	szero		|$0c-1 fasin zero
+	.long	t_operr		|$0c-2 fasin inf
+	.long	src_nan		|$0c-3 fasin nan
+	.long	sasind		|$0c-4 fasin denorm
+	.long	serror		|$0c-5 fasin ERROR
+	.long	serror		|$0c-6 fasin ERROR
+	.long	serror		|$0c-7 fasin ERROR
+
+	.long	satanh		|$0d-0 fatanh norm
+	.long	szero		|$0d-1 fatanh zero
+	.long	t_operr		|$0d-2 fatanh inf
+	.long	src_nan		|$0d-3 fatanh nan
+	.long	satanhd		|$0d-4 fatanh denorm
+	.long	serror		|$0d-5 fatanh ERROR
+	.long	serror		|$0d-6 fatanh ERROR
+	.long	serror		|$0d-7 fatanh ERROR
+
+	.long	ssin		|$0e-0 fsin norm
+	.long	szero		|$0e-1 fsin zero
+	.long	t_operr		|$0e-2 fsin inf
+	.long	src_nan		|$0e-3 fsin nan
+	.long	ssind		|$0e-4 fsin denorm
+	.long	serror		|$0e-5 fsin ERROR
+	.long	serror		|$0e-6 fsin ERROR
+	.long	serror		|$0e-7 fsin ERROR
+
+	.long	stan		|$0f-0 ftan norm
+	.long	szero		|$0f-1 ftan zero
+	.long	t_operr		|$0f-2 ftan inf
+	.long	src_nan		|$0f-3 ftan nan
+	.long	stand		|$0f-4 ftan denorm
+	.long	serror		|$0f-5 ftan ERROR
+	.long	serror		|$0f-6 ftan ERROR
+	.long	serror		|$0f-7 ftan ERROR
+
+	.long	setox		|$10-0 fetox norm
+	.long	ld_pone		|$10-1 fetox zero
+	.long	szr_inf		|$10-2 fetox inf
+	.long	src_nan		|$10-3 fetox nan
+	.long	setoxd		|$10-4 fetox denorm
+	.long	serror		|$10-5 fetox ERROR
+	.long	serror		|$10-6 fetox ERROR
+	.long	serror		|$10-7 fetox ERROR
+
+	.long	stwotox		|$11-0 ftwotox norm
+	.long	ld_pone		|$11-1 ftwotox zero
+	.long	szr_inf		|$11-2 ftwotox inf
+	.long	src_nan		|$11-3 ftwotox nan
+	.long	stwotoxd	|$11-4 ftwotox denorm
+	.long	serror		|$11-5 ftwotox ERROR
+	.long	serror		|$11-6 ftwotox ERROR
+	.long	serror		|$11-7 ftwotox ERROR
+
+	.long	stentox		|$12-0 ftentox norm
+	.long	ld_pone		|$12-1 ftentox zero
+	.long	szr_inf		|$12-2 ftentox inf
+	.long	src_nan		|$12-3 ftentox nan
+	.long	stentoxd	|$12-4 ftentox denorm
+	.long	serror		|$12-5 ftentox ERROR
+	.long	serror		|$12-6 ftentox ERROR
+	.long	serror		|$12-7 ftentox ERROR
+
+	.long	serror		|$13-0 ERROR - illegal extension
+	.long	serror		|$13-1 ERROR - illegal extension
+	.long	serror		|$13-2 ERROR - illegal extension
+	.long	serror		|$13-3 ERROR - illegal extension
+	.long	serror		|$13-4 ERROR - illegal extension
+	.long	serror		|$13-5 ERROR - illegal extension
+	.long	serror		|$13-6 ERROR - illegal extension
+	.long	serror		|$13-7 ERROR - illegal extension
+
+	.long	sslogn		|$14-0 flogn norm
+	.long	t_dz2		|$14-1 flogn zero
+	.long	sopr_inf	|$14-2 flogn inf
+	.long	src_nan		|$14-3 flogn nan
+	.long	sslognd		|$14-4 flogn denorm
+	.long	serror		|$14-5 flogn ERROR
+	.long	serror		|$14-6 flogn ERROR
+	.long	serror		|$14-7 flogn ERROR
+
+	.long	sslog10		|$15-0 flog10 norm
+	.long	t_dz2		|$15-1 flog10 zero
+	.long	sopr_inf	|$15-2 flog10 inf
+	.long	src_nan		|$15-3 flog10 nan
+	.long	sslog10d	|$15-4 flog10 denorm
+	.long	serror		|$15-5 flog10 ERROR
+	.long	serror		|$15-6 flog10 ERROR
+	.long	serror		|$15-7 flog10 ERROR
+
+	.long	sslog2		|$16-0 flog2 norm
+	.long	t_dz2		|$16-1 flog2 zero
+	.long	sopr_inf	|$16-2 flog2 inf
+	.long	src_nan		|$16-3 flog2 nan
+	.long	sslog2d		|$16-4 flog2 denorm
+	.long	serror		|$16-5 flog2 ERROR
+	.long	serror		|$16-6 flog2 ERROR
+	.long	serror		|$16-7 flog2 ERROR
+
+	.long	serror		|$17-0 ERROR - illegal extension
+	.long	serror		|$17-1 ERROR - illegal extension
+	.long	serror		|$17-2 ERROR - illegal extension
+	.long	serror		|$17-3 ERROR - illegal extension
+	.long	serror		|$17-4 ERROR - illegal extension
+	.long	serror		|$17-5 ERROR - illegal extension
+	.long	serror		|$17-6 ERROR - illegal extension
+	.long	serror		|$17-7 ERROR - illegal extension
+
+	.long	serror		|$18-0 ERROR - illegal extension
+	.long	serror		|$18-1 ERROR - illegal extension
+	.long	serror		|$18-2 ERROR - illegal extension
+	.long	serror		|$18-3 ERROR - illegal extension
+	.long	serror		|$18-4 ERROR - illegal extension
+	.long	serror		|$18-5 ERROR - illegal extension
+	.long	serror		|$18-6 ERROR - illegal extension
+	.long	serror		|$18-7 ERROR - illegal extension
+
+	.long	scosh		|$19-0 fcosh norm
+	.long	ld_pone		|$19-1 fcosh zero
+	.long	ld_pinf		|$19-2 fcosh inf
+	.long	src_nan		|$19-3 fcosh nan
+	.long	scoshd		|$19-4 fcosh denorm
+	.long	serror		|$19-5 fcosh ERROR
+	.long	serror		|$19-6 fcosh ERROR
+	.long	serror		|$19-7 fcosh ERROR
+
+	.long	serror		|$1a-0 ERROR - illegal extension
+	.long	serror		|$1a-1 ERROR - illegal extension
+	.long	serror		|$1a-2 ERROR - illegal extension
+	.long	serror		|$1a-3 ERROR - illegal extension
+	.long	serror		|$1a-4 ERROR - illegal extension
+	.long	serror		|$1a-5 ERROR - illegal extension
+	.long	serror		|$1a-6 ERROR - illegal extension
+	.long	serror		|$1a-7 ERROR - illegal extension
+
+	.long	serror		|$1b-0 ERROR - illegal extension
+	.long	serror		|$1b-1 ERROR - illegal extension
+	.long	serror		|$1b-2 ERROR - illegal extension
+	.long	serror		|$1b-3 ERROR - illegal extension
+	.long	serror		|$1b-4 ERROR - illegal extension
+	.long	serror		|$1b-5 ERROR - illegal extension
+	.long	serror		|$1b-6 ERROR - illegal extension
+	.long	serror		|$1b-7 ERROR - illegal extension
+
+	.long	sacos		|$1c-0 facos norm
+	.long	ld_ppi2		|$1c-1 facos zero
+	.long	t_operr		|$1c-2 facos inf
+	.long	src_nan		|$1c-3 facos nan
+	.long	sacosd		|$1c-4 facos denorm
+	.long	serror		|$1c-5 facos ERROR
+	.long	serror		|$1c-6 facos ERROR
+	.long	serror		|$1c-7 facos ERROR
+
+	.long	scos		|$1d-0 fcos norm
+	.long	ld_pone		|$1d-1 fcos zero
+	.long	t_operr		|$1d-2 fcos inf
+	.long	src_nan		|$1d-3 fcos nan
+	.long	scosd		|$1d-4 fcos denorm
+	.long	serror		|$1d-5 fcos ERROR
+	.long	serror		|$1d-6 fcos ERROR
+	.long	serror		|$1d-7 fcos ERROR
+
+	.long	sgetexp		|$1e-0 fgetexp norm
+	.long	szero		|$1e-1 fgetexp zero
+	.long	t_operr		|$1e-2 fgetexp inf
+	.long	src_nan		|$1e-3 fgetexp nan
+	.long	sgetexpd	|$1e-4 fgetexp denorm
+	.long	serror		|$1e-5 fgetexp ERROR
+	.long	serror		|$1e-6 fgetexp ERROR
+	.long	serror		|$1e-7 fgetexp ERROR
+
+	.long	sgetman		|$1f-0 fgetman norm
+	.long	szero		|$1f-1 fgetman zero
+	.long	t_operr		|$1f-2 fgetman inf
+	.long	src_nan		|$1f-3 fgetman nan
+	.long	sgetmand	|$1f-4 fgetman denorm
+	.long	serror		|$1f-5 fgetman ERROR
+	.long	serror		|$1f-6 fgetman ERROR
+	.long	serror		|$1f-7 fgetman ERROR
+
+	.long	serror		|$20-0 ERROR - illegal extension
+	.long	serror		|$20-1 ERROR - illegal extension
+	.long	serror		|$20-2 ERROR - illegal extension
+	.long	serror		|$20-3 ERROR - illegal extension
+	.long	serror		|$20-4 ERROR - illegal extension
+	.long	serror		|$20-5 ERROR - illegal extension
+	.long	serror		|$20-6 ERROR - illegal extension
+	.long	serror		|$20-7 ERROR - illegal extension
+
+	.long	pmod		|$21-0 fmod all
+	.long	pmod		|$21-1 fmod all
+	.long	pmod		|$21-2 fmod all
+	.long	pmod		|$21-3 fmod all
+	.long	pmod		|$21-4 fmod all
+	.long	serror		|$21-5 fmod ERROR
+	.long	serror		|$21-6 fmod ERROR
+	.long	serror		|$21-7 fmod ERROR
+
+	.long	serror		|$22-0 ERROR - illegal extension
+	.long	serror		|$22-1 ERROR - illegal extension
+	.long	serror		|$22-2 ERROR - illegal extension
+	.long	serror		|$22-3 ERROR - illegal extension
+	.long	serror		|$22-4 ERROR - illegal extension
+	.long	serror		|$22-5 ERROR - illegal extension
+	.long	serror		|$22-6 ERROR - illegal extension
+	.long	serror		|$22-7 ERROR - illegal extension
+
+	.long	serror		|$23-0 ERROR - illegal extension
+	.long	serror		|$23-1 ERROR - illegal extension
+	.long	serror		|$23-2 ERROR - illegal extension
+	.long	serror		|$23-3 ERROR - illegal extension
+	.long	serror		|$23-4 ERROR - illegal extension
+	.long	serror		|$23-5 ERROR - illegal extension
+	.long	serror		|$23-6 ERROR - illegal extension
+	.long	serror		|$23-7 ERROR - illegal extension
+
+	.long	serror		|$24-0 ERROR - illegal extension
+	.long	serror		|$24-1 ERROR - illegal extension
+	.long	serror		|$24-2 ERROR - illegal extension
+	.long	serror		|$24-3 ERROR - illegal extension
+	.long	serror		|$24-4 ERROR - illegal extension
+	.long	serror		|$24-5 ERROR - illegal extension
+	.long	serror		|$24-6 ERROR - illegal extension
+	.long	serror		|$24-7 ERROR - illegal extension
+
+	.long	prem		|$25-0 frem all
+	.long	prem		|$25-1 frem all
+	.long	prem		|$25-2 frem all
+	.long	prem		|$25-3 frem all
+	.long	prem		|$25-4 frem all
+	.long	serror		|$25-5 frem ERROR
+	.long	serror		|$25-6 frem ERROR
+	.long	serror		|$25-7 frem ERROR
+
+	.long	pscale		|$26-0 fscale all
+	.long	pscale		|$26-1 fscale all
+	.long	pscale		|$26-2 fscale all
+	.long	pscale		|$26-3 fscale all
+	.long	pscale		|$26-4 fscale all
+	.long	serror		|$26-5 fscale ERROR
+	.long	serror		|$26-6 fscale ERROR
+	.long	serror		|$26-7 fscale ERROR
+
+	.long	serror		|$27-0 ERROR - illegal extension
+	.long	serror		|$27-1 ERROR - illegal extension
+	.long	serror		|$27-2 ERROR - illegal extension
+	.long	serror		|$27-3 ERROR - illegal extension
+	.long	serror		|$27-4 ERROR - illegal extension
+	.long	serror		|$27-5 ERROR - illegal extension
+	.long	serror		|$27-6 ERROR - illegal extension
+	.long	serror		|$27-7 ERROR - illegal extension
+
+	.long	serror		|$28-0 ERROR - illegal extension
+	.long	serror		|$28-1 ERROR - illegal extension
+	.long	serror		|$28-2 ERROR - illegal extension
+	.long	serror		|$28-3 ERROR - illegal extension
+	.long	serror		|$28-4 ERROR - illegal extension
+	.long	serror		|$28-5 ERROR - illegal extension
+	.long	serror		|$28-6 ERROR - illegal extension
+	.long	serror		|$28-7 ERROR - illegal extension
+
+	.long	serror		|$29-0 ERROR - illegal extension
+	.long	serror		|$29-1 ERROR - illegal extension
+	.long	serror		|$29-2 ERROR - illegal extension
+	.long	serror		|$29-3 ERROR - illegal extension
+	.long	serror		|$29-4 ERROR - illegal extension
+	.long	serror		|$29-5 ERROR - illegal extension
+	.long	serror		|$29-6 ERROR - illegal extension
+	.long	serror		|$29-7 ERROR - illegal extension
+
+	.long	serror		|$2a-0 ERROR - illegal extension
+	.long	serror		|$2a-1 ERROR - illegal extension
+	.long	serror		|$2a-2 ERROR - illegal extension
+	.long	serror		|$2a-3 ERROR - illegal extension
+	.long	serror		|$2a-4 ERROR - illegal extension
+	.long	serror		|$2a-5 ERROR - illegal extension
+	.long	serror		|$2a-6 ERROR - illegal extension
+	.long	serror		|$2a-7 ERROR - illegal extension
+
+	.long	serror		|$2b-0 ERROR - illegal extension
+	.long	serror		|$2b-1 ERROR - illegal extension
+	.long	serror		|$2b-2 ERROR - illegal extension
+	.long	serror		|$2b-3 ERROR - illegal extension
+	.long	serror		|$2b-4 ERROR - illegal extension
+	.long	serror		|$2b-5 ERROR - illegal extension
+	.long	serror		|$2b-6 ERROR - illegal extension
+	.long	serror		|$2b-7 ERROR - illegal extension
+
+	.long	serror		|$2c-0 ERROR - illegal extension
+	.long	serror		|$2c-1 ERROR - illegal extension
+	.long	serror		|$2c-2 ERROR - illegal extension
+	.long	serror		|$2c-3 ERROR - illegal extension
+	.long	serror		|$2c-4 ERROR - illegal extension
+	.long	serror		|$2c-5 ERROR - illegal extension
+	.long	serror		|$2c-6 ERROR - illegal extension
+	.long	serror		|$2c-7 ERROR - illegal extension
+
+	.long	serror		|$2d-0 ERROR - illegal extension
+	.long	serror		|$2d-1 ERROR - illegal extension
+	.long	serror		|$2d-2 ERROR - illegal extension
+	.long	serror		|$2d-3 ERROR - illegal extension
+	.long	serror		|$2d-4 ERROR - illegal extension
+	.long	serror		|$2d-5 ERROR - illegal extension
+	.long	serror		|$2d-6 ERROR - illegal extension
+	.long	serror		|$2d-7 ERROR - illegal extension
+
+	.long	serror		|$2e-0 ERROR - illegal extension
+	.long	serror		|$2e-1 ERROR - illegal extension
+	.long	serror		|$2e-2 ERROR - illegal extension
+	.long	serror		|$2e-3 ERROR - illegal extension
+	.long	serror		|$2e-4 ERROR - illegal extension
+	.long	serror		|$2e-5 ERROR - illegal extension
+	.long	serror		|$2e-6 ERROR - illegal extension
+	.long	serror		|$2e-7 ERROR - illegal extension
+
+	.long	serror		|$2f-0 ERROR - illegal extension
+	.long	serror		|$2f-1 ERROR - illegal extension
+	.long	serror		|$2f-2 ERROR - illegal extension
+	.long	serror		|$2f-3 ERROR - illegal extension
+	.long	serror		|$2f-4 ERROR - illegal extension
+	.long	serror		|$2f-5 ERROR - illegal extension
+	.long	serror		|$2f-6 ERROR - illegal extension
+	.long	serror		|$2f-7 ERROR - illegal extension
+
+	.long	ssincos		|$30-0 fsincos norm
+	.long	ssincosz	|$30-1 fsincos zero
+	.long	ssincosi	|$30-2 fsincos inf
+	.long	ssincosnan	|$30-3 fsincos nan
+	.long	ssincosd	|$30-4 fsincos denorm
+	.long	serror		|$30-5 fsincos ERROR
+	.long	serror		|$30-6 fsincos ERROR
+	.long	serror		|$30-7 fsincos ERROR
+
+	.long	ssincos		|$31-0 fsincos norm
+	.long	ssincosz	|$31-1 fsincos zero
+	.long	ssincosi	|$31-2 fsincos inf
+	.long	ssincosnan	|$31-3 fsincos nan
+	.long	ssincosd	|$31-4 fsincos denorm
+	.long	serror		|$31-5 fsincos ERROR
+	.long	serror		|$31-6 fsincos ERROR
+	.long	serror		|$31-7 fsincos ERROR
+
+	.long	ssincos		|$32-0 fsincos norm
+	.long	ssincosz	|$32-1 fsincos zero
+	.long	ssincosi	|$32-2 fsincos inf
+	.long	ssincosnan	|$32-3 fsincos nan
+	.long	ssincosd	|$32-4 fsincos denorm
+	.long	serror		|$32-5 fsincos ERROR
+	.long	serror		|$32-6 fsincos ERROR
+	.long	serror		|$32-7 fsincos ERROR
+
+	.long	ssincos		|$33-0 fsincos norm
+	.long	ssincosz	|$33-1 fsincos zero
+	.long	ssincosi	|$33-2 fsincos inf
+	.long	ssincosnan	|$33-3 fsincos nan
+	.long	ssincosd	|$33-4 fsincos denorm
+	.long	serror		|$33-5 fsincos ERROR
+	.long	serror		|$33-6 fsincos ERROR
+	.long	serror		|$33-7 fsincos ERROR
+
+	.long	ssincos		|$34-0 fsincos norm
+	.long	ssincosz	|$34-1 fsincos zero
+	.long	ssincosi	|$34-2 fsincos inf
+	.long	ssincosnan	|$34-3 fsincos nan
+	.long	ssincosd	|$34-4 fsincos denorm
+	.long	serror		|$34-5 fsincos ERROR
+	.long	serror		|$34-6 fsincos ERROR
+	.long	serror		|$34-7 fsincos ERROR
+
+	.long	ssincos		|$35-0 fsincos norm
+	.long	ssincosz	|$35-1 fsincos zero
+	.long	ssincosi	|$35-2 fsincos inf
+	.long	ssincosnan	|$35-3 fsincos nan
+	.long	ssincosd	|$35-4 fsincos denorm
+	.long	serror		|$35-5 fsincos ERROR
+	.long	serror		|$35-6 fsincos ERROR
+	.long	serror		|$35-7 fsincos ERROR
+
+	.long	ssincos		|$36-0 fsincos norm
+	.long	ssincosz	|$36-1 fsincos zero
+	.long	ssincosi	|$36-2 fsincos inf
+	.long	ssincosnan	|$36-3 fsincos nan
+	.long	ssincosd	|$36-4 fsincos denorm
+	.long	serror		|$36-5 fsincos ERROR
+	.long	serror		|$36-6 fsincos ERROR
+	.long	serror		|$36-7 fsincos ERROR
+
+	.long	ssincos		|$37-0 fsincos norm
+	.long	ssincosz	|$37-1 fsincos zero
+	.long	ssincosi	|$37-2 fsincos inf
+	.long	ssincosnan	|$37-3 fsincos nan
+	.long	ssincosd	|$37-4 fsincos denorm
+	.long	serror		|$37-5 fsincos ERROR
+	.long	serror		|$37-6 fsincos ERROR
+	.long	serror		|$37-7 fsincos ERROR
+
+	|end
diff --git a/arch/m68k/fpsp040/util.S b/arch/m68k/fpsp040/util.S
new file mode 100644
index 0000000..452f3d6
--- /dev/null
+++ b/arch/m68k/fpsp040/util.S
@@ -0,0 +1,748 @@
+|
+|	util.sa 3.7 7/29/91
+|
+|	This file contains routines used by other programs.
+|
+|	ovf_res: used by overflow to force the correct
+|		 result. ovf_r_k, ovf_r_x2, ovf_r_x3 are
+|		 derivatives of this routine.
+|	get_fline: get user's opcode word
+|	g_dfmtou: returns the destination format.
+|	g_opcls: returns the opclass of the float instruction.
+|	g_rndpr: returns the rounding precision.
+|	reg_dest: write byte, word, or long data to Dn
+|
+|
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+|UTIL	idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	mem_read
+
+	.global	g_dfmtou
+	.global	g_opcls
+	.global	g_rndpr
+	.global	get_fline
+	.global	reg_dest
+
+| Each entry is presumably laid out as LOCAL_EX/LOCAL_HI/LOCAL_LO (what end_ovfr copies).
+| Final result table for ovf_res. Note that the negative counterparts
+| are unnecessary as ovf_res always returns the sign separately from
+| the exponent.
+|					;+inf
+EXT_PINF:	.long	0x7fff0000,0x00000000,0x00000000,0x00000000
+|					;largest +ext
+EXT_PLRG:	.long	0x7ffe0000,0xffffffff,0xffffffff,0x00000000
+|					;largest magnitude +sgl in ext (exp 0x3fff+127, 24 mantissa bits)
+SGL_PLRG:	.long	0x407e0000,0xffffff00,0x00000000,0x00000000
+|					;largest magnitude +dbl in ext (exp 0x3fff+1023, 53 mantissa bits)
+DBL_PLRG:	.long	0x43fe0000,0xffffffff,0xfffff800,0x00000000
+|					;(no negative entries follow; only the four positive values exist)
+
+tblovfl:			|jump table; ovf_res indexes it with (precision << 2) | round mode
+	.long	EXT_RN		| 0: extended, round to nearest
+	.long	EXT_RZ		| 1: extended, round toward zero
+	.long	EXT_RM		| 2: extended, round toward minus infinity
+	.long	EXT_RP		| 3: extended, round toward plus infinity
+	.long	SGL_RN		| 4: single, round to nearest
+	.long	SGL_RZ		| 5: single, round toward zero
+	.long	SGL_RM		| 6: single, round toward minus infinity
+	.long	SGL_RP		| 7: single, round toward plus infinity
+	.long	DBL_RN		| 8: double, round to nearest
+	.long	DBL_RZ		| 9: double, round toward zero
+	.long	DBL_RM		|10: double, round toward minus infinity
+	.long	DBL_RP		|11: double, round toward plus infinity
+	.long	error		|12: precision code 3 is not a valid precision
+	.long	error		|13: precision code 3 is not a valid precision
+	.long	error		|14: precision code 3 is not a valid precision
+	.long	error		|15: precision code 3 is not a valid precision
+
+
+|
+|	ovf_r_k --- overflow result calculation
+|
+| This entry point is used by kernel_ex.
+|
+| This forces the destination precision to be extended
+| (NOTE(review): no precision is selected by the three instructions below.)
+| Input:	operand in ETEMP
+| Output:	a result is in ETEMP (internal extended format)
+| There is no rts here: execution continues into ovf_r_x2 with a0 = ETEMP.
+	.global	ovf_r_k
+ovf_r_k:
+	lea	ETEMP(%a6),%a0	|a0 points to source operand
+	bclrb	#sign_bit,ETEMP_EX(%a6)	|clear the sign bit; Z = 1 if it was already clear
+	sne	ETEMP_SGN(%a6)	|convert to internal IEEE format ($ff if sign was set)
+
+|
+|	ovf_r_x2 --- overflow result calculation
+|
+| This entry point used by x_ovfl.  (opclass 0 and 2)
+|
+| Input		a0  points to an operand in the internal extended format
+| Output	a0  points to the result in the internal extended format
+|
+| This sets the round precision according to the user's FPCR unless the
+| instruction is fsgldiv or fsglmul or fsadd, fdadd, fssub, fdsub, fsmul,
+| fdmul, fsdiv, fddiv, fssqrt, fsmove, fdmove, fsabs, fdabs, fsneg, fdneg.
+| If the instruction is fsgldiv or fsglmul, the rounding precision must be
+| extended.  If the instruction is not fsgldiv or fsglmul but a force-
+| precision instruction, the rounding precision is then set to the force
+| precision.
+| d0 is scratch on entry: every path below masks it with a long-sized operation.
+	.global	ovf_r_x2
+ovf_r_x2:
+	btstb	#E3,E_BYTE(%a6)		|check for nu exception
+	beql	ovf_e1_exc		|it is cu exception
+ovf_e3_exc:
+	movew	CMDREG3B(%a6),%d0		|get the command word
+	andil	#0x00000060,%d0		|keep bits 6 and 5; zero the stale upper word
+	cmpil	#0x00000040,%d0
+	beql	ovff_sgl		|force precision is single
+	cmpil	#0x00000060,%d0
+	beql	ovff_dbl		|force precision is double
+	movew	CMDREG3B(%a6),%d0		|get the command word again
+	andil	#0x7f,%d0			|clear all except operation
+	cmpil	#0x33,%d0
+	beql	ovf_fsgl		|fsglmul or fsgldiv
+	cmpil	#0x30,%d0
+	beql	ovf_fsgl
+	bra	ovf_fpcr		|instruction is none of the above
+|					;use FPCR
+ovf_e1_exc:
+	movew	CMDREG1B(%a6),%d0		|get command word
+	andil	#0x00000044,%d0		|clear all bits except 6 and 2
+	cmpil	#0x00000040,%d0
+	beql	ovff_sgl		|the instruction is force single
+	cmpil	#0x00000044,%d0
+	beql	ovff_dbl		|the instruction is force double
+	movew	CMDREG1B(%a6),%d0		|again get the command word
+	andil	#0x0000007f,%d0		|clear all except the op code
+	cmpil	#0x00000027,%d0
+	beql	ovf_fsgl		|fsglmul
+	cmpil	#0x00000024,%d0
+	beql	ovf_fsgl		|fsgldiv
+	bra	ovf_fpcr		|none of the above, use FPCR
+|
+|
+| Inst is either fsgldiv or fsglmul.  Force extended precision.
+|
+ovf_fsgl:
+	clrl	%d0			|set extended
+	bra	ovf_res
+
+ovff_sgl:
+	movel	#0x00000001,%d0		|set single
+	bra	ovf_res
+ovff_dbl:
+	movel	#0x00000002,%d0		|set double
+	bra	ovf_res
+|
+| The precision is in the fpcr.
+|
+ovf_fpcr:
+	bfextu	FPCR_MODE(%a6){#0:#2},%d0 |set round precision
+	bra	ovf_res
+
+|
+|
+|	ovf_r_x3 --- overflow result calculation
+|
+| This entry point used by x_ovfl. (opclass 3 only)
+|
+| Input		a0  points to an operand in the internal extended format
+| Output	a0  points to the result in the internal extended format
+|
+| This sets the round precision according to the destination size.
+| There is no rts after the call: execution continues into ovf_res.
+	.global	ovf_r_x3
+ovf_r_x3:
+	bsr	g_dfmtou	|get dest fmt in d0{1:0}
+|				;for fmovout, the destination format
+|				;is the rounding precision
+
+|
+|	ovf_res --- overflow result calculation
+| Input:
+|	d0	round precision in d0{1:0} (0 = ext, 1 = sgl, 2 = dbl)
+|	a0	points to operand in internal extended format
+| Output:
+|	a0	points to result in internal extended format
+| Uses d0, d1 and a1 as scratch; sets condition bits in the saved FPSR image.
+	.global	ovf_res
+ovf_res:
+	lsll	#2,%d0		|move round precision to d0{3:2}
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1 |set round mode
+	orl	%d1,%d0		|index is fmt:mode in d0{3:0}
+	leal	tblovfl,%a1	|load a1 with table address
+	movel	%a1@(%d0:l:4),%a1	|use d0 as index to the table
+	jmp	(%a1)		|go to the correct routine
+|
+|case DEST_FMT = EXT
+|
+EXT_RN:
+	leal	EXT_PINF,%a1	|answer is +/- infinity
+	bsetb	#inf_bit,FPSR_CC(%a6)
+	bra	set_sign	|now go set the sign
+EXT_RZ:
+	leal	EXT_PLRG,%a1	|answer is +/- large number
+	bra	set_sign	|now go set the sign
+EXT_RM:
+	tstb	LOCAL_SGN(%a0)	|if negative overflow
+	beqs	e_rm_pos
+e_rm_neg:
+	leal	EXT_PINF,%a1	|answer is negative infinity
+	orl	#neginf_mask,USER_FPSR(%a6)	|or neginf_mask into the saved FPSR
+	bra	end_ovfr
+e_rm_pos:
+	leal	EXT_PLRG,%a1	|answer is large positive number
+	bra	end_ovfr
+EXT_RP:
+	tstb	LOCAL_SGN(%a0)	|if negative overflow
+	beqs	e_rp_pos
+e_rp_neg:
+	leal	EXT_PLRG,%a1	|answer is large negative number
+	bsetb	#neg_bit,FPSR_CC(%a6)
+	bra	end_ovfr
+e_rp_pos:
+	leal	EXT_PINF,%a1	|answer is positive infinity
+	bsetb	#inf_bit,FPSR_CC(%a6)
+	bra	end_ovfr
+|
+|case DEST_FMT = DBL
+|
+DBL_RN:
+	leal	EXT_PINF,%a1	|answer is +/- infinity
+	bsetb	#inf_bit,FPSR_CC(%a6)
+	bra	set_sign
+DBL_RZ:
+	leal	DBL_PLRG,%a1	|answer is +/- large number
+	bra	set_sign	|now go set the sign
+DBL_RM:
+	tstb	LOCAL_SGN(%a0)	|if negative overflow
+	beqs	d_rm_pos
+d_rm_neg:
+	leal	EXT_PINF,%a1	|answer is negative infinity
+	orl	#neginf_mask,USER_FPSR(%a6)
+	bra	end_ovfr	|inf is same for all precisions (ext,dbl,sgl)
+d_rm_pos:
+	leal	DBL_PLRG,%a1	|answer is large positive number
+	bra	end_ovfr
+DBL_RP:
+	tstb	LOCAL_SGN(%a0)	|if negative overflow
+	beqs	d_rp_pos
+d_rp_neg:
+	leal	DBL_PLRG,%a1	|answer is large negative number
+	bsetb	#neg_bit,FPSR_CC(%a6)
+	bra	end_ovfr
+d_rp_pos:
+	leal	EXT_PINF,%a1	|answer is positive infinity
+	bsetb	#inf_bit,FPSR_CC(%a6)
+	bra	end_ovfr
+|
+|case DEST_FMT = SGL
+|
+SGL_RN:
+	leal	EXT_PINF,%a1	|answer is +/-  infinity
+	bsetb	#inf_bit,FPSR_CC(%a6)
+	bras	set_sign
+SGL_RZ:
+	leal	SGL_PLRG,%a1	|answer is +/- large number
+	bras	set_sign
+SGL_RM:
+	tstb	LOCAL_SGN(%a0)	|if negative overflow
+	beqs	s_rm_pos
+s_rm_neg:
+	leal	EXT_PINF,%a1	|answer is negative infinity
+	orl	#neginf_mask,USER_FPSR(%a6)
+	bras	end_ovfr
+s_rm_pos:
+	leal	SGL_PLRG,%a1	|answer is large positive number
+	bras	end_ovfr
+SGL_RP:
+	tstb	LOCAL_SGN(%a0)	|if negative overflow
+	beqs	s_rp_pos
+s_rp_neg:
+	leal	SGL_PLRG,%a1	|answer is large negative number
+	bsetb	#neg_bit,FPSR_CC(%a6)
+	bras	end_ovfr
+s_rp_pos:
+	leal	EXT_PINF,%a1	|answer is positive infinity
+	bsetb	#inf_bit,FPSR_CC(%a6)
+	bras	end_ovfr
+
+set_sign:
+	tstb	LOCAL_SGN(%a0)	|if negative overflow
+	beqs	end_ovfr
+neg_sign:
+	bsetb	#neg_bit,FPSR_CC(%a6)	|record the negative sign, then fall into end_ovfr
+
+end_ovfr:
+	movew	LOCAL_EX(%a1),LOCAL_EX(%a0) |do not overwrite sign
+	movel	LOCAL_HI(%a1),LOCAL_HI(%a0)	|copy upper mantissa long from the table entry
+	movel	LOCAL_LO(%a1),LOCAL_LO(%a0)	|copy lower mantissa long from the table entry
+	rts
+
+
+|
+|	ERROR --- target of the four tblovfl slots for precision code 3
+|	Returns at once; the operand at (a0) is left unmodified.
+error:
+	rts
+|
+|	get_fline --- get f-line opcode of interrupted instruction
+|
+|	Returns opcode in the low word of d0.
+|
+get_fline:
+	clrl	-(%sp)		|push a zeroed longword to use as the temporary
+	movel	USER_FPIAR(%a6),%a0	|a0 = address of the opcode
+	leal	2(%sp),%a1	|a1 = low word of the temporary
+	moveq	#2,%d0		|d0 = count of 2 (presumably bytes, for mem_read)
+	bsrl	mem_read	|defined outside this file (see the xref above)
+	movel	(%sp)+,%d0	|pop the temporary into d0
+	rts
+|
+|	g_rndpr --- put rounding precision in d0{1:0}
+|
+|	valid return codes are:
+|		00 - extended
+|		01 - single
+|		10 - double
+|
+| begin
+| get rounding precision (cmdreg3b{6:5})
+| begin
+|  case	opclass = 011 (move out)
+|	get destination format - this is also the rounding precision
+|
+|  case	opclass = 0x0
+|	if E3
+|	    *case RndPr (from cmdreg3b{6:5}) = 11  then RND_PREC = DBL
+|	    *case RndPr (from cmdreg3b{6:5}) = 10  then RND_PREC = SGL
+|	     case RndPr (from cmdreg3b{6:5}) = 00 | 01
+|		use precision from FPCR{7:6}
+|			case 00 then RND_PREC = EXT
+|			case 01 then RND_PREC = SGL
+|			case 10 then RND_PREC = DBL
+|	else E1
+|	     use precision in FPCR{7:6}
+|	     case 00 then RND_PREC = EXT
+|	     case 01 then RND_PREC = SGL
+|	     case 10 then RND_PREC = DBL
+| end
+|
+g_rndpr:
+	bsr	g_opcls		|get opclass in d0{2:0}
+	cmpw	#0x0003,%d0	|check for opclass 011
+	bnes	op_0x0
+
+|
+| For move out instructions (opclass 011) the destination format
+| is the same as the rounding precision.  Pass results from g_dfmtou.
+|
+	bsr	g_dfmtou
+	rts
+op_0x0:
+	btstb	#E3,E_BYTE(%a6)
+	beql	unf_e1_exc	|branch to e1 underflow
+unf_e3_exc:
+	movel	CMDREG3B(%a6),%d0	|rounding precision in d0{10:9}
+	bfextu	%d0{#9:#2},%d0	|move the rounding prec bits to d0{1:0}
+	cmpil	#0x2,%d0
+	beql	unff_sgl	|force precision is single
+	cmpil	#0x3,%d0		|force precision is double
+	beql	unff_dbl
+	movew	CMDREG3B(%a6),%d0	|get the command word again
+	andil	#0x7f,%d0		|clear all except operation
+	cmpil	#0x33,%d0
+	beql	unf_fsgl	|fsglmul or fsgldiv
+	cmpil	#0x30,%d0
+	beql	unf_fsgl	|fsgldiv or fsglmul
+	bra	unf_fpcr
+unf_e1_exc:
+	movel	CMDREG1B(%a6),%d0	|get 32 bits off the stack, 1st 16 bits
+|				;are the command word
+	andil	#0x00440000,%d0	|clear all bits except bits 6 and 2
+	cmpil	#0x00400000,%d0
+	beql	unff_sgl	|force single
+	cmpil	#0x00440000,%d0	|force double
+	beql	unff_dbl
+	movel	CMDREG1B(%a6),%d0	|get the command word again
+	andil	#0x007f0000,%d0	|clear all bits except the operation
+	cmpil	#0x00270000,%d0
+	beql	unf_fsgl	|fsglmul
+	cmpil	#0x00240000,%d0
+	beql	unf_fsgl	|fsgldiv
+	bra	unf_fpcr
+
+|
+| Convert to return format.  The values from cmdreg3b and the return
+| values are:
+|	cmdreg3b	return	     precision
+|	--------	------	     ---------
+|	  00,01		  0		ext
+|	   10		  1		sgl
+|	   11		  2		dbl
+| Force single
+|
+unff_sgl:
+	movel	#1,%d0		|return 1
+	rts
+|
+| Force double
+|
+unff_dbl:
+	movel	#2,%d0		|return 2
+	rts
+|
+| Force extended
+|
+unf_fsgl:
+	movel	#0,%d0		|return 0
+	rts
+|
+| Get rounding precision set in FPCR{7:6}.
+|
+unf_fpcr:
+	movel	USER_FPCR(%a6),%d0 |rounding precision bits in d0{7:6}
+	bfextu	%d0{#24:#2},%d0	|move the rounding prec bits to d0{1:0}
+	rts
+|
+|	g_opcls --- put opclass in d0{2:0}
+|
+g_opcls:
+	btstb	#E3,E_BYTE(%a6)
+	beqs	opc_1b		|E3 clear: decode the opclass from cmdreg1b
+opc_3b:
+	moveq	#0,%d0		|E3 set: only opclass 0x0 is possible
+	rts
+opc_1b:
+|				;the opclass is the top three bits of cmdreg1b
+	bfextu	CMDREG1B(%a6){#0:#3},%d0 |extract them straight into d0{2:0}
+	rts
+|
+|	g_dfmtou --- put destination format in d0{1:0}
+|
+|	If E1, the format is from cmdreg1b{12:10}
+|	If E3, the format is extended.
+|
+|	Dest. Fmt.
+|		extended  010 -> 00  (any field value other than 001/101 also gives 00)
+|		single    001 -> 01
+|		double    101 -> 10
+|
+g_dfmtou:
+	btstb	#E3,E_BYTE(%a6)
+	beqs	op011		|E3 clear: decode the format from cmdreg1b
+	moveq	#0,%d0		|E3 set: size is always ext
+	rts
+op011:
+|				;dest fmt is cmdreg1b{12:10}, bit-field offset 3
+	bfextu	CMDREG1B(%a6){#3:#3},%d0 |d0 = 3-bit destination format field
+	cmpb	#1,%d0		|001 means single
+	bnes	not_sgl
+	moveq	#1,%d0		|return 1 for single
+	rts
+not_sgl:
+	cmpb	#5,%d0		|101 means double
+	bnes	not_dbl
+	moveq	#2,%d0		|return 2 for double
+	rts
+not_dbl:
+	moveq	#0,%d0		|anything else is reported as extended
+	rts
+
+|
+|
+| Final result table for unf_sub. Note that the negative counterparts
+| are unnecessary as unf_sub always returns the sign separately from
+| the exponent.
+|					;+zero
+EXT_PZRO:	.long	0x00000000,0x00000000,0x00000000,0x00000000
+|					;+zero with exponent word 0x3f81 (sgl)
+SGL_PZRO:	.long	0x3f810000,0x00000000,0x00000000,0x00000000
+|					;+zero with exponent word 0x3c01 (dbl)
+DBL_PZRO:	.long	0x3c010000,0x00000000,0x00000000,0x00000000
+|					;smallest +ext denorm (bit 0 of 3rd long)
+EXT_PSML:	.long	0x00000000,0x00000000,0x00000001,0x00000000
+|					;smallest +sgl denorm (bit 8 of 2nd long)
+SGL_PSML:	.long	0x3f810000,0x00000100,0x00000000,0x00000000
+|					;smallest +dbl denorm (bit 11 of 3rd long)
+DBL_PSML:	.long	0x3c010000,0x00000000,0x00000800,0x00000000
+|
+|	UNF_SUB --- underflow result calculation
+|
+| Input:
+|	d0	contains round precision
+|	a0	points to input operand in the internal extended format
+|
+| Output:
+|	a0	points to correct internal extended precision result.
+|
+
+tblunf:
+	.long	uEXT_RN		|index 0-3: precision 0 (ext), modes RN,RZ,RM,RP
+	.long	uEXT_RZ
+	.long	uEXT_RM
+	.long	uEXT_RP
+	.long	uSGL_RN		|index 4-7: precision 1 (sgl)
+	.long	uSGL_RZ
+	.long	uSGL_RM
+	.long	uSGL_RP
+	.long	uDBL_RN		|index 8-11: precision 2 (dbl)
+	.long	uDBL_RZ
+	.long	uDBL_RM
+	.long	uDBL_RP
+	.long	uDBL_RN		|index 12-15: precision 3 reuses the dbl handlers
+	.long	uDBL_RZ
+	.long	uDBL_RM
+	.long	uDBL_RP
+
+	.global	unf_sub
+unf_sub:
+	lsll	#2,%d0		|move round precision to d0{3:2}
+	bfextu	FPCR_MODE(%a6){#2:#2},%d1 |round mode (2 bits) into d1{1:0}
+	orl	%d1,%d0		|index is fmt:mode in d0{3:0}
+	leal	tblunf,%a1	|load a1 with table address
+	movel	%a1@(%d0:l:4),%a1	|fetch handler address, 4 bytes per entry
+	jmp	(%a1)		|go to the correct routine (clobbers a1,d0,d1)
+|
+|case DEST_FMT = EXT
+|
+uEXT_RN:
+	leal	EXT_PZRO,%a1	|answer is +/- zero
+	bsetb	#z_bit,FPSR_CC(%a6)
+	bra	uset_sign	|now go set the sign
+uEXT_RZ:
+	leal	EXT_PZRO,%a1	|answer is +/- zero
+	bsetb	#z_bit,FPSR_CC(%a6)
+	bra	uset_sign	|now go set the sign
+uEXT_RM:
+	tstb	LOCAL_SGN(%a0)	|if negative underflow
+	beqs	ue_rm_pos
+ue_rm_neg:
+	leal	EXT_PSML,%a1	|answer is negative smallest denorm
+	bsetb	#neg_bit,FPSR_CC(%a6)
+	bra	end_unfr
+ue_rm_pos:
+	leal	EXT_PZRO,%a1	|answer is positive zero
+	bsetb	#z_bit,FPSR_CC(%a6)
+	bra	end_unfr
+uEXT_RP:
+	tstb	LOCAL_SGN(%a0)	|if negative underflow
+	beqs	ue_rp_pos
+ue_rp_neg:
+	leal	EXT_PZRO,%a1	|answer is negative zero
+	oril	#negz_mask,USER_FPSR(%a6)
+	bra	end_unfr
+ue_rp_pos:
+	leal	EXT_PSML,%a1	|answer is positive smallest denorm
+	bra	end_unfr	|no condition code bit is set on this path
+|
+|case DEST_FMT = DBL
+|
+uDBL_RN:
+	leal	DBL_PZRO,%a1	|answer is +/- zero
+	bsetb	#z_bit,FPSR_CC(%a6)
+	bra	uset_sign	|now go set the sign
+uDBL_RZ:
+	leal	DBL_PZRO,%a1	|answer is +/- zero
+	bsetb	#z_bit,FPSR_CC(%a6)
+	bra	uset_sign	|now go set the sign
+uDBL_RM:
+	tstb	LOCAL_SGN(%a0)	|if negative underflow
+	beqs	ud_rm_pos
+ud_rm_neg:
+	leal	DBL_PSML,%a1	|answer is smallest denormalized negative
+	bsetb	#neg_bit,FPSR_CC(%a6)
+	bra	end_unfr
+ud_rm_pos:
+	leal	DBL_PZRO,%a1	|answer is positive zero
+	bsetb	#z_bit,FPSR_CC(%a6)
+	bra	end_unfr
+uDBL_RP:
+	tstb	LOCAL_SGN(%a0)	|if negative underflow
+	beqs	ud_rp_pos
+ud_rp_neg:
+	leal	DBL_PZRO,%a1	|answer is negative zero
+	oril	#negz_mask,USER_FPSR(%a6)
+	bra	end_unfr
+ud_rp_pos:
+	leal	DBL_PSML,%a1	|answer is smallest denormalized positive
+	bra	end_unfr	|no condition code bit is set on this path
+|
+|case DEST_FMT = SGL
+|
+uSGL_RN:
+	leal	SGL_PZRO,%a1	|answer is +/- zero
+	bsetb	#z_bit,FPSR_CC(%a6)
+	bras	uset_sign	|now go set the sign
+uSGL_RZ:
+	leal	SGL_PZRO,%a1	|answer is +/- zero
+	bsetb	#z_bit,FPSR_CC(%a6)
+	bras	uset_sign	|now go set the sign
+uSGL_RM:
+	tstb	LOCAL_SGN(%a0)	|if negative underflow
+	beqs	us_rm_pos
+us_rm_neg:
+	leal	SGL_PSML,%a1	|answer is smallest denormalized negative
+	bsetb	#neg_bit,FPSR_CC(%a6)
+	bras	end_unfr
+us_rm_pos:
+	leal	SGL_PZRO,%a1	|answer is positive zero
+	bsetb	#z_bit,FPSR_CC(%a6)
+	bras	end_unfr
+uSGL_RP:
+	tstb	LOCAL_SGN(%a0)	|if negative underflow
+	beqs	us_rp_pos
+us_rp_neg:
+	leal	SGL_PZRO,%a1	|answer is negative zero
+	oril	#negz_mask,USER_FPSR(%a6)
+	bras	end_unfr
+us_rp_pos:
+	leal	SGL_PSML,%a1	|answer is smallest denormalized positive
+	bras	end_unfr	|no condition code bit is set on this path
+
+uset_sign:
+	tstb	LOCAL_SGN(%a0)	|if negative underflow
+	beqs	end_unfr	|positive: z_bit alone is enough
+uneg_sign:
+	bsetb	#neg_bit,FPSR_CC(%a6)	|negative zero: neg_bit on top of z_bit
+
+end_unfr:
+	movew	LOCAL_EX(%a1),LOCAL_EX(%a0) |be careful not to overwrite sign
+	movel	LOCAL_HI(%a1),LOCAL_HI(%a0)	|copy table mantissa, high lword
+	movel	LOCAL_LO(%a1),LOCAL_LO(%a0)	|copy table mantissa, low lword
+	rts			|a0 still points to the (now patched) operand
+|
+|	reg_dest --- write byte, word, or long data to Dn
+|
+|
+| Input:
+|	L_SCR1: Data
+|	d1:     data size and dest register number formatted as:
+|
+|	31		5    4     3     2     1     0
+|       -----------------------------------------------
+|       |        0        |    Size   |  Dest Reg #   |
+|       -----------------------------------------------
+|
+|	Size is:
+|		0 - Byte
+|		1 - Word
+|		2 - Long/Single
+|
+pregdst:
+	.long	byte_d0		|index 0-7: size 0 (byte), Dn = d0..d7
+	.long	byte_d1
+	.long	byte_d2
+	.long	byte_d3
+	.long	byte_d4
+	.long	byte_d5
+	.long	byte_d6
+	.long	byte_d7
+	.long	word_d0		|index 8-15: size 1 (word), Dn = d0..d7
+	.long	word_d1
+	.long	word_d2
+	.long	word_d3
+	.long	word_d4
+	.long	word_d5
+	.long	word_d6
+	.long	word_d7
+	.long	long_d0		|index 16-23: size 2 (long/single), Dn = d0..d7
+	.long	long_d1
+	.long	long_d2
+	.long	long_d3
+	.long	long_d4
+	.long	long_d5
+	.long	long_d6
+	.long	long_d7
+
+reg_dest:
+	leal	pregdst,%a0	|a0 = table base (a0 is clobbered)
+	movel	%a0@(%d1:l:4),%a0	|d1 = size:reg index, 4 bytes per entry; not range checked
+	jmp	(%a0)		|the selected stub does the write and the rts
+
+byte_d0:
+	moveb	L_SCR1(%a6),USER_D0+3(%a6)	|d0/d1: patch the frame copy; +3 = low byte
+	rts
+byte_d1:
+	moveb	L_SCR1(%a6),USER_D1+3(%a6)
+	rts
+byte_d2:
+	moveb	L_SCR1(%a6),%d2	|d2-d7: written straight into the live register
+	rts
+byte_d3:
+	moveb	L_SCR1(%a6),%d3
+	rts
+byte_d4:
+	moveb	L_SCR1(%a6),%d4
+	rts
+byte_d5:
+	moveb	L_SCR1(%a6),%d5
+	rts
+byte_d6:
+	moveb	L_SCR1(%a6),%d6
+	rts
+byte_d7:
+	moveb	L_SCR1(%a6),%d7
+	rts
+word_d0:
+	movew	L_SCR1(%a6),USER_D0+2(%a6)	|+2 = low word of the frame copy
+	rts
+word_d1:
+	movew	L_SCR1(%a6),USER_D1+2(%a6)
+	rts
+word_d2:
+	movew	L_SCR1(%a6),%d2	|first word of L_SCR1 into the low word of Dn
+	rts
+word_d3:
+	movew	L_SCR1(%a6),%d3
+	rts
+word_d4:
+	movew	L_SCR1(%a6),%d4
+	rts
+word_d5:
+	movew	L_SCR1(%a6),%d5
+	rts
+word_d6:
+	movew	L_SCR1(%a6),%d6
+	rts
+word_d7:
+	movew	L_SCR1(%a6),%d7
+	rts
+long_d0:
+	movel	L_SCR1(%a6),USER_D0(%a6)	|whole long into the frame copy
+	rts
+long_d1:
+	movel	L_SCR1(%a6),USER_D1(%a6)
+	rts
+long_d2:
+	movel	L_SCR1(%a6),%d2	|whole long into the live register
+	rts
+long_d3:
+	movel	L_SCR1(%a6),%d3
+	rts
+long_d4:
+	movel	L_SCR1(%a6),%d4
+	rts
+long_d5:
+	movel	L_SCR1(%a6),%d5
+	rts
+long_d6:
+	movel	L_SCR1(%a6),%d6
+	rts
+long_d7:
+	movel	L_SCR1(%a6),%d7
+	rts
+	|end
diff --git a/arch/m68k/fpsp040/x_bsun.S b/arch/m68k/fpsp040/x_bsun.S
new file mode 100644
index 0000000..039247b
--- /dev/null
+++ b/arch/m68k/fpsp040/x_bsun.S
@@ -0,0 +1,47 @@
+|
+|	x_bsun.sa 3.3 7/1/91
+|
+|	fpsp_bsun --- FPSP handler for branch/set on unordered exception
+|
+|	Copy the PC to FPIAR to maintain 881/882 compatibility
+|
+|	The real_bsun handler will need to perform further corrective
+|	measures as outlined in the 040 User's Manual on pages
+|	9-41f, section 9.8.3.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+X_BSUN:	|idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	real_bsun
+
+	.global	fpsp_bsun
+fpsp_bsun:
+| Build the local frame and save the FPU state and scratch registers.
+	link		%a6,#-LOCAL_SIZE
+	fsave		-(%a7)
+	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)
+	fmovemx	%fp0-%fp3,USER_FP0(%a6)
+	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+
+| Overwrite the saved FPIAR with the stacked PC (881/882 compatibility).
+	movel		EXC_PC(%a6),USER_FPIAR(%a6)
+| Restore everything (reload presumably picks up the patched FPIAR), then go to the kernel.
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		real_bsun
+|
+	|end
diff --git a/arch/m68k/fpsp040/x_fline.S b/arch/m68k/fpsp040/x_fline.S
new file mode 100644
index 0000000..3917710
--- /dev/null
+++ b/arch/m68k/fpsp040/x_fline.S
@@ -0,0 +1,104 @@
+|
+|	x_fline.sa 3.3 1/10/91
+|
+|	fpsp_fline --- FPSP handler for fline exception
+|
+|	First determine if the exception is one of the unimplemented
+|	floating point instructions.  If so, let fpsp_unimp handle it.
+|	Next, determine if the instruction is an fmovecr with a non-zero
+|	<ea> field.  If so, handle here and return.  Otherwise, it
+|	must be a real F-line exception.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+X_FLINE:	|idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	real_fline
+	|xref	fpsp_unimp
+	|xref	uni_2
+	|xref	mem_read
+	|xref	fpsp_fmt_error
+
+	.global	fpsp_fline
+fpsp_fline:
+|
+|	check for unimplemented vector first.  Use EXC_VEC-4 because
+|	the equate is valid only after a 'link a6' has pushed one more
+|	long onto the stack.
+|
+	cmpw	#UNIMP_VEC,EXC_VEC-4(%a7)
+	beql	fpsp_unimp	|unimplemented FP instruction: not handled here
+
+|
+|	fmovecr with non-zero <ea> handling here
+|
+	subl	#4,%a7		|4 accounts for 2-word difference
+|				;between six word frame (unimp) and
+|				;four word frame
+	link	%a6,#-LOCAL_SIZE
+	fsave	-(%a7)
+	moveml	%d0-%d1/%a0-%a1,USER_DA(%a6)
+	moveal	EXC_PC+4(%a6),%a0	|get address of fline instruction
+	leal	L_SCR1(%a6),%a1	|use L_SCR1 as scratch
+	movel	#4,%d0		|read 4 bytes: opword plus command word
+	addl	#4,%a6		|to offset the sub.l #4,a7 above so that
+|				;a6 can point correctly to the stack frame
+|				;before branching to mem_read
+	bsrl	mem_read
+	subl	#4,%a6		|back to the shifted frame base
+	movel	L_SCR1(%a6),%d0	|d0 contains the fline and command word
+	bfextu	%d0{#4:#3},%d1	|extract coprocessor id
+	cmpib	#1,%d1		|check if cpid=1
+	bne	not_mvcr	|exit if not
+	bfextu	%d0{#16:#6},%d1	|top 6 bits of the command word
+	cmpib	#0x17,%d1		|check if it is an FMOVECR encoding
+	bne	not_mvcr
+|				;if an FMOVECR instruction, fix stack
+|				;and go to FPSP_UNIMP
+fix_stack:
+	cmpib	#VER_40,(%a7)	|test for orig unimp frame
+	bnes	ck_rev
+	subl	#UNIMP_40_SIZE-4,%a7 |emulate an orig fsave
+	moveb	#VER_40,(%a7)	|frame header: version byte
+	moveb	#UNIMP_40_SIZE-4,1(%a7)	|frame header: size byte
+	clrw	2(%a7)		|frame header: remaining word zeroed
+	bras	fix_con
+ck_rev:
+	cmpib	#VER_41,(%a7)	|test for rev unimp frame
+	bnel	fpsp_fmt_error	|if not $40 or $41, exit with error
+	subl	#UNIMP_41_SIZE-4,%a7 |emulate a rev fsave
+	moveb	#VER_41,(%a7)	|frame header: version byte
+	moveb	#UNIMP_41_SIZE-4,1(%a7)	|frame header: size byte
+	clrw	2(%a7)		|frame header: remaining word zeroed
+fix_con:
+	movew	EXC_SR+4(%a6),EXC_SR(%a6) |move stacked sr to new position
+	movel	EXC_PC+4(%a6),EXC_PC(%a6) |move stacked pc to new position
+	fmovel	EXC_PC(%a6),%FPIAR |point FPIAR to fline inst
+	movel	#4,%d1		|instruction length: opword + command word
+	addl	%d1,EXC_PC(%a6)	|increment stacked pc value to next inst
+	movew	#0x202c,EXC_VEC(%a6) |reformat vector to unimp
+	clrl	EXC_EA(%a6)	|clear the EXC_EA field
+	movew	%d0,CMDREG1B(%a6) |move the lower word into CMDREG1B
+	clrl	E_BYTE(%a6)	|clear the long starting at E_BYTE
+	bsetb	#UFLAG,T_BYTE(%a6)	|flag the frame with UFLAG for uni_2
+	moveml	USER_DA(%a6),%d0-%d1/%a0-%a1 |restore data registers
+	bral	uni_2
+
+not_mvcr:
+	moveml	USER_DA(%a6),%d0-%d1/%a0-%a1 |restore data registers
+	frestore (%a7)+
+	unlk	%a6
+	addl	#4,%a7		|undo the subl #4,a7 done on entry
+	bral	real_fline
+
+	|end
diff --git a/arch/m68k/fpsp040/x_operr.S b/arch/m68k/fpsp040/x_operr.S
new file mode 100644
index 0000000..b0f54bc
--- /dev/null
+++ b/arch/m68k/fpsp040/x_operr.S
@@ -0,0 +1,356 @@
+|
+|	x_operr.sa 3.5 7/1/91
+|
+|	fpsp_operr --- FPSP handler for operand error exception
+|
+|	See 68040 User's Manual pp. 9-44f
+|
+| Note 1: For trap disabled 040 does the following:
+| If the dest is a fp reg, then an extended precision non_signaling
+| NAN is stored in the dest reg.  If the dest format is b, w, or l and
+| the source op is a NAN, then garbage is stored as the result (actually
+| the upper 32 bits of the mantissa are sent to the integer unit). If
+| the dest format is integer (b, w, l) and the operr is caused by
+| integer overflow, or the source op is inf, then the result stored is
+| garbage.
+| There are three cases in which operr is incorrectly signaled on the
+| 040.  This occurs for move_out of format b, w, or l for the largest
+| negative integer (-2^7 for b, -2^15 for w, -2^31 for l).
+|
+|	  On opclass = 011 fmove.(b,w,l) that causes a conversion
+|	  overflow -> OPERR, the exponent in wbte (and fpte) is:
+|		byte    56 - (62 - exp)
+|		word    48 - (62 - exp)
+|		long    32 - (62 - exp)
+|
+|			where exp = (true exp) - 1
+|
+|  So, wbtemp and fptemp will contain the following on erroneously
+|	  signalled operr:
+|			fpts = 1
+|			fpte = $4000  (15 bit externally)
+|		byte	fptm = $ffffffff ffffff80
+|		word	fptm = $ffffffff ffff8000
+|		long	fptm = $ffffffff 80000000
+|
+| Note 2: For trap enabled 040 does the following:
+| If the inst is move_out, then same as Note 1.
+| If the inst is not move_out, the dest is not modified.
+| The exceptional operand is not defined for integer overflow
+| during a move_out.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+X_OPERR:	|idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	mem_write
+	|xref	real_operr
+	|xref	real_inex
+	|xref	get_fline
+	|xref	fpsp_done
+	|xref	reg_dest
+
+	.global	fpsp_operr
+fpsp_operr:
+| Build the local frame and save the FPU state and scratch registers.
+	link		%a6,#-LOCAL_SIZE
+	fsave		-(%a7)
+	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)
+	fmovemx	%fp0-%fp3,USER_FP0(%a6)
+	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+
+|
+| Check if this is an opclass 3 instruction.
+|  If so, fall through, else branch to operr_end
+|
+	btstb	#TFLAG,T_BYTE(%a6)
+	beqs	operr_end	|TFLAG clear: leave it to the kernel handler
+
+|
+| If the destination size is B,W,or L, the operr must be
+| handled here.
+|
+	movel	CMDREG1B(%a6),%d0
+	bfextu	%d0{#3:#3},%d0	|0=long, 4=word, 6=byte
+	cmpib	#0,%d0		|determine size; check long
+	beq	operr_long
+	cmpib	#4,%d0		|check word
+	beq	operr_word
+	cmpib	#6,%d0		|check byte
+	beq	operr_byte
+
+|
+| The size is not B,W,or L, so the operr is handled by the
+| kernel handler.  Set the operr bits and clean up, leaving
+| only the integer exception frame on the stack, and the
+| fpu in the original exceptional state.
+|
+operr_end:
+	bsetb		#operr_bit,FPSR_EXCEPT(%a6)
+	bsetb		#aiop_bit,FPSR_AEXCEPT(%a6)
+
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		real_operr
+
+operr_long:
+	moveql	#4,%d1		|write size to d1
+	moveb	STAG(%a6),%d0	|test stag for nan
+	andib	#0xe0,%d0		|clr all but tag
+	cmpib	#0x60,%d0		|check for nan
+	beq	operr_nan
+	cmpil	#0x80000000,FPTEMP_LO(%a6) |test if ls lword is special
+	bnes	chklerr		|if not equal, check for incorrect operr
+	bsr	check_upper	|check if exp and ms mant are special
+	tstl	%d0
+	bnes	chklerr		|if d0 is true, check for incorrect operr
+	movel	#0x80000000,%d0	|store special case result
+	bsr	operr_store
+	bra	not_enabled	|clean and exit
+|
+|	CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
+|
+chklerr:
+	movew	FPTEMP_EX(%a6),%d0
+	andw	#0x7FFF,%d0	|ignore sign bit
+	cmpw	#0x3FFE,%d0	|exponent $3ffe: store the low lword as the result
+	bnes	chklerr2
+fixlong:
+	movel	FPTEMP_LO(%a6),%d0	|low lword of fptemp is the long result
+	bsr	operr_store
+	bra	not_enabled	|operr/aiop are not set on this path
+chklerr2:
+	movew	FPTEMP_EX(%a6),%d0
+	andw	#0x7FFF,%d0	|ignore sign bit
+	cmpw	#0x4000,%d0
+	bcc	store_max	|exponent out of range (unsigned >= $4000)
+
+	movel	FPTEMP_LO(%a6),%d0
+	andl	#0x7FFF0000,%d0	|look for all 1's on bits 30-16
+	cmpl	#0x7FFF0000,%d0
+	beqs	fixlong
+
+	tstl	FPTEMP_LO(%a6)
+	bpls	chklepos	|bit 31 of the low lword clear
+	cmpl	#0xFFFFFFFF,FPTEMP_HI(%a6)	|bit 31 set: high lword must be all ones
+	beqs	fixlong
+	bra	store_max
+chklepos:
+	tstl	FPTEMP_HI(%a6)	|bit 31 clear: high lword must be zero
+	beqs	fixlong
+	bra	store_max
+
+operr_word:
+	moveql	#2,%d1		|write size to d1
+	moveb	STAG(%a6),%d0	|test stag for nan
+	andib	#0xe0,%d0		|clr all but tag
+	cmpib	#0x60,%d0		|check for nan
+	beq	operr_nan
+	cmpil	#0xffff8000,FPTEMP_LO(%a6) |test if ls lword is special
+	bnes	chkwerr		|if not equal, check for incorrect operr
+	bsr	check_upper	|check if exp and ms mant are special
+	tstl	%d0
+	bnes	chkwerr		|if d0 is true, check for incorrect operr
+	movel	#0x80000000,%d0	|special case result; only the top word $8000 is written
+	bsr	operr_store
+	bra	not_enabled	|clean and exit
+|
+|	CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
+|
+chkwerr:
+	movew	FPTEMP_EX(%a6),%d0
+	andw	#0x7FFF,%d0	|ignore sign bit
+	cmpw	#0x3FFE,%d0	|this is the only possible exponent value
+	bnes	store_max
+	movel	FPTEMP_LO(%a6),%d0
+	swap	%d0		|low word to the top half, where a 2-byte store reads it
+	bsr	operr_store
+	bra	not_enabled
+
+operr_byte:
+	moveql	#1,%d1		|write size to d1
+	moveb	STAG(%a6),%d0	|test stag for nan
+	andib	#0xe0,%d0		|clr all but tag
+	cmpib	#0x60,%d0		|check for nan
+	beqs	operr_nan
+	cmpil	#0xffffff80,FPTEMP_LO(%a6) |test if ls lword is special
+	bnes	chkberr		|if not equal, check for incorrect operr
+	bsr	check_upper	|check if exp and ms mant are special
+	tstl	%d0
+	bnes	chkberr		|if d0 is true, check for incorrect operr
+	movel	#0x80000000,%d0	|special case result; only the top byte $80 is written
+	bsr	operr_store
+	bra	not_enabled	|clean and exit
+|
+|	CHECK FOR INCORRECTLY GENERATED OPERR EXCEPTION HERE
+|
+chkberr:
+	movew	FPTEMP_EX(%a6),%d0
+	andw	#0x7FFF,%d0	|ignore sign bit
+	cmpw	#0x3FFE,%d0	|this is the only possible exponent value
+	bnes	store_max
+	movel	FPTEMP_LO(%a6),%d0
+	asll	#8,%d0		|low byte up to bits 15:8 ...
+	swap	%d0		|... then to bits 31:24, where a 1-byte store reads it
+	bsr	operr_store
+	bra	not_enabled
+
+|
+| This operr condition is not of the special case.  Set operr
+| and aiop and write the portion of the nan to memory for the
+| given size.
+|
+operr_nan:
+	orl	#opaop_mask,USER_FPSR(%a6) |set operr & aiop
+
+	movel	ETEMP_HI(%a6),%d0	|output will be from upper 32 bits
+	bsr	operr_store	|d1 still holds the size set by the caller path
+	bra	end_operr	|then test whether the operr trap is enabled
+|
+| Store_max loads the max pos or negative for the size, sets
+| the operr and aiop bits, and clears inex and ainex, incorrectly
+| set by the 040.
+|
+store_max:
+	orl	#opaop_mask,USER_FPSR(%a6) |set operr & aiop
+	bclrb	#inex2_bit,FPSR_EXCEPT(%a6)
+	bclrb	#ainex_bit,FPSR_AEXCEPT(%a6)
+	fmovel	#0,%FPSR	|zero the live FPSR register as well
+
+	tstw	FPTEMP_EX(%a6)	|check sign
+	blts	load_neg
+	movel	#0x7fffffff,%d0	|max positive; operr_store writes the top d1 bytes
+	bsr	operr_store
+	bra	end_operr
+load_neg:
+	movel	#0x80000000,%d0	|max negative; operr_store writes the top d1 bytes
+	bsr	operr_store
+	bra	end_operr
+
+|
+| This routine stores the data in d0, for the given size in d1,
+| to memory or data register as required.  A read of the fline
+| is required to determine the destination.
+|
+operr_store:
+	movel	%d0,L_SCR1(%a6)	|move write data to L_SCR1
+	movel	%d1,-(%a7)	|save register size
+	bsrl	get_fline	|fline returned in d0
+	movel	(%a7)+,%d1	|restore size
+	bftst	%d0{#26:#3}		|if mode is zero, dest is Dn
+	bnes	dest_mem
+|
+| Destination is Dn.  Get register number from d0. Data is in
+| L_SCR1 (read by reg_dest). D1 has size: 1=byte,2=word,4=long/single
+|
+	andil	#7,%d0		|isolate register number
+	cmpil	#4,%d1
+	beqs	op_long		|the most frequent case
+	cmpil	#2,%d1
+	bnes	op_con		|byte: size field stays 0
+	orl	#8,%d0		|word: size field = 1
+	bras	op_con
+op_long:
+	orl	#0x10,%d0		|long: size field = 2
+op_con:
+	movel	%d0,%d1		|format size:reg for reg_dest
+	bral	reg_dest	|call to reg_dest returns to caller
+|				;of operr_store
+|
+| Destination is memory.  Get <ea> from integer exception frame
+| and call mem_write.
+|
+dest_mem:
+	leal	L_SCR1(%a6),%a0	|put ptr to write data in a0
+	movel	EXC_EA(%a6),%a1	|put user destination address in a1
+	movel	%d1,%d0		|put size in d0
+	bsrl	mem_write
+	rts
+|
+| Check the upper 32 bits of the mantissa for $ffffffff and the
+| sign/exponent word of FPTEMP for $c000 or $bfff.  If both match,
+| the operr was signalled incorrectly: return d0 clr.  If not, it
+| is a real operr: return d0 set to 1.  Nothing is stored here.
+|
+check_upper:
+	cmpil	#0xffffffff,FPTEMP_HI(%a6) |upper mantissa lword must be all ones
+	bnes	true_operr	|anything else is a genuine operr
+	cmpiw	#0xbfff,FPTEMP_EX(%a6) |sign/exponent word $bfff ...
+	beqs	not_true_operr	|... marks an incorrectly signalled operr
+	cmpiw	#0xc000,FPTEMP_EX(%a6) |sign/exponent word $c000 ...
+	beqs	not_true_operr	|... marks one as well
+true_operr:
+	moveql	#1,%d0		|d0 = 1: real operr
+	rts
+not_true_operr:
+	moveql	#0,%d0		|d0 = 0: not a real operr
+	rts
+
+|
+| End_operr tests for operr enabled.  If not, it cleans up the stack
+| and does an rte.  If enabled, it cleans up the stack and branches
+| to the kernel operr handler with only the integer exception
+| frame on the stack and the fpu in the original exceptional state
+| with correct data written to the destination.
+|
+end_operr:
+	btstb		#operr_bit,FPCR_ENABLE(%a6)
+	beqs		not_enabled	|operr trap disabled: look at inexact instead
+enabled:
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		real_operr
+
+not_enabled:
+|
+| It is possible to have either inex2 or inex1 exceptions with the
+| operr.  If the inex enable bit is set in the FPCR, and either
+| inex2 or inex1 occurred, we must clean up and branch to the
+| real inex handler.
+|
+ck_inex:
+	moveb	FPCR_ENABLE(%a6),%d0
+	andb	FPSR_EXCEPT(%a6),%d0	|enabled AND reported
+	andib	#0x3,%d0		|keep the two low bits (inex2/inex1 per the note above)
+	beq	operr_exit
+|
+| Inexact enabled and reported, and we must take an inexact exception.
+|
+take_inex:
+	moveb		#INEX_VEC,EXC_VEC+1(%a6)	|rewrite the low byte of the stacked vector word
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
+	orl		#sx_mask,E_BYTE(%a6)
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		real_inex
+|
+| Since operr is only an E1 exception, there is no need to frestore
+| any state back to the fpu.
+|
+operr_exit:
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	unlk		%a6	|drops the fsave frame together with the locals
+	bral		fpsp_done
+
+	|end
diff --git a/arch/m68k/fpsp040/x_ovfl.S b/arch/m68k/fpsp040/x_ovfl.S
new file mode 100644
index 0000000..22cb8b4
--- /dev/null
+++ b/arch/m68k/fpsp040/x_ovfl.S
@@ -0,0 +1,186 @@
+|
+|	x_ovfl.sa 3.5 7/1/91
+|
+|	fpsp_ovfl --- FPSP handler for overflow exception
+|
+|	Overflow occurs when a floating-point intermediate result is
+|	too large to be represented in a floating-point data register,
+|	or when storing to memory, the contents of a floating-point
+|	data register are too large to be represented in the
+|	destination format.
+|
+| Trap disabled results
+|
+| If the instruction is move_out, then garbage is stored in the
+| destination.  If the instruction is not move_out, then the
+| destination is not affected.  For 68881 compatibility, the
+| following values should be stored at the destination, based
+| on the current rounding mode:
+|
+|  RN	Infinity with the sign of the intermediate result.
+|  RZ	Largest magnitude number, with the sign of the
+|	intermediate result.
+|  RM   For pos overflow, the largest pos number. For neg overflow,
+|	-infinity
+|  RP   For pos overflow, +infinity. For neg overflow, the largest
+|	neg number
+|
+| Trap enabled results
+| All trap disabled code applies.  In addition the exceptional
+| operand needs to be made available to the users exception handler
+| with a bias of $6000 subtracted from the exponent.
+|
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+X_OVFL:	|idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	ovf_r_x2
+	|xref	ovf_r_x3
+	|xref	store
+	|xref	real_ovfl
+	|xref	real_inex
+	|xref	fpsp_done
+	|xref	g_opcls
+	|xref	b1238_fix
+
+	.global	fpsp_ovfl
+fpsp_ovfl:
+	link		%a6,#-LOCAL_SIZE
+	fsave		-(%a7)
+	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)
+	fmovemx	%fp0-%fp3,USER_FP0(%a6)
+	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+
+|
+|	The 040 doesn't set the AINEX bit in the FPSR, the following
+|	line temporarily rectifies this error.
+|
+	bsetb	#ainex_bit,FPSR_AEXCEPT(%a6)
+|
+	bsrl	ovf_adj		|denormalize, round & store interm op
+|
+|	if overflow traps not enabled check for inexact exception
+|
+	btstb	#ovfl_bit,FPCR_ENABLE(%a6)
+	beqs	ck_inex
+|	overflow trap enabled; the E3 case needs extra frame fix-ups
+	btstb		#E3,E_BYTE(%a6)
+	beqs		no_e3_1
+	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
+	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
+	bsrl		b1238_fix
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
+	orl		#sx_mask,E_BYTE(%a6)
+no_e3_1:
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		real_ovfl
+|
+| It is possible to have either inex2 or inex1 exceptions with the
+| ovfl.  If the inex enable bit is set in the FPCR, and either
+| inex2 or inex1 occurred, we must clean up and branch to the
+| real inex handler.
+|
+ck_inex:
+|	move.b		FPCR_ENABLE(%a6),%d0
+|	and.b		FPSR_EXCEPT(%a6),%d0
+|	andi.b		#$3,%d0
+	btstb		#inex2_bit,FPCR_ENABLE(%a6)	|only the inex2 enable bit is tested
+	beqs		ovfl_exit
+|
+| Inexact enabled and reported, and we must take an inexact exception.
+|
+take_inex:
+	btstb		#E3,E_BYTE(%a6)
+	beqs		no_e3_2
+	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
+	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
+	bsrl		b1238_fix
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
+	orl		#sx_mask,E_BYTE(%a6)
+no_e3_2:
+	moveb		#INEX_VEC,EXC_VEC+1(%a6)	|rewrite the low byte of the stacked vector word
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		real_inex
+
+ovfl_exit:
+	bclrb	#E3,E_BYTE(%a6)	|test and clear E3 bit
+	beqs	e1_set		|E3 was clear: plain E1 exit
+|
+| Clear dirty bit on dest register in the frame before branching
+| to b1238_fix.
+|
+	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
+	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
+	bsrl		b1238_fix		|test for bug1238 case
+
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
+	orl		#sx_mask,E_BYTE(%a6)
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		fpsp_done
+e1_set:
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	unlk		%a6	|no frestore here: unlk drops the fsave frame
+	bral		fpsp_done
+
+|
+|	ovf_adj
+|
+ovf_adj:
+|
+| Have a0 point to the correct operand.
+|
+	btstb	#E3,E_BYTE(%a6)	|test E3 bit
+	beqs	ovf_e1		|E3 clear: operand is in ETEMP
+
+	lea	WBTEMP(%a6),%a0	|E3 set: operand is in WBTEMP
+	bras	ovf_com
+ovf_e1:
+	lea	ETEMP(%a6),%a0
+
+ovf_com:
+	bclrb	#sign_bit,LOCAL_EX(%a0)	|strip the sign; Z reflects its old value
+	sne	LOCAL_SGN(%a0)	|LOCAL_SGN = $ff if the sign was set, else 0
+
+	bsrl	g_opcls		|returns opclass in d0
+	cmpiw	#3,%d0		|check for opclass3
+	bnes	not_opc011
+
+|
+| FPSR_CC is saved and restored because ovf_r_x3 affects it. The
+| CCs are defined to be 'not affected' for the opclass3 instruction.
+|
+	moveb	FPSR_CC(%a6),L_SCR1(%a6)
+	bsrl	ovf_r_x3	|returns a0 pointing to result
+	moveb	L_SCR1(%a6),FPSR_CC(%a6)
+	bral	store		|stores to memory or register
+
+not_opc011:
+	bsrl	ovf_r_x2	|returns a0 pointing to result
+	bral	store		|tail call: store returns to the caller of ovf_adj
+
+	|end
diff --git a/arch/m68k/fpsp040/x_snan.S b/arch/m68k/fpsp040/x_snan.S
new file mode 100644
index 0000000..039af573
--- /dev/null
+++ b/arch/m68k/fpsp040/x_snan.S
@@ -0,0 +1,277 @@
+|
+|	x_snan.sa 3.3 7/1/91
+|
+| fpsp_snan --- FPSP handler for signalling NAN exception
+|
+| SNAN for float -> integer conversions (integer conversion of
+| an SNAN) is a non-maskable run-time exception.
+|
+| For trap disabled the 040 does the following:
+| If the dest data format is s, d, or x, then the SNAN bit in the NAN
+| is set to one and the resulting non-signaling NAN (truncated if
+| necessary) is transferred to the dest.  If the dest format is b, w,
+| or l, then garbage is written to the dest (actually the upper 32 bits
+| of the mantissa are sent to the integer unit).
+|
+| For trap enabled the 040 does the following:
+| If the inst is move_out, then the results are the same as for trap
+| disabled with the exception posted.  If the instruction is not move_
+| out, the dest. is not modified, and the exception is posted.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+X_SNAN:	|idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	get_fline
+	|xref	mem_write
+	|xref	real_snan
+	|xref	real_inex
+	|xref	fpsp_done
+	|xref	reg_dest
+
+	.global	fpsp_snan
+fpsp_snan:
+	link		%a6,#-LOCAL_SIZE
+	fsave		-(%a7)
+	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)
+	fmovemx	%fp0-%fp3,USER_FP0(%a6)
+	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)
+
+|
+| Check if trap enabled
+|
+	btstb		#snan_bit,FPCR_ENABLE(%a6)
+	bnes		ena		|If enabled, then branch
+
+	bsrl		move_out	|else SNAN disabled
+|
+| It is possible to have an inex1 exception with the
+| snan.  If the inex enable bit is set in the FPCR, and either
+| inex2 or inex1 occurred, we must clean up and branch to the
+| real inex handler.
+|
+ck_inex:
+	moveb	FPCR_ENABLE(%a6),%d0
+	andb	FPSR_EXCEPT(%a6),%d0	|enabled AND reported
+	andib	#0x3,%d0		|keep the two low bits (inex2/inex1 per the note above)
+	beq	end_snan
+|
+| Inexact enabled and reported, and we must take an inexact exception.
+|
+take_inex:
+	moveb		#INEX_VEC,EXC_VEC+1(%a6)	|rewrite the low byte of the stacked vector word
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		real_inex
+|
+| SNAN is enabled.  Check if inst is move_out.
+| Make any corrections to the 040 output as necessary.
+|
+ena:
+	btstb		#5,CMDREG1B(%a6) |if set, inst is move out
+	beq		not_out
+
+	bsrl		move_out
+
+report_snan:
+	moveb		(%a7),VER_TMP(%a6)	|remember the frame version byte
+	cmpib		#VER_40,(%a7)	|test for orig unimp frame
+	bnes		ck_rev
+	moveql		#13,%d0		|need to zero 14 lwords
+	bras		rep_con
+ck_rev:
+	moveql		#11,%d0		|need to zero 12 lwords
+rep_con:
+	clrl		(%a7)		|clear the current frame header first
+loop1:
+	clrl		-(%a7)		|clear and dec a7
+	dbra		%d0,loop1	|runs d0+1 times
+	moveb		VER_TMP(%a6),(%a7) |format a busy frame
+	moveb		#BUSY_SIZE-4,1(%a7)	|write busy size
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
+	orl		#sx_mask,E_BYTE(%a6)
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		real_snan
+|
+| Exit snan handler by expanding the unimp frame into a busy frame
+|
+end_snan:
+	bclrb		#E1,E_BYTE(%a6)
+
+	moveb		(%a7),VER_TMP(%a6)	|remember the frame version byte
+	cmpib		#VER_40,(%a7)	|test for orig unimp frame
+	bnes		ck_rev2
+	moveql		#13,%d0		|need to zero 14 lwords
+	bras		rep_con2
+ck_rev2:
+	moveql		#11,%d0		|need to zero 12 lwords
+rep_con2:
+	clrl		(%a7)		|clear the current frame header first
+loop2:
+	clrl		-(%a7)		|clear and dec a7
+	dbra		%d0,loop2	|runs d0+1 times
+	moveb		VER_TMP(%a6),(%a7) |format a busy frame
+	moveb		#BUSY_SIZE-4,1(%a7) |write busy size
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)
+	orl		#sx_mask,E_BYTE(%a6)
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+
+	unlk		%a6
+	bral		fpsp_done
+
+|
+| Move_out
+|
+move_out:
+	movel		EXC_EA(%a6),%a0	|get <ea> from exc frame
+
+	bfextu		CMDREG1B(%a6){#3:#3},%d0 |move rx field to d0{2:0}
+	cmpil		#0,%d0		|check for long
+	beqs		sto_long	|branch if move_out long
+
+	cmpil		#4,%d0		|check for word
+	beqs		sto_word	|branch if move_out word
+
+	cmpil		#6,%d0		|check for byte
+	beqs		sto_byte	|branch if move_out byte
+
+|
+| Not byte, word or long: nothing is written here
+|
+	rts
+|
+| Get the 32 most significant bits of etemp mantissa
+|
+sto_long:
+	movel		ETEMP_HI(%a6),%d1
+	movel		#4,%d0		|load byte count
+|
+| Set signalling nan bit
+|
+	bsetl		#30,%d1
+|
+| Store to the users destination address
+|
+	tstl		%a0		|check if <ea> is 0
+	beqs		wrt_dn		|destination is a data register
+
+	movel		%d1,-(%a7)	|move the snan onto the stack
+	movel		%a0,%a1		|load dest addr into a1
+	movel		%a7,%a0		|load src addr of snan into a0
+	bsrl		mem_write	|write snan to user memory
+	movel		(%a7)+,%d1	|clear off stack
+	rts
+|
+| Get the 16 most significant bits of etemp mantissa
+|
+sto_word:
+	movel		ETEMP_HI(%a6),%d1
+	movel		#2,%d0		|load byte count
+|
+| Set signalling nan bit
+|
+	bsetl		#30,%d1
+|
+| Store to the users destination address
+|
+	tstl		%a0		|check if <ea> is 0
+	beqs		wrt_dn		|destination is a data register
+
+	movel		%d1,-(%a7)	|move the snan onto the stack
+	movel		%a0,%a1		|load dest addr into a1
+	movel		%a7,%a0		|point to the most significant word
+	bsrl		mem_write	|write snan to user memory
+	movel		(%a7)+,%d1	|clear off stack
+	rts
+|
+| Get the 8 most significant bits of etemp mantissa
+|
+sto_byte:
+	movel		ETEMP_HI(%a6),%d1
+	movel		#1,%d0		|load byte count
+|
+| Set signalling nan bit
+|
+	bsetl		#30,%d1
+|
+| Store to the users destination address
+|
+	tstl		%a0		|check if <ea> is 0
+	beqs		wrt_dn		|destination is a data register
+	movel		%d1,-(%a7)	|move the snan onto the stack
+	movel		%a0,%a1		|load dest addr into a1
+	movel		%a7,%a0		|point to the most significant byte
+	bsrl		mem_write	|write snan to user memory
+	movel		(%a7)+,%d1	|clear off stack
+	rts
+
+|
+|	wrt_dn --- write to a data register
+|
+|	We get here with D1 containing the data to write and D0 the
+|	number of bytes to write: 1=byte,2=word,4=long.
+|
+wrt_dn:
+	movel		%d1,L_SCR1(%a6)	|park the data where reg_dest reads it
+	movel		%d0,-(%a7)	|keep the byte count across the call
+	bsrl		get_fline	|returns fline word in d0
+	moveql		#7,%d1
+	andl		%d0,%d1		|d1 = register number (low 3 bits)
+	movel		(%a7)+,%d0	|byte count back in d0
+	cmpil		#2,%d0
+	beqs		wrt_word
+	cmpil		#4,%d0
+	bnes		wrt_byte	|neither 2 nor 4: byte, size field stays 0
+wrt_long:
+	orl		#0x10,%d1		|size field = 2 (long)
+	bras		wrt_byte
+wrt_word:
+	orl		#0x8,%d1		|size field = 1 (word)
+|					;fall through to the shared exit
+wrt_byte:
+	bral		reg_dest	|reg_dest returns to the caller of wrt_dn
+|
+| Check if it is a src nan or dst nan
+|
+not_out:
+	movel		DTAG(%a6),%d0
+	bfextu		%d0{#0:#3},%d0	|isolate dtag in lsbs
+
+	cmpib		#3,%d0		|check for nan in destination
+	bnes		issrc		|destination nan has priority
+dst_nan:
+	btstb		#6,FPTEMP_HI(%a6) |check if dest nan is an snan
+	bnes		issrc		|no, so check source for snan
+	movew		FPTEMP_EX(%a6),%d0	|sign comes from the destination operand
+	bras		cont
+issrc:
+	movew		ETEMP_EX(%a6),%d0	|sign comes from the source operand
+cont:
+	btstl		#15,%d0		|test for sign of snan
+	beqs		clr_neg
+	bsetb		#neg_bit,FPSR_CC(%a6)	|negative nan: set N
+	bra		report_snan
+clr_neg:
+	bclrb		#neg_bit,FPSR_CC(%a6)	|positive nan: clear N
+	bra		report_snan
+
+	|end
diff --git a/arch/m68k/fpsp040/x_store.S b/arch/m68k/fpsp040/x_store.S
new file mode 100644
index 0000000..4282fa6
--- /dev/null
+++ b/arch/m68k/fpsp040/x_store.S
@@ -0,0 +1,256 @@
+|
+|	x_store.sa 3.2 1/24/91
+|
+|	store --- store operand to memory or register
+|
+|	Used by underflow and overflow handlers.
+|
+|	a6 = points to fp value to be stored.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+X_STORE:	|idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+fpreg_mask:
+	.byte	0x80,0x40,0x20,0x10,0x08,0x04,0x02,0x01	|indexed by fp reg# in sto_fp; 0x80 = fp0 ... 0x10 = fp3
+
+#include "fpsp.h"
+
+	|xref	mem_write
+	|xref	get_fline
+	|xref	g_opcls
+	|xref	g_dfmtou
+	|xref	reg_dest
+
+	.global	dest_ext
+	.global	dest_dbl
+	.global	dest_sgl
+
+	.global	store
+store:
+	btstb	#E3,E_BYTE(%a6)		|E3 exception?
+	beqs	E1_sto			|no, take the E1 path
+E3_sto:
+|					;dest. reg is field {6:3} of
+	bfextu	CMDREG3B(%a6){#6:#3},%d0	|cmdreg3b, read from memory
+sto_fp:
+	lea	fpreg_mask,%a1
+	moveb	(%a1,%d0.w),%d0		|d0.b = fmovem mask bit for reg#
+	tstb	LOCAL_SGN(%a0)
+	beqs	sgn_done		|sign byte clear: exponent is final
+	bsetb	#sign_bit,LOCAL_EX(%a0)	|fold sign into the exponent word
+sgn_done:
+	fmovemx (%a0),%d0		|load the selected fp register
+|
+|	Exception handlers reload fp0-fp3 from the USER_FPn stack
+|	slots on exit, so when the destination is one of those
+|	the new value is copied into its slot as well.
+|
+	cmpb	#0x80,%d0
+	beqs	upd_fp0
+	cmpb	#0x40,%d0
+	beqs	upd_fp1
+	cmpb	#0x20,%d0
+	beqs	upd_fp2
+	cmpb	#0x10,%d0
+	beqs	upd_fp3
+	rts				|fp4-fp7: no stack copy to update
+upd_fp0:
+	fmovemx %fp0-%fp0,USER_FP0(%a6)
+	rts
+upd_fp1:
+	fmovemx %fp1-%fp1,USER_FP1(%a6)
+	rts
+upd_fp2:
+	fmovemx %fp2-%fp2,USER_FP2(%a6)
+	rts
+upd_fp3:
+	fmovemx %fp3-%fp3,USER_FP3(%a6)
+	rts
+
+E1_sto:
+	bsrl	g_opcls		|d0 = opclass of the instruction
+	cmpib	#3,%d0
+	beq	opc011		|opclass 3: store to the <ea>
+|				;else dest. fp reg is field {6:3}
+	bfextu	CMDREG1B(%a6){#6:#3},%d0	|of cmdreg1b
+	bras	sto_fp
+
+opc011:
+	bsrl	g_dfmtou	|d0 = destination format:
+|				;ext=00, sgl=01, dbl=10
+	movel	%a0,%a1		|a1 = source operand
+	movel	EXC_EA(%a6),%a0	|a0 = destination <ea>
+	tstl	%d0		|format 0 is extended
+	beq	dest_ext
+	cmpil	#1,%d0		|format 1 is single
+	beq	dest_sgl
+|
+|	anything else is double: drop into dest_dbl below
+|
+
+|
+|	dest_dbl --- write double precision value to user space
+|
+|Input
+|	a0 -> destination address
+|	a1 -> source in extended precision
+|Output
+|	a0 -> destroyed
+|	a1 -> destroyed
+|	d0 -> 0
+|
+|Changes extended precision to double precision.
+| Note: no attempt is made to round the extended value to double.
+|	dbl_sign = ext_sign
+|	dbl_exp = ext_exp - $3fff(ext bias) + $7ff(dbl bias)
+|	get rid of ext integer bit
+|	dbl_mant = ext_mant{62:11}
+|
+|		---------------   ---------------    ---------------
+|  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |
+|		---------------   ---------------    ---------------
+|		 95	    64    63 62	      32      31     11	  0
+|				     |			     |
+|				     |			     |
+|				     |			     |
+|			             v			     v
+|			      ---------------   ---------------
+|  double   ->		      |s|exp| mant  |   |  mant       |
+|			      ---------------   ---------------
+|			      63     51   32   31	       0
+|
+dest_dbl:
+	clrl	%d0		|clear d0 (upper word must be 0 before the swap)
+	movew	LOCAL_EX(%a1),%d0	|get exponent
+	subw	#0x3fff,%d0	|subtract extended precision bias
+	cmpw	#0x4000,%d0	|check if inf (max exp $7fff less the bias)
+	beqs	inf		|if so, special case
+	addw	#0x3ff,%d0	|add double precision bias
+	swap	%d0		|d0 now in upper word
+	lsll	#4,%d0		|exponent field now starts at bit 20
+	tstb	LOCAL_SGN(%a1)
+	beqs	get_mant	|if positive, go process mantissa
+	bsetl	#31,%d0		|if negative, put in sign information
+|				; before continuing
+	bras	get_mant	|go process mantissa
+inf:
+	movel	#0x7ff00000,%d0	|load dbl inf exponent
+	clrl	LOCAL_HI(%a1)	|clear ms mantissa (low lword of the inf)
+	tstb	LOCAL_SGN(%a1)
+	beqs	dbl_inf		|if positive, go ahead and write it
+	bsetl	#31,%d0		|if negative put in sign information
+dbl_inf:
+	movel	%d0,LOCAL_EX(%a1)	|result overwrites the operand in place
+	bras	dbl_wrt
+get_mant:
+	movel	LOCAL_HI(%a1),%d1	|get ms mantissa
+	bfextu	%d1{#1:#20},%d1	|skip integer bit, take bits 30-11
+	orl	%d1,%d0		|put these bits in ms word of double
+	movel	%d0,LOCAL_EX(%a1)	|upper lword of the double is complete
+	movel	LOCAL_HI(%a1),%d1	|get ms mantissa
+	movel	#21,%d0		|load shift count
+	lsll	%d0,%d1		|put lower 11 bits in upper bits
+	movel	%d1,LOCAL_HI(%a1)	|build lower lword in memory
+	movel	LOCAL_LO(%a1),%d1	|get ls mantissa
+	bfextu	%d1{#0:#21},%d0	|top 21 bits of ls mantissa (bits 31-11)
+	orl	%d0,LOCAL_HI(%a1)	|put them in double result
+dbl_wrt:
+	movel	#0x8,%d0		|byte count for double precision number
+	exg	%a0,%a1		|a0=supervisor source, a1=user dest
+	bsrl	mem_write	|copy the 8 bytes to the user destination
+	rts
+|
+|	dest_sgl --- write single precision value to user space
+|
+|Input
+|	a0 -> destination address
+|	a1 -> source in extended precision
+|
+|Output
+|	a0 -> destroyed
+|	a1 -> destroyed
+|	d0 -> 0
+|
+|Changes extended precision to single precision.
+|	sgl_sign = ext_sign
+|	sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)
+|	get rid of ext integer bit
+|	sgl_mant = ext_mant{62:40}
+|
+|		---------------   ---------------    ---------------
+|  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |
+|		---------------   ---------------    ---------------
+|		 95	    64    63 62	   40 32      31     12	  0
+|				     |	   |
+|				     |	   |
+|				     |	   |
+|			             v     v
+|			      ---------------
+|  single   ->		      |s|exp| mant  |
+|			      ---------------
+|			      31     22     0
+|
+dest_sgl:
+	clrl	%d0		|clear d0 (upper word must be 0 before the swap)
+	movew	LOCAL_EX(%a1),%d0	|get exponent
+	subw	#0x3fff,%d0	|subtract extended precision bias
+	cmpw	#0x4000,%d0	|check if inf (max exp $7fff less the bias)
+	beqs	sinf		|if so, special case
+	addw	#0x7f,%d0		|add single precision bias
+	swap	%d0		|put exp in upper word of d0
+	lsll	#7,%d0		|exponent field now starts at bit 23
+	tstb	LOCAL_SGN(%a1)
+	beqs	get_sman	|if positive, continue
+	bsetl	#31,%d0		|if negative, put in sign first
+	bras	get_sman	|get mantissa
+sinf:
+	movel	#0x7f800000,%d0	|load single inf exp to d0
+	tstb	LOCAL_SGN(%a1)
+	beqs	sgl_wrt		|if positive, continue
+	bsetl	#31,%d0		|if negative, put in sign info
+	bras	sgl_wrt
+
+get_sman:
+	movel	LOCAL_HI(%a1),%d1	|get ms mantissa
+	bfextu	%d1{#1:#23},%d1	|skip integer bit, take bits 30-8
+	orl	%d1,%d0		|put these bits in ms word of single
+
+sgl_wrt:
+	movel	%d0,L_SCR1(%a6)	|stage the finished single in L_SCR1
+	movel	#0x4,%d0		|byte count for single precision number
+	tstl	%a0		|users destination address
+	beqs	sgl_Dn		|zero: destination is a data register
+	exg	%a0,%a1		|a1=user dest (a0 is reloaded on the next line)
+	leal	L_SCR1(%a6),%a0	|point a0 to data
+	bsrl	mem_write	|copy the 4 bytes to the user destination
+	rts
+sgl_Dn:
+	bsrl	get_fline	|returns fline word in d0
+	andw	#0x7,%d0		|isolate register number (low word only)
+	movel	%d0,%d1		|d1 has size:reg formatted for reg_dest
+	orl	#0x10,%d1		|reg_dest wants size added to reg#
+	bral	reg_dest	|0x10 = long size; rts in reg_dest will
+|				;return to caller of dest_sgl
+
+dest_ext:
+	tstb	LOCAL_SGN(%a1)	|sign byte set?
+	beqs	dstx_wrt	|no, exponent word is already right
+	bsetb	#sign_bit,LOCAL_EX(%a1)	|yes, fold sign into exponent word
+dstx_wrt:
+	clrb	LOCAL_SGN(%a1)	|zero the sign byte before the write
+
+	moveq	#12,%d0		|extended operand is 12 bytes
+	exg	%a1,%a0		|a0=supervisor source, a1=user dest
+	bsrl	mem_write	|copy it to the user destination
+	rts
+
+	|end
diff --git a/arch/m68k/fpsp040/x_unfl.S b/arch/m68k/fpsp040/x_unfl.S
new file mode 100644
index 0000000..077fcc2
--- /dev/null
+++ b/arch/m68k/fpsp040/x_unfl.S
@@ -0,0 +1,269 @@
+|
+|	x_unfl.sa 3.4 7/1/91
+|
+|	fpsp_unfl --- FPSP handler for underflow exception
+|
+| Trap disabled results
+|	For 881/2 compatibility, sw must denormalize the intermediate
+| result, then store the result.  Denormalization is accomplished
+| by taking the intermediate result (which is always normalized) and
+| shifting the mantissa right while incrementing the exponent until
+| it is equal to the denormalized exponent for the destination
+| format.  After denormalization, the result is rounded to the
+| destination format.
+|
+| Trap enabled results
+|	All trap disabled code applies.	In addition the exceptional
+| operand needs to be made available to the user with a bias of $6000
+| added to the exponent.
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+X_UNFL:	|idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	denorm
+	|xref	round
+	|xref	store
+	|xref	g_rndpr
+	|xref	g_opcls
+	|xref	g_dfmtou
+	|xref	real_unfl
+	|xref	real_inex
+	|xref	fpsp_done
+	|xref	b1238_fix
+
+	.global	fpsp_unfl
+fpsp_unfl:
+	link		%a6,#-LOCAL_SIZE	|a6 frame with LOCAL_SIZE bytes of locals
+	fsave		-(%a7)		|push the fpu state frame below the locals
+	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)	|save scratch integer regs
+	fmovemx	%fp0-%fp3,USER_FP0(%a6)	|save fp0-fp3
+	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)	|save fp control regs
+
+|
+	bsrl		unf_res	|denormalize, round & store interm op
+|
+| If underflow exceptions are not enabled, check for inexact
+| exception
+|
+	btstb		#unfl_bit,FPCR_ENABLE(%a6)
+	beqs		ck_inex		|unfl trap not enabled
+
+	btstb		#E3,E_BYTE(%a6)
+	beqs		no_e3_1		|not E3: skip the frame fixup
+|
+| Clear dirty bit on dest register in the frame before branching
+| to b1238_fix.
+|
+	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
+	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
+	bsrl		b1238_fix		|test for bug1238 case
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	|copy saved fpsr to the shadow
+	orl		#sx_mask,E_BYTE(%a6)	|or sx_mask into the long at E_BYTE
+no_e3_1:
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1	|restore what was saved on entry
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+		|reload the fsave frame
+	unlk		%a6
+	bral		real_unfl	|exit via real_unfl (unfl trap enabled)
+|
+| It is possible to have either inex2 or inex1 exceptions with the
+| unfl.  If the inex enable bit is set in the FPCR, and either
+| inex2 or inex1 occurred, we must clean up and branch to the
+| real inex handler.
+|
+ck_inex:
+	moveb		FPCR_ENABLE(%a6),%d0	|enabled exceptions
+	andb		FPSR_EXCEPT(%a6),%d0	|keep those that are also flagged
+	andib		#0x3,%d0		|low two bits: the inex2/inex1 pair
+	beqs		unfl_done		|none pending: normal completion
+
+|
+| Inexact enabled and reported, and we must take an inexact exception
+|
+take_inex:
+	btstb		#E3,E_BYTE(%a6)
+	beqs		no_e3_2		|not E3: skip the frame fixup
+|
+| Clear dirty bit on dest register in the frame before branching
+| to b1238_fix.
+|
+	bfextu		CMDREG3B(%a6){#6:#3},%d0	|get dest reg no
+	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
+	bsrl		b1238_fix		|test for bug1238 case
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	|copy saved fpsr to the shadow
+	orl		#sx_mask,E_BYTE(%a6)	|or sx_mask into the long at E_BYTE
+no_e3_2:
+	moveb		#INEX_VEC,EXC_VEC+1(%a6)	|patch INEX_VEC into the byte at EXC_VEC+1
+	moveml         USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx        USER_FP0(%a6),%fp0-%fp3
+	fmoveml        USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore        (%a7)+
+	unlk            %a6
+	bral		real_inex	|exit via the inexact handler
+
+unfl_done:
+	bclrb		#E3,E_BYTE(%a6)	|test E3 (Z = bit was clear), then clear it
+	beqs		e1_set		|E3 was not set: E1 case
+|
+| Clear dirty bit on dest register in the frame before branching
+| to b1238_fix.
+|
+	bfextu		CMDREG3B(%a6){#6:#3},%d0		|get dest reg no
+	bclrb		%d0,FPR_DIRTY_BITS(%a6)	|clr dest dirty bit
+	bsrl		b1238_fix		|test for bug1238 case
+	movel		USER_FPSR(%a6),FPSR_SHADOW(%a6)	|copy saved fpsr to the shadow
+	orl		#sx_mask,E_BYTE(%a6)	|or sx_mask into the long at E_BYTE
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	frestore	(%a7)+		|E3 path reloads the fsave frame
+	unlk		%a6
+	bral		fpsp_done
+e1_set:
+	moveml		USER_DA(%a6),%d0-%d1/%a0-%a1
+	fmovemx	USER_FP0(%a6),%fp0-%fp3
+	fmoveml	USER_FPCR(%a6),%fpcr/%fpsr/%fpiar
+	unlk		%a6		|no frestore: unlk reloads a7 from a6, dropping the fsave frame
+	bral		fpsp_done
+|
+|	unf_res --- underflow result calculation
+|
+unf_res:
+	bsrl		g_rndpr		|returns RND_PREC in d0 0=ext,
+|					;1=sgl, 2=dbl
+|					;we need the RND_PREC in the
+|					;upper word for round
+	movew		#0,-(%a7)	|low word of the PREC long = 0
+	movew		%d0,-(%a7)	|copy RND_PREC to stack (upper word)
+|
+|
+| If the exception bit set is E3, the exceptional operand from the
+| fpu is in WBTEMP; else it is in FPTEMP.
+|
+	btstb		#E3,E_BYTE(%a6)
+	beqs		unf_E1
+unf_E3:
+	lea		WBTEMP(%a6),%a0	|a0 now points to operand
+|
+| Test for fsgldiv and fsglmul.  If the inst was one of these, then
+| force the precision to extended (d0 = 0) for the denorm routine
+| and to single (stacked word = 1) for the round routine.
+|
+	movew		CMDREG3B(%a6),%d1	|check for fsgldiv or fsglmul
+	andiw		#0x7f,%d1		|keep the low 7 bits of the command word
+	cmpiw		#0x30,%d1		|check for sgldiv
+	beqs		unf_sgl
+	cmpiw		#0x33,%d1		|check for sglmul
+	bnes		unf_cont	|if not, use fpcr prec in round
+unf_sgl:
+	clrl		%d0		|d0 = 0 (ext) going into denorm
+	movew		#0x1,(%a7)	|override g_rndpr precision
+|					;force single
+	bras		unf_cont
+unf_E1:
+	lea		FPTEMP(%a6),%a0	|a0 now points to operand
+unf_cont:
+	bclrb		#sign_bit,LOCAL_EX(%a0)	|clear sign bit
+	sne		LOCAL_SGN(%a0)		|sign byte = $ff if the bit was set, else 0
+
+	bsrl		denorm		|returns denorm, a0 points to it
+|
+| WARNING:
+|				;d0 has guard,round sticky bit
+|				;make sure that it is not corrupted
+|				;before it reaches the round subroutine
+|				;also ensure that a0 is not corrupted
+
+|
+| Set up d1 for round subroutine d1 contains the PREC/MODE
+| information respectively on upper/lower register halves.
+|
+	bfextu		FPCR_MODE(%a6){#2:#2},%d1	|get mode from FPCR
+|						;mode in lower d1
+	addl		(%a7)+,%d1		|merge PREC/MODE, pop the PREC long
+|
+| WARNING: a0 and d0 are assumed to be intact between the denorm and
+| round subroutines. All code between these two subroutines
+| must not corrupt a0 and d0.
+|
+|
+| Perform Round
+|	Input:		a0 points to input operand
+|			d0{31:29} has guard, round, sticky
+|			d1{01:00} has rounding mode
+|			d1{17:16} has rounding precision
+|	Output:		a0 points to rounded operand
+|
+
+	bsrl		round		|returns rounded denorm at (a0)
+|
+| Differentiate between store to memory vs. store to register
+|
+unf_store:
+	bsrl		g_opcls		|returns opclass in d0{2:0}
+	cmpib		#0x3,%d0
+	bnes		not_opc011	|not opclass 3: dest is an fp register
+|
+| At this point, a store to memory is pending
+|
+opc011:
+	bsrl		g_dfmtou	|returns dest format in d0 (0 = extended)
+	tstb		%d0
+	beqs		ext_opc011	|If extended, do not subtract
+|				;If destination format is sgl/dbl,
+	tstb		LOCAL_HI(%a0)	|If rounded result is normal, do not
+|					;subtract (msb of mantissa is set)
+	bmis		ext_opc011
+	subqw		#1,LOCAL_EX(%a0)	|account for denorm bias vs.
+|				;normalized bias
+|				;          normalized   denormalized
+|				;single       $7f           $7e
+|				;double       $3ff          $3fe
+|
+ext_opc011:
+	bsrl		store		|stores to memory
+	bras		unf_done	|finish up
+
+|
+| At this point, a store to a float register is pending
+|
+not_opc011:
+	bsrl		store	|stores to float register
+|				;a0 is not corrupted on a store to a
+|				;float register.
+|
+| Set the condition codes according to result
+|
+	tstl		LOCAL_HI(%a0)	|check upper mantissa
+	bnes		ck_sgn
+	tstl		LOCAL_LO(%a0)	|check lower mantissa
+	bnes		ck_sgn
+	bsetb		#z_bit,FPSR_CC(%a6) |both mantissa lwords zero: set Z
+ck_sgn:
+	btstb		#sign_bit,LOCAL_EX(%a0)	|check the sign bit
+	beqs		unf_done
+	bsetb		#neg_bit,FPSR_CC(%a6)	|negative result: set N
+
+|
+| Finish.
+|
+unf_done:
+	btstb		#inex2_bit,FPSR_EXCEPT(%a6)
+	beqs		no_aunfl	|accrued unfl is set only when inex2 is set
+	bsetb		#aunfl_bit,FPSR_AEXCEPT(%a6)
+no_aunfl:
+	rts
+
+	|end
diff --git a/arch/m68k/fpsp040/x_unimp.S b/arch/m68k/fpsp040/x_unimp.S
new file mode 100644
index 0000000..920cb94
--- /dev/null
+++ b/arch/m68k/fpsp040/x_unimp.S
@@ -0,0 +1,77 @@
+|
+|	x_unimp.sa 3.3 7/1/91
+|
+|	fpsp_unimp --- FPSP handler for unimplemented instruction
+|	exception.
+|
+| Invoked when the user program encounters a floating-point
+| op-code that hardware does not support.  Trap vector# 11
+| (See table 8-1 MC68030 User's Manual).
+|
+|
+| Note: An fsave for an unimplemented inst. will create a short
+| fsave stack.
+|
+|  Input: 1. Six word stack frame for unimplemented inst, four word
+|            for illegal
+|            (See table 8-7 MC68030 User's Manual).
+|         2. Unimp (short) fsave state frame created here by fsave
+|            instruction.
+|
+|
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+X_UNIMP:	|idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	get_op
+	|xref	do_func
+	|xref	sto_res
+	|xref	gen_except
+	|xref	fpsp_fmt_error
+
+	.global	fpsp_unimp
+	.global	uni_2
+fpsp_unimp:
+	link		%a6,#-LOCAL_SIZE	|a6 frame with LOCAL_SIZE bytes of locals
+	fsave		-(%a7)		|push the fpu state frame
+uni_2:
+	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)	|save scratch integer regs
+	fmovemx	%fp0-%fp3,USER_FP0(%a6)	|save fp0-fp3
+	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)	|save fp control regs
+	moveb		(%a7),%d0		|test for valid version num
+	andib		#0xf0,%d0		|test for $4x
+	cmpib		#VER_4,%d0	|must be $4x or exit
+	bnel		fpsp_fmt_error
+|
+|	Temporary D25B Fix
+|	The following lines are used to ensure that the FPSR
+|	exception byte and condition codes are clear before proceeding
+|
+	movel		USER_FPSR(%a6),%d0
+	andl		#0xFF00FF,%d0	|clear all but qbyte and accrued exceptions
+	movel		%d0,USER_FPSR(%a6)
+	fmovel		#0,%FPSR |clear all user bits
+	fmovel		#0,%FPCR	|clear all user exceptions for FPSP
+
+	clrb		UFLG_TMP(%a6)	|clr flag for unsupp data
+
+	bsrl		get_op		|go get operand(s)
+	clrb		STORE_FLG(%a6)	|default: result will be stored
+	bsrl		do_func		|do the function
+	fsave		-(%a7)		|capture possible exc state
+	tstb		STORE_FLG(%a6)
+	bnes		no_store	|if STORE_FLG is set, no store
+	bsrl		sto_res		|store the result in user space
+no_store:
+	bral		gen_except	|post any exceptions and return
+
+	|end
diff --git a/arch/m68k/fpsp040/x_unsupp.S b/arch/m68k/fpsp040/x_unsupp.S
new file mode 100644
index 0000000..4ec5728
--- /dev/null
+++ b/arch/m68k/fpsp040/x_unsupp.S
@@ -0,0 +1,83 @@
+|
+|	x_unsupp.sa 3.3 7/1/91
+|
+|	fpsp_unsupp --- FPSP handler for unsupported data type exception
+|
+| Trap vector #55	(See table 8-1 Mc68030 User's manual).
+| Invoked when the user program encounters a data format (packed) that
+| hardware does not support or a data type (denormalized numbers or un-
+| normalized numbers).
+| Normalizes denorms and unnorms, unpacks packed numbers then stores
+| them back into the machine to let the 040 finish the operation.
+|
+| Unsupp calls two routines:
+|	1. get_op -  gets the operand(s)
+|	2. res_func - restore the function back into the 040 or
+|			if fmove.p fpm,<ea> then pack source (fpm)
+|			and store in users memory <ea>.
+|
+|  Input: Long fsave stack frame
+|
+|
+
+|		Copyright (C) Motorola, Inc. 1990
+|			All Rights Reserved
+|
+|	THIS IS UNPUBLISHED PROPRIETARY SOURCE CODE OF MOTOROLA
+|	The copyright notice above does not evidence any
+|	actual or intended publication of such source code.
+
+X_UNSUPP:	|idnt    2,1 | Motorola 040 Floating Point Software Package
+
+	|section	8
+
+#include "fpsp.h"
+
+	|xref	get_op
+	|xref	res_func
+	|xref	gen_except
+	|xref	fpsp_fmt_error
+
+	.global	fpsp_unsupp
+fpsp_unsupp:
+|
+	link		%a6,#-LOCAL_SIZE	|a6 frame with LOCAL_SIZE bytes of locals
+	fsave		-(%a7)		|push the fpu state frame
+	moveml		%d0-%d1/%a0-%a1,USER_DA(%a6)	|save scratch integer regs
+	fmovemx	%fp0-%fp3,USER_FP0(%a6)	|save fp0-fp3
+	fmoveml	%fpcr/%fpsr/%fpiar,USER_FPCR(%a6)	|save fp control regs
+
+
+	moveb		(%a7),VER_TMP(%a6) |save version number
+	moveb		(%a7),%d0		|test for valid version num
+	andib		#0xf0,%d0		|test for $4x
+	cmpib		#VER_4,%d0	|must be $4x or exit
+	bnel		fpsp_fmt_error
+
+	fmovel		#0,%FPSR		|clear all user status bits
+	fmovel		#0,%FPCR		|clear all user control bits
+|
+|	The following lines are used to ensure that the FPSR
+|	exception byte and condition codes are clear before proceeding,
+|	except in the case of fmove, which leaves the cc's intact.
+|
+unsupp_con:
+	movel		USER_FPSR(%a6),%d1
+	btst		#5,CMDREG1B(%a6)	|looking for fmove out
+	bne		fmove_con
+	andl		#0xFF00FF,%d1	|clear all but aexcs and qbyte
+	bras		end_fix
+fmove_con:
+	andl		#0x0FFF40FF,%d1	|clear all but cc's, snan bit, aexcs, and qbyte
+end_fix:
+	movel		%d1,USER_FPSR(%a6)	|write the masked fpsr back
+
+	st		UFLG_TMP(%a6)	|set flag for unsupp data
+
+	bsrl		get_op		|everything okay, go get operand(s)
+	bsrl		res_func	|fix up stack frame so can restore it
+	clrl		-(%a7)		|push a zero long
+	moveb		VER_TMP(%a6),(%a7) |move idle fmt word to top of stack
+	bral		gen_except	|exit through gen_except
+|
+	|end