summaryrefslogtreecommitdiffstats
path: root/arch/x86/math-emu/polynom_Xsig.S
blob: 17315c89ff3d07ae1c263a206dfafbe0bd531338 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/*---------------------------------------------------------------------------+
 |  polynomial_Xsig.S                                                        |
 |                                                                           |
 | Fixed point arithmetic polynomial evaluation.                             |
 |                                                                           |
 | Copyright (C) 1992,1993,1994,1995                                         |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
 |                       Australia.  E-mail billm@jacobi.maths.monash.edu.au |
 |                                                                           |
 | Call from C as:                                                           |
 |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 |
 |                        unsigned long long terms[], int n)                 |
 |                                                                           |
 | Computes:                                                                 |
 | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  |
 | and adds the result to the 12 byte Xsig.                                  |
 | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
 | precision.                                                                |
 |                                                                           |
 | This function must be used carefully: most overflow of intermediate       |
 | results is controlled, but overflow of the result is not.                 |
 |                                                                           |
 +---------------------------------------------------------------------------*/
	.file	"polynomial_Xsig.S"

#include "fpu_emu.h"


#define	TERM_SIZE	$8
#define	SUM_MS		-20(%ebp)	/* sum ms long */
#define SUM_MIDDLE	-24(%ebp)	/* sum middle long */
#define	SUM_LS		-28(%ebp)	/* sum ls long */
#define	ACCUM_MS	-4(%ebp)	/* accum ms long */
#define	ACCUM_MIDDLE	-8(%ebp)	/* accum middle long */
#define	ACCUM_LS	-12(%ebp)	/* accum ls long */
#define OVERFLOWED      -16(%ebp)	/* addition overflow flag */

.text
ENTRY(polynomial_Xsig)
	pushl	%ebp
	movl	%esp,%ebp
	subl	$32,%esp
	pushl	%esi
	pushl	%edi
	pushl	%ebx

	movl	PARAM2,%esi		/* x */
	movl	PARAM3,%edi		/* terms */

	movl	TERM_SIZE,%eax
	mull	PARAM4			/* n */
	addl	%eax,%edi

	movl	4(%edi),%edx		/* terms[n] */
	movl	%edx,SUM_MS
	movl	(%edi),%edx		/* terms[n] */
	movl	%edx,SUM_MIDDLE
	xor	%eax,%eax
	movl	%eax,SUM_LS
	movb	%al,OVERFLOWED

	subl	TERM_SIZE,%edi
	decl	PARAM4
	js	L_accum_done

L_accum_loop:
	xor	%eax,%eax
	movl	%eax,ACCUM_MS
	movl	%eax,ACCUM_MIDDLE

	movl	SUM_MIDDLE,%eax
	mull	(%esi)			/* x ls long */
	movl	%edx,ACCUM_LS

	movl	SUM_MIDDLE,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	movl	SUM_MS,%eax
	mull	(%esi)			/* x ls long */
	addl	%eax,ACCUM_LS
	adcl	%edx,ACCUM_MIDDLE
	adcl	$0,ACCUM_MS

	movl	SUM_MS,%eax
	mull	4(%esi)			/* x ms long */
	addl	%eax,ACCUM_MIDDLE
	adcl	%edx,ACCUM_MS

	testb	$0xff,OVERFLOWED
	jz	L_no_overflow

	movl	(%esi),%eax
	addl	%eax,ACCUM_MIDDLE
	movl	4(%esi),%eax
	adcl	%eax,ACCUM_MS		/* This could overflow too */

L_no_overflow:

/*
 * Now put the sum of next term and the accumulator
 * into the sum register
 */
	movl	ACCUM_LS,%eax
	addl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_LS
	movl	ACCUM_MIDDLE,%eax
	adcl	(%edi),%eax		/* term ls long */
	movl	%eax,SUM_MIDDLE
	movl	ACCUM_MS,%eax
	adcl	4(%edi),%eax		/* term ms long */
	movl	%eax,SUM_MS
	sbbb	%al,%al
	movb	%al,OVERFLOWED		/* Used in the next iteration */

	subl	TERM_SIZE,%edi
	decl	PARAM4
	jns	L_accum_loop

L_accum_done:
	movl	PARAM1,%edi		/* accum */
	movl	SUM_LS,%eax
	addl	%eax,(%edi)
	movl	SUM_MIDDLE,%eax
	adcl	%eax,4(%edi)
	movl	SUM_MS,%eax
	adcl	%eax,8(%edi)

	popl	%ebx
	popl	%edi
	popl	%esi
	leave
	ret
OpenPOWER on IntegriCloud