1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
|
// Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P.
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// Pointer-size helpers.  With _LP64 defined, the env pointer is used
// as-is (SWIZZLE is a plain add) and STPTR stores 8 bytes.  Otherwise
// SWIZZLE is addp4 (32-bit pointer to 64-bit address) and STPTR
// stores 4 bytes.
#ifdef _LP64
#define SWIZZLE add
#define STPTR st8
#else
#define SWIZZLE addp4
#define STPTR st4
#endif
// Symbolic names for the registers used as scratch by the two
// routines in this file.
rRP = r14 // return pointer (b0) / ip of the context
rPFS = r15 // ar.pfs (cfm) of the context
rUNAT = r16 // ar.unat value to be preserved or restored
rRNAT = r17 // ar.rnat
rENV0 = r18 // base pointer into the uwx_env structure
rENV1 = r19 // second running pointer into uwx_env
rENV2 = r20 // third running pointer into uwx_env
rNSLOT = r21 // number of backing store slots (starts as pfs.sol)
rBSP = r22 // ar.bsp
rPBSP = r23 // bsp of the previous (saved or restored) frame
rRSC = r24 // original ar.rsc
rNATP = r25 // 0x1f8 mask used to find the slot index within a NaT group
rBIAS = r26 // slot index used to count intervening NaT collections
rRSC0 = r27 // ar.rsc with the mode field cleared
rTMP1 = r28 // temporaries
rTMP2 = r29
rTMP3 = r30
rTMP4 = r31
rTMP5 = r8
rMYPFS = r9 // ar.pfs returned by alloc in uwx_self_install_context
rPSP = r10 // sp loaded from the context being installed
// Bit flags for the valid_regs word stored at the start of uwx_env.
VALID_IP = 1
VALID_SP = 1 << 1
VALID_BSP = 1 << 2
VALID_CFM = 1 << 3
VALID_PREDS = 1 << 7
VALID_PRIUNAT = 1 << 8
VALID_RNAT = 1 << 10
VALID_UNAT = 1 << 11
VALID_FPSR = 1 << 12
VALID_LC = 1 << 13
VALID_GRS = 0xf << 16 // 4 bits; the routines below save r4-r7
VALID_BRS = 0x1f << 20 // 5 bits; the routines below save b1-b5
// Combined masks.
VALID_BASIC4 = VALID_IP | VALID_SP | VALID_BSP | VALID_CFM
VALID_SPEC = VALID_PREDS | VALID_PRIUNAT | VALID_RNAT | VALID_UNAT | VALID_FPSR | VALID_LC
VALID_REGS = VALID_BASIC4 | VALID_SPEC | VALID_GRS | VALID_BRS
VALID_FRS = 0xfffff // 20 bits; the routines below save f2-f5 and f16-f31
// valid_regs and valid_frs are separate unsigned int fields.
// In order to store them with a single st8, we need to know
// the endianness.
// VALID_BITS is the 64-bit image of the two fields: whichever field
// comes first in memory (valid_regs) lands in the low half on a
// little-endian target and in the high half otherwise.
#ifdef __LITTLE_ENDIAN__
VALID_BITS = (VALID_FRS << 32) | VALID_REGS
#else
VALID_BITS = (VALID_REGS << 32) | VALID_FRS
#endif
.text
// int uwx_self_init_context(struct uwx_env *env);
//
// Stores a snapshot of the caller's context in the uwx_env structure.
//
// In:   r32  = env
// Out:  ret0 = 0 (UWX_OK)
// Uses: r8-r10 and r14-r31 (via the aliases above), p6, p7.
//       ar.rsc and ar.unat are restored before returning.
//
// Layout written (byte offsets from env):
//     0  valid_regs / valid_frs (one 8-byte store of VALID_BITS)
//     8  ip   (return address of this call, b0)
//    16  sp   (r12)
//    24  bsp  (bsp of the calling frame, computed below)
//    32  cfm  (ar.pfs as seen on entry)
//    64  preds
//    72  priunat (NaT bits for r4-r7, normalized so r4 is bit 0)
//    88  ar.rnat
//    96  ar.unat (value on entry)
//   104  ar.fpsr
//   112  ar.lc
//   136  r4-r7   (8 bytes each, st8.spill)
//   168  b1-b5   (8 bytes each)
//   208  f2-f5, f16-f31 (16 bytes each, stf.spill)
//   528  rstate = 0
.proc uwx_self_init_context
.global uwx_self_init_context
uwx_self_init_context:
.prologue
alloc rPFS = ar.pfs, 1, 0, 0, 0
mov rUNAT = ar.unat // entry ar.unat; the spills below modify it
.body
SWIZZLE rENV0 = r0, r32 // rENV0 = env
;;
flushrs // force the register stack out to the backing store
extr.u rNSLOT = rPFS, 7, 7 // nslots = pfs.sol
mov rRP = b0
;;
mov rRSC = ar.rsc
add rENV1 = 136, rENV0 // rENV1 = &env->context.gr[0]
add rENV2 = 144, rENV0 // rENV2 = &env->context.gr[1]
;;
and rRSC0 = -4, rRSC // clear ar.rsc.mode
adds rNATP = 0x1f8, r0
mov rTMP1 = b1
;;
st8.spill [rENV1] = r4, 16 // env+136: r4
st8.spill [rENV2] = r5, 16 // env+144: r5
mov rTMP2 = b2
;;
st8.spill [rENV1] = r6, 16 // env+152: r6
st8.spill [rENV2] = r7, 16 // env+160: r7
mov rTMP3 = b3
;;
st8 [rENV1] = rTMP1, 16 // env+168: b1
st8 [rENV2] = rTMP2, 16 // env+176: b2
mov rTMP1 = b4
;;
st8 [rENV1] = rTMP3, 16 // env+184: b3
st8 [rENV2] = rTMP1, 16 // env+192: b4
mov rTMP2 = b5
;;
st8 [rENV1] = rTMP2 // env+200: b5
mov ar.rsc = rRSC0 // enforced lazy mode
add rENV1 = 8, rENV0
;;
mov rRNAT = ar.rnat // get copy of ar.rnat
movl rTMP1 = VALID_BITS // valid_regs: ip, sp, bsp, cfm,
// preds, priunat, rnat, unat, fpsr,
// lc, grs, brs (= 0x1ff3d8f)
// valid_frs: 0xfffff
;;
mov ar.rsc = rRSC // restore ar.rsc
mov rBSP = ar.bsp
add rTMP3 = 136, rENV0 // spill_loc = &env->context.gr[0]
;;
mov rTMP2 = ar.unat // ar.unat now holds the NaT bits of r4-r7
nop
extr.u rTMP3 = rTMP3, 3, 6 // bitpos = spill_loc{8:3}
;;
and rBIAS = rBSP, rNATP // bias = (bsp & 0x1f8) ...
sub rTMP4 = 64, rTMP3 // (64 - bitpos)
// The right shift must be logical (shr.u).  A plain shr sign-fills the
// vacated bits, so a set bit 63 in ar.unat would turn the rotate below
// into a run of ones in the upper bits of priunat.
shr.u rTMP5 = rTMP2, rTMP3 // (unat >> bitpos)
;;
nop
extr.u rBIAS = rBIAS, 3, 6 // ... div 8
shl rTMP2 = rTMP2, rTMP4 // (unat << (64 - bitpos))
;;
or rTMP2 = rTMP2, rTMP5 // rotate_right(unat, bitpos)
nop
mov rTMP4 = pr
;;
st8 [rENV0] = rTMP1, 16 // env+0: valid_regs mask
st8 [rENV1] = rRP, 24 // env+8: ip (my rp)
sub rBIAS = rNSLOT, rBIAS // bias = nslots - bias
;;
// Count the slots between the previous bsp and the current bsp:
// nslots gets one extra slot for each of the two thresholds that
// bias exceeds.
cmp.lt p6, p0 = 0, rBIAS // if (0 < bias) ...
cmp.lt p7, p0 = 63, rBIAS // if (63 < bias) ...
;;
st8 [rENV0] = r12, 48 // env+16: sp
st8 [rENV1] = rPFS, 40 // env+32: cfm (my pfs)
(p6) add rNSLOT = 1, rNSLOT // ... nslots++
;;
st8 [rENV0] = rTMP4, 24 // env+64: preds
st8 [rENV1] = rTMP2, 24 // env+72: priunat
(p7) add rNSLOT = 1, rNSLOT // ... nslots++
;;
st8 [rENV0] = rRNAT, -64 // env+88: ar.rnat
st8 [rENV1] = rUNAT, 8 // env+96: ar.unat
dep.z rTMP3 = rNSLOT, 3, 7 // (nslots << 3)
;;
sub rPBSP = rBSP, rTMP3 // prev_bsp = bsp - (nslots << 3)
mov rTMP3 = ar.fpsr
mov rTMP1 = ar.lc
;;
st8 [rENV0] = rPBSP, 184 // env+24: bsp (my prev bsp)
st8 [rENV1] = rTMP3, 8 // env+104: ar.fpsr
add rENV2 = 320, rENV2 // rENV2 = &env->context.rstate
;;
st8 [rENV1] = rTMP1, 112 // env+112: ar.lc
STPTR [rENV2] = r0 // env+528: env->rstate = 0
nop
;;
// THIS CODE NEEDS TO BE SCHEDULED!!!
// rENV0 = env+208 and rENV1 = env+224 here; each pointer advances by
// 32 so the two interleave over consecutive 16-byte FR slots.
stf.spill [rENV0] = f2, 32 // env+208: f2
stf.spill [rENV1] = f3, 32 // env+224: f3
;;
stf.spill [rENV0] = f4, 32 // env+240: f4
stf.spill [rENV1] = f5, 32 // env+256: f5
;;
stf.spill [rENV0] = f16, 32 // env+272: f16
stf.spill [rENV1] = f17, 32 // env+288: f17
;;
stf.spill [rENV0] = f18, 32 // env+304: f18
stf.spill [rENV1] = f19, 32 // env+320: f19
;;
stf.spill [rENV0] = f20, 32 // env+336: f20
stf.spill [rENV1] = f21, 32 // env+352: f21
;;
stf.spill [rENV0] = f22, 32 // env+368: f22
stf.spill [rENV1] = f23, 32 // env+384: f23
;;
stf.spill [rENV0] = f24, 32 // env+400: f24
stf.spill [rENV1] = f25, 32 // env+416: f25
;;
stf.spill [rENV0] = f26, 32 // env+432: f26
stf.spill [rENV1] = f27, 32 // env+448: f27
;;
stf.spill [rENV0] = f28, 32 // env+464: f28
stf.spill [rENV1] = f29, 32 // env+480: f29
;;
stf.spill [rENV0] = f30, 32 // env+496: f30
stf.spill [rENV1] = f31, 32 // env+512: f31
;;
mov ar.unat = rUNAT // undo the changes made by the st8.spill stores
mov ret0 = r0 // return UWX_OK
br.ret.sptk b0
.endp
// uwx_self_install_context(
// struct uwx_env *env,
// uint64_t r15,
// uint64_t r16,
// uint64_t r17,
// uint64_t r18,
// uint64_t ret
// );
//
// Installs the given context, and sets the landing pad binding
// registers r15-r18 to the values given.
// Returns the value "ret" to the new context (for testing --
// when transferring to a landing pad, the new context won't
// care about the return value).
//
// In:   r32 = env, r33-r36 = values for r15-r18, r37 = ret
// Out:  control leaves through br.ret to the ip stored at env+8 with
//       sp, the backing store pointer, ar.pfs, ar.rnat, ar.unat,
//       ar.fpsr, ar.lc, preds, gp, r4-r7, b1-b5, f2-f5 and f16-f31
//       loaded from env, and ret0 = ret.
//
// The byte offsets used here mirror the ones written by
// uwx_self_init_context; in addition gp is loaded from env+128.
.proc uwx_self_install_context
.global uwx_self_install_context
uwx_self_install_context:
.prologue
alloc rMYPFS = ar.pfs, 6, 0, 0, 0
.body
SWIZZLE rENV0 = r0, r32 // rENV0 = env
;;
// THIS CODE NEEDS TO BE SCHEDULED!!!
// Restore GR 4-7 and ar.unat
add rENV1 = 136, rENV0 // &env->context.gr[0]
add rENV2 = 72, rENV0 // &env->context.priunat
;;
ld8 rTMP2 = [rENV2], 24 // env+72: priunat
extr.u rTMP3 = rENV1, 3, 6 // bitpos = spill_loc{8:3}
;;
ld8 rUNAT = [rENV2], 48 // env+96: ar.unat
sub rTMP4 = 64, rTMP3 // (64 - bitpos)
shl rTMP5 = rTMP2, rTMP3 // (unat << bitpos)
;;
// The right shift must be logical (shr.u).  A plain shr sign-fills the
// vacated bits, so a set bit 63 in priunat would force bits bitpos..63
// of the rotated value to one and r4-r7 could be filled as NaT.  With
// bitpos == 0 the count is 64, which must produce zero here.
shr.u rTMP2 = rTMP2, rTMP4 // (unat >> (64 - bitpos))
;;
or rTMP2 = rTMP2, rTMP5 // rotate_left(unat, bitpos)
;;
mov ar.unat = rTMP2 // put priunat in place
;;
ld8.fill r4 = [rENV1], 16 // env+136: r4
ld8.fill r5 = [rENV2], 16 // env+144: r5
;;
ld8.fill r6 = [rENV1], 16 // env+152: r6
ld8.fill r7 = [rENV2], 16 // env+160: r7
;;
mov ar.unat = rUNAT // restore real ar.unat
// Restore BR 1-5
ld8 rTMP1 = [rENV1], 16 // env+168: b1
ld8 rTMP2 = [rENV2], 16 // env+176: b2
;;
ld8 rTMP3 = [rENV1], 16 // env+184: b3
ld8 rTMP4 = [rENV2], -168 // env+192: b4 (rENV2 -> env+24)
mov b1 = rTMP1
;;
ld8 rTMP1 = [rENV1], -168 // env+200: b5 (rENV1 -> env+32)
mov b2 = rTMP2
mov b3 = rTMP3
mov b4 = rTMP4
;;
mov b5 = rTMP1
// Restore ar.bsp, ar.pfs, and ar.rnat
ld8 rPFS = [rENV1], 56 // env+32: cfm (+saved ar.ec)
mov rRSC = ar.rsc
adds rBIAS = 0x1f8, r0
;;
flushrs
ld8 rRNAT = [rENV1], -24 // env+88: ar.rnat
ld8 rPBSP = [rENV2], 88 // env+24: prev_bsp
and rRSC0 = -4, rRSC // clear ar.rsc.mode
;;
mov ar.rsc = rRSC0 // enforced lazy mode
extr.u rNSLOT = rPFS, 7, 7 // nslots = pfs.sol
;;
invala
and rBIAS = rPBSP, rBIAS // bias = prev_bsp & 0x1f8 ...
;;
extr.u rBIAS = rBIAS, 3, 6 // ... div 8
;;
add rBIAS = rNSLOT, rBIAS // bias += nslots
;;
// Inverse of the computation in uwx_self_init_context: nslots gets
// one extra slot for each of the two thresholds that bias exceeds.
cmp.lt p6, p0 = 63, rBIAS // if (63 < bias) ...
cmp.lt p7, p0 = 126, rBIAS // if (126 < bias) ...
;;
(p6) add rNSLOT = 1, rNSLOT // ... nslots++
;;
(p7) add rNSLOT = 1, rNSLOT // ... nslots++
;;
dep.z rTMP3 = rNSLOT, 3, 7 // (nslots << 3)
;;
add rBSP = rPBSP, rTMP3 // bsp = prev_bsp + (nslots << 3)
;;
mov ar.bspstore = rBSP // restore ar.bsp
;;
mov ar.rnat = rRNAT // restore ar.rnat
mov ar.pfs = rPFS // restore ar.pfs
;;
mov ar.rsc = rRSC // restore ar.rsc
// Restore preds and ar.lc
ld8 rTMP1 = [rENV1], -56 // env+64: preds
ld8 rTMP2 = [rENV2], -96 // env+112: ar.lc
;;
mov pr = rTMP1
mov ar.lc = rTMP2
// Get previous sp and ip
ld8 rRP = [rENV1], 96 // env+8: ip (my rp)
ld8 rPSP = [rENV2], 112 // env+16: sp
;;
// Restore ar.fpsr and gp
ld8 rTMP1 = [rENV1], 104 // env+104: ar.fpsr
ld8 r1 = [rENV2], 96 // env+128: gp
;;
mov ar.fpsr = rTMP1 // restore ar.fpsr
// Restore FR 2-5 and 16-31
// rENV1 = env+208 and rENV2 = env+224 here; each pointer advances by
// 32 so the two interleave over consecutive 16-byte FR slots.
ldf.fill f2 = [rENV1], 32 // env+208: f2
ldf.fill f3 = [rENV2], 32 // env+224: f3
;;
ldf.fill f4 = [rENV1], 32 // env+240: f4
ldf.fill f5 = [rENV2], 32 // env+256: f5
;;
ldf.fill f16 = [rENV1], 32 // env+272: f16
ldf.fill f17 = [rENV2], 32 // env+288: f17
;;
ldf.fill f18 = [rENV1], 32 // env+304: f18
ldf.fill f19 = [rENV2], 32 // env+320: f19
;;
ldf.fill f20 = [rENV1], 32 // env+336: f20
ldf.fill f21 = [rENV2], 32 // env+352: f21
;;
ldf.fill f22 = [rENV1], 32 // env+368: f22
ldf.fill f23 = [rENV2], 32 // env+384: f23
;;
ldf.fill f24 = [rENV1], 32 // env+400: f24
ldf.fill f25 = [rENV2], 32 // env+416: f25
;;
ldf.fill f26 = [rENV1], 32 // env+432: f26
ldf.fill f27 = [rENV2], 32 // env+448: f27
;;
ldf.fill f28 = [rENV1], 32 // env+464: f28
ldf.fill f29 = [rENV2], 32 // env+480: f29
;;
ldf.fill f30 = [rENV1], 32 // env+496: f30
ldf.fill f31 = [rENV2], 32 // env+512: f31
// Set landing pad parameter registers
// (r15-r18 alias rPFS, rUNAT, rRNAT and rENV0, none of which is
// needed past this point)
mov r15 = r33
mov r16 = r34
mov r17 = r35
mov r18 = r36
// Restore previous sp and Return
mov ret0 = r37
mov sp = rPSP
mov b0 = rRP
br.ret.sptk b0
.endp
|