diff options
Diffstat (limited to 'sys/contrib/ia64/libuwx/src/uwx_self_context.s')
-rw-r--r-- | sys/contrib/ia64/libuwx/src/uwx_self_context.s | 383 |
1 files changed, 383 insertions, 0 deletions
diff --git a/sys/contrib/ia64/libuwx/src/uwx_self_context.s b/sys/contrib/ia64/libuwx/src/uwx_self_context.s new file mode 100644 index 0000000..e2986eb --- /dev/null +++ b/sys/contrib/ia64/libuwx/src/uwx_self_context.s @@ -0,0 +1,383 @@ +// Copyright (c) 2003-2006 Hewlett-Packard Development Company, L.P. +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifdef _LP64 +#define SWIZZLE add +#define STPTR st8 +#else +#define SWIZZLE addp4 +#define STPTR st4 +#endif + +rRP = r14 +rPFS = r15 +rUNAT = r16 +rRNAT = r17 +rENV0 = r18 +rENV1 = r19 +rENV2 = r20 +rNSLOT = r21 +rBSP = r22 +rPBSP = r23 +rRSC = r24 +rNATP = r25 +rBIAS = r26 +rRSC0 = r27 +rTMP1 = r28 +rTMP2 = r29 +rTMP3 = r30 +rTMP4 = r31 +rTMP5 = r8 +rMYPFS = r9 +rPSP = r10 + +VALID_IP = 1 +VALID_SP = 1 << 1 +VALID_BSP = 1 << 2 +VALID_CFM = 1 << 3 +VALID_PREDS = 1 << 7 +VALID_PRIUNAT = 1 << 8 +VALID_RNAT = 1 << 10 +VALID_UNAT = 1 << 11 +VALID_FPSR = 1 << 12 +VALID_LC = 1 << 13 +VALID_GRS = 0xf << 16 +VALID_BRS = 0x1f << 20 +VALID_BASIC4 = VALID_IP | VALID_SP | VALID_BSP | VALID_CFM +VALID_SPEC = VALID_PREDS | VALID_PRIUNAT | VALID_RNAT | VALID_UNAT | VALID_FPSR | VALID_LC +VALID_REGS = VALID_BASIC4 | VALID_SPEC | VALID_GRS | VALID_BRS +VALID_FRS = 0xfffff +// valid_regs and valid_frs are separate unsigned int fields. +// In order to store them with a single st8, we need to know +// the endianness. +#ifdef __LITTLE_ENDIAN__ +VALID_BITS = (VALID_FRS << 32) | VALID_REGS +#else +VALID_BITS = (VALID_REGS << 32) | VALID_FRS +#endif + + .text + +// int uwx_self_init_context(struct uwx_env *env); +// +// Stores a snapshot of the caller's context in the uwx_env structure. + + .proc uwx_self_init_context + .global uwx_self_init_context +uwx_self_init_context: + .prologue + alloc rPFS = ar.pfs, 1, 0, 0, 0 + mov rUNAT = ar.unat + .body + SWIZZLE rENV0 = r0, r32 // rENV0 = &env + ;; + flushrs + extr.u rNSLOT = rPFS, 7, 7 // nslots = pfs.sol + mov rRP = b0 + ;; + mov rRSC = ar.rsc + add rENV1 = 136, rENV0 // rENV1 = &env->context.gr[0] + add rENV2 = 144, rENV0 // rENV2 = &env->context.gr[1] + ;; + and rRSC0 = -4, rRSC // clear ar.rsc.mode + adds rNATP = 0x1f8, r0 + mov rTMP1 = b1 + ;; + st8.spill [rENV1] = r4, 16 // env+136: r4 + st8.spill [rENV2] = r5, 16 // env+144: r5 + mov rTMP2 = b2 + ;; + st8.spill [rENV1] = r6, 16 // env+152: r6 + st8.spill [rENV2] = r7, 16 // env+160: r7 + mov rTMP3 = b3 + ;; + st8 [rENV1] = rTMP1, 16 // env+168: b1 + st8 [rENV2] = rTMP2, 16 // env+176: b2 + mov rTMP1 = b4 + ;; + st8 [rENV1] = rTMP3, 16 // env+184: b3 + st8 [rENV2] = rTMP1, 16 // env+192: b4 + mov rTMP2 = b5 + ;; + st8 [rENV1] = rTMP2 // env+200: b5 + mov ar.rsc = rRSC0 // enforced lazy mode + add rENV1 = 8, rENV0 + ;; + mov rRNAT = ar.rnat // get copy of ar.rnat + movl rTMP1 = VALID_BITS // valid_regs: ip, sp, bsp, cfm, + // preds, priunat, rnat, unat, fpsr, + // lc, grs, brs + // = 0x1ff3d8f00000000 + ;; + mov ar.rsc = rRSC // restore ar.rsc + mov rBSP = ar.bsp + add rTMP3 = 136, rENV0 // spill_loc = &env->context.gr[0] + ;; + mov rTMP2 = ar.unat + nop + extr.u rTMP3 = rTMP3, 3, 6 // bitpos = spill_loc{8:3} + ;; + and rBIAS = rBSP, rNATP // bias = (bsp & 0x1f8) ... + sub rTMP4 = 64, rTMP3 // (64 - bitpos) + shr rTMP5 = rTMP2, rTMP3 // (unat >> bitpos) + ;; + nop + extr.u rBIAS = rBIAS, 3, 6 // ... div 8 + shl rTMP2 = rTMP2, rTMP4 // (unat << (64 - bitpos)) + ;; + or rTMP2 = rTMP2, rTMP5 // rotate_right(unat, bitpos) + nop + mov rTMP4 = pr + ;; + st8 [rENV0] = rTMP1, 16 // env+0: valid_regs mask + st8 [rENV1] = rRP, 24 // env+8: ip (my rp) + sub rBIAS = rNSLOT, rBIAS // bias = nslots - bias + ;; + cmp.lt p6, p0 = 0, rBIAS // if (0 < bias) ... + cmp.lt p7, p0 = 63, rBIAS // if (63 < bias) ... + ;; + st8 [rENV0] = r12, 48 // env+16: sp + st8 [rENV1] = rPFS, 40 // env+32: cfm (my pfs) +(p6) add rNSLOT = 1, rNSLOT // ... nslots++ + ;; + st8 [rENV0] = rTMP4, 24 // env+64: preds + st8 [rENV1] = rTMP2, 24 // env+72: priunat +(p7) add rNSLOT = 1, rNSLOT // ... nslots++ + ;; + st8 [rENV0] = rRNAT, -64 // env+88: ar.rnat + st8 [rENV1] = rUNAT, 8 // env+96: ar.unat + dep.z rTMP3 = rNSLOT, 3, 7 // (nslots << 3) + ;; + sub rPBSP = rBSP, rTMP3 // prev_bsp = bsp - (nslots << 3) + mov rTMP3 = ar.fpsr + mov rTMP1 = ar.lc + ;; + st8 [rENV0] = rPBSP, 184 // env+24: bsp (my prev bsp) + st8 [rENV1] = rTMP3, 8 // env+104: ar.fpsr + add rENV2 = 320, rENV2 // rENV2 = &env->context.rstate + ;; + st8 [rENV1] = rTMP1, 112 // env+112: ar.lc + STPTR [rENV2] = r0 // env+528: env->rstate = 0 + nop + ;; + // THIS CODE NEEDS TO BE SCHEDULED!!! + stf.spill [rENV0] = f2, 32 // env+208: f2 + stf.spill [rENV1] = f3, 32 // env+224: f3 + ;; + stf.spill [rENV0] = f4, 32 // env+240: f4 + stf.spill [rENV1] = f5, 32 // env+256: f5 + ;; + stf.spill [rENV0] = f16, 32 // env+272: f16 + stf.spill [rENV1] = f17, 32 // env+288: f17 + ;; + stf.spill [rENV0] = f18, 32 // env+304: f16 + stf.spill [rENV1] = f19, 32 // env+320: f17 + ;; + stf.spill [rENV0] = f20, 32 // env+336: f16 + stf.spill [rENV1] = f21, 32 // env+352: f17 + ;; + stf.spill [rENV0] = f22, 32 // env+368: f16 + stf.spill [rENV1] = f23, 32 // env+384: f17 + ;; + stf.spill [rENV0] = f24, 32 // env+400: f16 + stf.spill [rENV1] = f25, 32 // env+416: f17 + ;; + stf.spill [rENV0] = f26, 32 // env+432: f16 + stf.spill [rENV1] = f27, 32 // env+448: f17 + ;; + stf.spill [rENV0] = f28, 32 // env+464: f16 + stf.spill [rENV1] = f29, 32 // env+480: f17 + ;; + stf.spill [rENV0] = f30, 32 // env+496: f16 + stf.spill [rENV1] = f31, 32 // env+512: f17 + ;; + mov ar.unat = rUNAT + mov ret0 = r0 // return UWX_OK + br.ret.sptk b0 + .endp + +// uwx_self_install_context( +// struct uwx_env *env, +// uint64_t r15, +// uint64_t r16, +// uint64_t r17, +// uint64_t r18, +// uint64_t ret +// ); +// +// Installs the given context, and sets the landing pad binding +// registers r15-r18 to the values given. +// Returns the value "ret" to the new context (for testing -- +// when transferring to a landing pad, the new context won't +// care about the return value). + + .proc uwx_self_install_context + .global uwx_self_install_context +uwx_self_install_context: + .prologue + alloc rMYPFS = ar.pfs, 6, 0, 0, 0 + .body + SWIZZLE rENV0 = r0, r32 // rENV0 = &env + ;; + + // THIS CODE NEEDS TO BE SCHEDULED!!! + + // Restore GR 4-7 and ar.unat + add rENV1 = 136, rENV0 // &env->context.gr[0] + add rENV2 = 72, rENV0 // &env->context.priunat + ;; + ld8 rTMP2 = [rENV2], 24 // env+72: priunat + extr.u rTMP3 = rENV1, 3, 6 // bitpos = spill_loc{8:3} + ;; + ld8 rUNAT = [rENV2], 48 // env+96: ar.unat + sub rTMP4 = 64, rTMP3 // (64 - bitpos) + shl rTMP5 = rTMP2, rTMP3 // (unat << bitpos) + ;; + shr rTMP2 = rTMP2, rTMP4 // (unat >> (64 - bitpos)) + ;; + or rTMP2 = rTMP2, rTMP5 // rotate_left(unat, bitpos) + ;; + mov ar.unat = rTMP2 // put priunat in place + ;; + ld8.fill r4 = [rENV1], 16 // env+136: r4 + ld8.fill r5 = [rENV2], 16 // env+144: r5 + ;; + ld8.fill r6 = [rENV1], 16 // env+152: r6 + ld8.fill r7 = [rENV2], 16 // env+160: r7 + ;; + mov ar.unat = rUNAT // restore real ar.unat + + // Restore BR 1-5 + ld8 rTMP1 = [rENV1], 16 // env+168: b1 + ld8 rTMP2 = [rENV2], 16 // env+176: b2 + ;; + ld8 rTMP3 = [rENV1], 16 // env+184: b3 + ld8 rTMP4 = [rENV2], -168 // env+192: b4 + mov b1 = rTMP1 + ;; + ld8 rTMP1 = [rENV1], -168 // env+200: b5 + mov b2 = rTMP2 + mov b3 = rTMP3 + mov b4 = rTMP4 + ;; + mov b5 = rTMP1 + + // Restore ar.bsp, ar.pfs, and ar.rnat + ld8 rPFS = [rENV1], 56 // env+32: cfm (+saved ar.ec) + mov rRSC = ar.rsc + adds rBIAS = 0x1f8, r0 + ;; + flushrs + ld8 rRNAT = [rENV1], -24 // env+88: ar.rnat + ld8 rPBSP = [rENV2], 88 // env+24: prev_bsp + and rRSC0 = -4, rRSC // clear ar.rsc.mode + ;; + mov ar.rsc = rRSC0 // enforced lazy mode + extr.u rNSLOT = rPFS, 7, 7 // nslots = pfs.sol + ;; + invala + and rBIAS = rPBSP, rBIAS // bias = prev_bsp & 0x1f8 ... + ;; + extr.u rBIAS = rBIAS, 3, 6 // ... div 8 + ;; + add rBIAS = rNSLOT, rBIAS // bias += nslots + ;; + cmp.lt p6, p0 = 63, rBIAS // if (63 < bias) ... + cmp.lt p7, p0 = 126, rBIAS // if (126 < bias) ... + ;; +(p6) add rNSLOT = 1, rNSLOT // ... nslots++ + ;; +(p7) add rNSLOT = 1, rNSLOT // ... nslots++ + ;; + dep.z rTMP3 = rNSLOT, 3, 7 // (nslots << 3) + ;; + add rBSP = rPBSP, rTMP3 // bsp = prev_bsp + (nslots << 3) + ;; + mov ar.bspstore = rBSP // restore ar.bsp + ;; + mov ar.rnat = rRNAT // restore ar.rnat + mov ar.pfs = rPFS // restore ar.pfs + ;; + mov ar.rsc = rRSC // restore ar.rsc + + // Restore preds and ar.lc + ld8 rTMP1 = [rENV1], -56 // env+64: preds + ld8 rTMP2 = [rENV2], -96 // env+112: ar.lc + ;; + mov pr = rTMP1 + mov ar.lc = rTMP2 + + // Get previous sp and ip + ld8 rRP = [rENV1], 96 // env+8: ip (my rp) + ld8 rPSP = [rENV2], 112 // env+16: sp + ;; + + // Restore ar.fpsr and gp + ld8 rTMP1 = [rENV1], 104 // env+104: ar.fpsr + ld8 r1 = [rENV2], 96 // env+128: gp + ;; + mov ar.fpsr = rTMP1 // restore ar.fpsr + + // Restore FR 2-5 and 16-31 + ldf.fill f2 = [rENV1], 32 // env+208: f2 + ldf.fill f3 = [rENV2], 32 // env+224: f3 + ;; + ldf.fill f4 = [rENV1], 32 // env+240: f4 + ldf.fill f5 = [rENV2], 32 // env+256: f5 + ;; + ldf.fill f16 = [rENV1], 32 // env+272: f16 + ldf.fill f17 = [rENV2], 32 // env+288: f17 + ;; + ldf.fill f18 = [rENV1], 32 // env+304: f16 + ldf.fill f19 = [rENV2], 32 // env+320: f17 + ;; + ldf.fill f20 = [rENV1], 32 // env+336: f16 + ldf.fill f21 = [rENV2], 32 // env+352: f17 + ;; + ldf.fill f22 = [rENV1], 32 // env+368: f16 + ldf.fill f23 = [rENV2], 32 // env+384: f17 + ;; + ldf.fill f24 = [rENV1], 32 // env+400: f16 + ldf.fill f25 = [rENV2], 32 // env+416: f17 + ;; + ldf.fill f26 = [rENV1], 32 // env+432: f16 + ldf.fill f27 = [rENV2], 32 // env+448: f17 + ;; + ldf.fill f28 = [rENV1], 32 // env+464: f16 + ldf.fill f29 = [rENV2], 32 // env+480: f17 + ;; + ldf.fill f30 = [rENV1], 32 // env+496: f16 + ldf.fill f31 = [rENV2], 32 // env+512: f17 + + // Set landing pad parameter registers + mov r15 = r33 + mov r16 = r34 + mov r17 = r35 + mov r18 = r36 + + // Restore previous sp and Return + mov ret0 = r37 + mov sp = rPSP + mov b0 = rRP + br.ret.sptk b0 + + .endp |