From bd3dff9a70f16d5b5e3d0958fdf1aadf3d6b8e74 Mon Sep 17 00:00:00 2001 From: tmm Date: Mon, 18 Nov 2002 22:08:50 +0000 Subject: Fix the handling of high PLT entries (> 32764) on sparc64. This requires additional arguments to reloc_jmpslot(), which is why MI code and MD code of other platforms had to be changed. Reviewed by: jake Approved by: re --- libexec/rtld-elf/alpha/reloc.c | 7 +- libexec/rtld-elf/alpha/rtld_machdep.h | 4 +- libexec/rtld-elf/amd64/reloc.c | 2 +- libexec/rtld-elf/amd64/rtld_machdep.h | 3 +- libexec/rtld-elf/i386/reloc.c | 2 +- libexec/rtld-elf/i386/rtld_machdep.h | 3 +- libexec/rtld-elf/ia64/reloc.c | 7 +- libexec/rtld-elf/ia64/rtld_machdep.h | 3 +- libexec/rtld-elf/rtld.c | 2 +- libexec/rtld-elf/sparc64/reloc.c | 363 +++++++++++++++++--------------- libexec/rtld-elf/sparc64/rtld_machdep.h | 4 +- libexec/rtld-elf/sparc64/rtld_start.S | 55 ++--- 12 files changed, 242 insertions(+), 213 deletions(-) (limited to 'libexec/rtld-elf') diff --git a/libexec/rtld-elf/alpha/reloc.c b/libexec/rtld-elf/alpha/reloc.c index 4d9356c..c1a71cf 100644 --- a/libexec/rtld-elf/alpha/reloc.c +++ b/libexec/rtld-elf/alpha/reloc.c @@ -252,7 +252,7 @@ reloc_jmpslots(Obj_Entry *obj) return -1; reloc_jmpslot(where, (Elf_Addr)(defobj->relocbase + def->st_value), - defobj); + defobj, obj, rel); } } else { const Elf_Rela *relalim; @@ -272,7 +272,7 @@ reloc_jmpslots(Obj_Entry *obj) return -1; reloc_jmpslot(where, (Elf_Addr)(defobj->relocbase + def->st_value), - defobj); + defobj, obj, (Elf_Rel *)rela); } } obj->jmpslots_done = true; @@ -281,7 +281,8 @@ reloc_jmpslots(Obj_Entry *obj) /* Fixup the jump slot at "where" to transfer control to "target". 
*/ Elf_Addr -reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *obj) +reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *obj, + const Obj_Entry *refobj, const Elf_Rel *rel) { Elf_Addr stubaddr; diff --git a/libexec/rtld-elf/alpha/rtld_machdep.h b/libexec/rtld-elf/alpha/rtld_machdep.h index 777eb0f..d224b9c 100644 --- a/libexec/rtld-elf/alpha/rtld_machdep.h +++ b/libexec/rtld-elf/alpha/rtld_machdep.h @@ -35,7 +35,9 @@ struct Struct_Obj_Entry; #define rtld_dynamic(obj) (&_DYNAMIC) Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr, - const struct Struct_Obj_Entry *obj); + const struct Struct_Obj_Entry *, + const struct Struct_Obj_Entry *, + const Elf_Rel *); #define make_function_pointer(def, defobj) \ ((defobj)->relocbase + (def)->st_value) diff --git a/libexec/rtld-elf/amd64/reloc.c b/libexec/rtld-elf/amd64/reloc.c index f3425eb..f8457b3 100644 --- a/libexec/rtld-elf/amd64/reloc.c +++ b/libexec/rtld-elf/amd64/reloc.c @@ -259,7 +259,7 @@ reloc_jmpslots(Obj_Entry *obj) if (def == NULL) return -1; target = (Elf_Addr)(defobj->relocbase + def->st_value); - reloc_jmpslot(where, target, defobj); + reloc_jmpslot(where, target, defobj, obj, rel); } obj->jmpslots_done = true; return 0; diff --git a/libexec/rtld-elf/amd64/rtld_machdep.h b/libexec/rtld-elf/amd64/rtld_machdep.h index 1f83411..4bd7ae2 100644 --- a/libexec/rtld-elf/amd64/rtld_machdep.h +++ b/libexec/rtld-elf/amd64/rtld_machdep.h @@ -38,7 +38,8 @@ struct Struct_Obj_Entry; /* Fixup the jump slot at "where" to transfer control to "target". 
*/ static inline Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, - const struct Struct_Obj_Entry *obj) + const struct Struct_Obj_Entry *obj, + const struct Struct_Obj_Entry *refobj, const Elf_Rel *rel) { dbg("reloc_jmpslot: *%p = %p", (void *)(where), (void *)(target)); diff --git a/libexec/rtld-elf/i386/reloc.c b/libexec/rtld-elf/i386/reloc.c index f3425eb..f8457b3 100644 --- a/libexec/rtld-elf/i386/reloc.c +++ b/libexec/rtld-elf/i386/reloc.c @@ -259,7 +259,7 @@ reloc_jmpslots(Obj_Entry *obj) if (def == NULL) return -1; target = (Elf_Addr)(defobj->relocbase + def->st_value); - reloc_jmpslot(where, target, defobj); + reloc_jmpslot(where, target, defobj, obj, rel); } obj->jmpslots_done = true; return 0; diff --git a/libexec/rtld-elf/i386/rtld_machdep.h b/libexec/rtld-elf/i386/rtld_machdep.h index 1f83411..4bd7ae2 100644 --- a/libexec/rtld-elf/i386/rtld_machdep.h +++ b/libexec/rtld-elf/i386/rtld_machdep.h @@ -38,7 +38,8 @@ struct Struct_Obj_Entry; /* Fixup the jump slot at "where" to transfer control to "target". */ static inline Elf_Addr reloc_jmpslot(Elf_Addr *where, Elf_Addr target, - const struct Struct_Obj_Entry *obj) + const struct Struct_Obj_Entry *obj, + const struct Struct_Obj_Entry *refobj, const Elf_Rel *rel) { dbg("reloc_jmpslot: *%p = %p", (void *)(where), (void *)(target)); diff --git a/libexec/rtld-elf/ia64/reloc.c b/libexec/rtld-elf/ia64/reloc.c index 7f8c166..be846c1 100644 --- a/libexec/rtld-elf/ia64/reloc.c +++ b/libexec/rtld-elf/ia64/reloc.c @@ -364,7 +364,7 @@ reloc_jmpslots(Obj_Entry *obj) reloc_jmpslot(where, (Elf_Addr)(defobj->relocbase + def->st_value), - defobj); + defobj, obj, rel); } } else { const Elf_Rela *relalim; @@ -385,7 +385,7 @@ reloc_jmpslots(Obj_Entry *obj) reloc_jmpslot(where, (Elf_Addr)(defobj->relocbase + def->st_value), - defobj); + defobj, obj, (Elf_Rel *)rela); } } obj->jmpslots_done = true; @@ -394,7 +394,8 @@ reloc_jmpslots(Obj_Entry *obj) /* Fixup the jump slot at "where" to transfer control to "target". 
*/ Elf_Addr -reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *obj) +reloc_jmpslot(Elf_Addr *where, Elf_Addr target, const Obj_Entry *obj, + const Obj_Entry *refobj, const Elf_Rel *rel) { Elf_Addr stubaddr; diff --git a/libexec/rtld-elf/ia64/rtld_machdep.h b/libexec/rtld-elf/ia64/rtld_machdep.h index b4f8e2c..a0ea72f 100644 --- a/libexec/rtld-elf/ia64/rtld_machdep.h +++ b/libexec/rtld-elf/ia64/rtld_machdep.h @@ -45,7 +45,8 @@ struct fptr { struct Struct_Obj_Entry; -Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr, const struct Struct_Obj_Entry *); +Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr, const struct Struct_Obj_Entry *, + const struct Struct_Obj_Entry *, const Elf_Rel *); void *make_function_pointer(const Elf_Sym *, const struct Struct_Obj_Entry *); void call_initfini_pointer(const struct Struct_Obj_Entry *, Elf_Addr); diff --git a/libexec/rtld-elf/rtld.c b/libexec/rtld-elf/rtld.c index 04ce9c4..2447202 100644 --- a/libexec/rtld-elf/rtld.c +++ b/libexec/rtld-elf/rtld.c @@ -445,7 +445,7 @@ _rtld_bind(Obj_Entry *obj, Elf_Word reloff) * address. The value returned from reloc_jmpslot() is the value * that the trampoline needs. 
*/ - target = reloc_jmpslot(where, target, defobj); + target = reloc_jmpslot(where, target, defobj, obj, rel); rlock_release(); return target; } diff --git a/libexec/rtld-elf/sparc64/reloc.c b/libexec/rtld-elf/sparc64/reloc.c index 7a43ce4..1300065 100644 --- a/libexec/rtld-elf/sparc64/reloc.c +++ b/libexec/rtld-elf/sparc64/reloc.c @@ -458,204 +458,217 @@ reloc_jmpslots(Obj_Entry *obj) if (def == NULL) return -1; target = (Elf_Addr)(defobj->relocbase + def->st_value); - reloc_jmpslot(where, target, defobj); + reloc_jmpslot(where, target, defobj, obj, (Elf_Rel *)rela); } obj->jmpslots_done = true; return (0); } Elf_Addr -reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj) +reloc_jmpslot(Elf_Addr *wherep, Elf_Addr target, const Obj_Entry *obj, + const Obj_Entry *refobj, const Elf_Rel *rel) { + const Elf_Rela *rela = (const Elf_Rela *)rel; Elf_Addr offset; Elf_Half *where; - /* - * At the PLT entry pointed at by `where', we now construct - * a direct transfer to the now fully resolved function - * address. - * - * A PLT entry is supposed to start by looking like this: - * - * sethi %hi(. - .PLT0), %g1 - * ba,a %xcc, .PLT1 - * nop - * nop - * nop - * nop - * nop - * nop - * - * When we replace these entries we start from the second - * entry and do it in reverse order so the last thing we - * do is replace the branch. That allows us to change this - * atomically. - * - * We now need to find out how far we need to jump. We - * have a choice of several different relocation techniques - * which are increasingly expensive. - */ - - where = (Elf_Half *)wherep; - offset = ((Elf_Addr)where) - target; - if (offset <= (1L<<20) && offset >= -(1L<<20)) { - /* - * We're within 1MB -- we can use a direct branch insn. - * - * We can generate this pattern: - * - * sethi %hi(. 
- .PLT0), %g1 - * ba,a %xcc, addr - * nop - * nop - * nop - * nop - * nop - * nop - * - */ - where[1] = BAA | ((offset >> 2) &0x3fffff); - flush(where, 4); - } else if (target >= 0 && target < (1L<<32)) { - /* - * We're withing 32-bits of address zero. - * - * The resulting code in the jump slot is: - * - * sethi %hi(. - .PLT0), %g1 - * sethi %hi(addr), %g1 - * jmp %g1+%lo(addr) - * nop - * nop - * nop - * nop - * nop - * - */ - where[2] = JMP | LOVAL(target); - flush(where, 8); - where[1] = SETHI | HIVAL(target, 10); - flush(where, 4); - } else if (target <= 0 && target > -(1L<<32)) { - /* - * We're withing 32-bits of address -1. - * - * The resulting code in the jump slot is: - * - * sethi %hi(. - .PLT0), %g1 - * sethi %hix(addr), %g1 - * xor %g1, %lox(addr), %g1 - * jmp %g1 - * nop - * nop - * nop - * nop - * - */ - where[3] = JMP; - flush(where, 12); - where[2] = XOR | ((~target) & 0x00001fff); - flush(where, 8); - where[1] = SETHI | HIVAL(~target, 10); - flush(where, 4); - } else if (offset <= (1L<<32) && offset >= -((1L<<32) - 4)) { - /* - * We're withing 32-bits -- we can use a direct call insn + if (rela - refobj->pltrela < 32764) { + /* + * At the PLT entry pointed at by `where', we now construct + * a direct transfer to the now fully resolved function + * address. * - * The resulting code in the jump slot is: + * A PLT entry is supposed to start by looking like this: * - * sethi %hi(. - .PLT0), %g1 - * mov %o7, %g1 - * call (.+offset) - * mov %g1, %o7 - * nop + * sethi (. - .PLT0), %g1 + * ba,a %xcc, .PLT1 * nop * nop * nop - * - */ - where[3] = MOV17; - flush(where, 12); - where[2] = CALL | ((offset >> 4) & 0x3fffffff); - flush(where, 8); - where[1] = MOV71; - flush(where, 4); - } else if (offset >= 0 && offset < (1L<<44)) { - /* - * We're withing 44 bits. We can generate this pattern: - * - * The resulting code in the jump slot is: - * - * sethi %hi(. 
- .PLT0), %g1 - * sethi %h44(addr), %g1 - * or %g1, %m44(addr), %g1 - * sllx %g1, 12, %g1 - * jmp %g1+%l44(addr) * nop * nop * nop * - */ - where[4] = JMP | LOVAL(offset); - flush(where, 16); - where[3] = SLLX | 12; - flush(where, 12); - where[2] = OR | (((offset) >> 12) & 0x00001fff); - flush(where, 8); - where[1] = SETHI | HIVAL(offset, 22); - flush(where, 4); - } else if (offset < 0 && offset > -(1L<<44)) { - /* - * We're withing 44 bits. We can generate this pattern: - * - * The resulting code in the jump slot is: - * - * sethi %hi(. - .PLT0), %g1 - * sethi %h44(-addr), %g1 - * xor %g1, %m44(-addr), %g1 - * sllx %g1, 12, %g1 - * jmp %g1+%l44(addr) - * nop - * nop - * nop + * When we replace these entries we start from the second + * entry and do it in reverse order so the last thing we + * do is replace the branch. That allows us to change this + * atomically. * + * We now need to find out how far we need to jump. We + * have a choice of several different relocation techniques + * which are increasingly expensive. */ - where[4] = JMP | LOVAL(offset); - flush(where, 16); - where[3] = SLLX | 12; - flush(where, 12); - where[2] = XOR | (((~offset) >> 12) & 0x00001fff); - flush(where, 8); - where[1] = SETHI | HIVAL(~offset, 22); - flush(where, 4); + where = (Elf_Half *)wherep; + offset = ((Elf_Addr)where) - target; + if (offset <= (1L<<20) && offset >= -(1L<<20)) { + /* + * We're within 1MB -- we can use a direct branch insn. + * + * We can generate this pattern: + * + * sethi %hi(. - .PLT0), %g1 + * ba,a %xcc, addr + * nop + * nop + * nop + * nop + * nop + * nop + * + */ + where[1] = BAA | ((offset >> 2) &0x3fffff); + flush(where, 4); + } else if (target >= 0 && target < (1L<<32)) { + /* + * We're withing 32-bits of address zero. + * + * The resulting code in the jump slot is: + * + * sethi %hi(. 
- .PLT0), %g1 + * sethi %hi(addr), %g1 + * jmp %g1+%lo(addr) + * nop + * nop + * nop + * nop + * nop + * + */ + where[2] = JMP | LOVAL(target); + flush(where, 8); + where[1] = SETHI | HIVAL(target, 10); + flush(where, 4); + } else if (target <= 0 && target > -(1L<<32)) { + /* + * We're withing 32-bits of address -1. + * + * The resulting code in the jump slot is: + * + * sethi %hi(. - .PLT0), %g1 + * sethi %hix(addr), %g1 + * xor %g1, %lox(addr), %g1 + * jmp %g1 + * nop + * nop + * nop + * nop + * + */ + where[3] = JMP; + flush(where, 12); + where[2] = XOR | ((~target) & 0x00001fff); + flush(where, 8); + where[1] = SETHI | HIVAL(~target, 10); + flush(where, 4); + } else if (offset <= (1L<<32) && offset >= -((1L<<32) - 4)) { + /* + * We're withing 32-bits -- we can use a direct call + * insn + * + * The resulting code in the jump slot is: + * + * sethi %hi(. - .PLT0), %g1 + * mov %o7, %g1 + * call (.+offset) + * mov %g1, %o7 + * nop + * nop + * nop + * nop + * + */ + where[3] = MOV17; + flush(where, 12); + where[2] = CALL | ((offset >> 4) & 0x3fffffff); + flush(where, 8); + where[1] = MOV71; + flush(where, 4); + } else if (offset >= 0 && offset < (1L<<44)) { + /* + * We're withing 44 bits. We can generate this pattern: + * + * The resulting code in the jump slot is: + * + * sethi %hi(. - .PLT0), %g1 + * sethi %h44(addr), %g1 + * or %g1, %m44(addr), %g1 + * sllx %g1, 12, %g1 + * jmp %g1+%l44(addr) + * nop + * nop + * nop + * + */ + where[4] = JMP | LOVAL(offset); + flush(where, 16); + where[3] = SLLX | 12; + flush(where, 12); + where[2] = OR | (((offset) >> 12) & 0x00001fff); + flush(where, 8); + where[1] = SETHI | HIVAL(offset, 22); + flush(where, 4); + } else if (offset < 0 && offset > -(1L<<44)) { + /* + * We're withing 44 bits. We can generate this pattern: + * + * The resulting code in the jump slot is: + * + * sethi %hi(. 
- .PLT0), %g1 + * sethi %h44(-addr), %g1 + * xor %g1, %m44(-addr), %g1 + * sllx %g1, 12, %g1 + * jmp %g1+%l44(addr) + * nop + * nop + * nop + * + */ + where[4] = JMP | LOVAL(offset); + flush(where, 16); + where[3] = SLLX | 12; + flush(where, 12); + where[2] = XOR | (((~offset) >> 12) & 0x00001fff); + flush(where, 8); + where[1] = SETHI | HIVAL(~offset, 22); + flush(where, 4); + } else { + /* + * We need to load all 64-bits + * + * The resulting code in the jump slot is: + * + * sethi %hi(. - .PLT0), %g1 + * sethi %hh(addr), %g1 + * sethi %lm(addr), %g5 + * or %g1, %hm(addr), %g1 + * sllx %g1, 32, %g1 + * or %g1, %g5, %g1 + * jmp %g1+%lo(addr) + * nop + * + */ + where[6] = JMP | LOVAL(target); + flush(where, 24); + where[5] = ORG5; + flush(where, 20); + where[4] = SLLX | 12; + flush(where, 16); + where[3] = OR | LOVAL((target) >> 32); + flush(where, 12); + where[2] = SETHIG5 | HIVAL(target, 10); + flush(where, 8); + where[1] = SETHI | HIVAL(target, 42); + flush(where, 4); + } } else { - /* - * We need to load all 64-bits - * - * The resulting code in the jump slot is: - * - * sethi %hi(. - .PLT0), %g1 - * sethi %hh(addr), %g1 - * sethi %lm(addr), %g5 - * or %g1, %hm(addr), %g1 - * sllx %g1, 32, %g1 - * or %g1, %g5, %g1 - * jmp %g1+%lo(addr) - * nop - * + /* + * This is a high PLT slot; the relocation offset specifies a + * pointer that needs to be frobbed; no actual code needs to + * be modified. The pointer to be calculated needs the addend + * added and the reference object relocation base subtracted. 
*/ - where[6] = JMP | LOVAL(target); - flush(where, 24); - where[5] = ORG5; - flush(where, 20); - where[4] = SLLX | 12; - flush(where, 16); - where[3] = OR | LOVAL((target) >> 32); - flush(where, 12); - where[2] = SETHIG5 | HIVAL(target, 10); - flush(where, 8); - where[1] = SETHI | HIVAL(target, 42); - flush(where, 4); + *wherep = target + rela->r_addend - + (Elf_Addr)refobj->relocbase; } return (target); diff --git a/libexec/rtld-elf/sparc64/rtld_machdep.h b/libexec/rtld-elf/sparc64/rtld_machdep.h index f91840f..0d028c8 100644 --- a/libexec/rtld-elf/sparc64/rtld_machdep.h +++ b/libexec/rtld-elf/sparc64/rtld_machdep.h @@ -50,7 +50,9 @@ struct Struct_Obj_Entry; #define rtld_dynamic(obj) ((Elf_Dyn *)(((char *)&_DYNAMIC) + (vm_offset_t)(obj)->relocbase)) Elf_Addr reloc_jmpslot(Elf_Addr *, Elf_Addr, - const struct Struct_Obj_Entry *obj); + const struct Struct_Obj_Entry *, + const struct Struct_Obj_Entry *, + const Elf_Rel *); #define make_function_pointer(def, defobj) \ ((defobj)->relocbase + (def)->st_value) diff --git a/libexec/rtld-elf/sparc64/rtld_start.S b/libexec/rtld-elf/sparc64/rtld_start.S index 4de66cf..b97e798 100644 --- a/libexec/rtld-elf/sparc64/rtld_start.S +++ b/libexec/rtld-elf/sparc64/rtld_start.S @@ -85,43 +85,50 @@ END(.rtld_start) * * n = x >> 15; * - * and _rtld_bind_start_0(x, y) does: + * and _rtld_bind_start_0(x, y) should do, according to the SCD: * - * i = x - y + 1048596; + * i = x - y - 1048596; * n = 32768 + (i/5120)*160 + (i%5120)/24; * + * Note that the number of 1048596 from above is incorrect; rather, + * we need to use HIPLTOFFS as defined below. + * * Neither routine needs to issue a save since it's already been * done in the PLT entry. */ +#define NPLTLOSLOTS 32768 +#define PLTSLOTSZ 32 +/* + * - 16 to compensate for the difference of the positions of the jumps that + * generate the arguments in .PLT0 and the high plt entry. 
+ */ +#define HIPLTOFFS (NPLTLOSLOTS * PLTSLOTSZ - 16) + ENTRY(_rtld_bind_start_0) - sethi %hi(1048596), %l1 + sethi %hi(HIPLTOFFS), %l1 + or %l1, %lo(HIPLTOFFS), %l1 sub %o0, %o1, %l0 /* x - y */ - or %l1, %lo(1048596), %l1 - add %l0, %l1, %l0 /* x - y + 1048596 */ - - sdivx %l0, 5120, %l1 /* Calculate i/5120 */ - ldx [%o1 + (10*4)], %o0 /* Load object pointer from PLT2 */ - sub %l0, %l1, %l2 /* And i%5120 */ + sub %l0, %l1, %l0 /* i = x - y - HIPLTOFFS */ + sethi %hi(5120), %l7 + sdivx %l0, %l7, %l1 /* Calculate i / 5120 */ + mulx %l1, %l7, %l3 + sub %l0, %l3, %l2 /* And i % 5120 */ + mulx %l1, 160, %l5 /* (i / 5120) * 160 */ + sdivx %l2, 24, %l4 /* (i % 5120) / 24 */ + sethi %hi(NPLTLOSLOTS), %l6 + add %l4, %l5, %l4 /* (i / 5120) * 160 + (i % 5120) / 24 */ + add %l4, %l6, %l4 /* + NPLTLOSLOTS */ + sub %l4, 4, %l4 /* XXX: 4 entries are reserved */ - /* Let the division churn for a bit. */ - sdivx %l2, 14, %l4 /* (i%5120)/24 */ + sllx %l4, 1, %l5 /* Each element is an Elf_Rela which */ + add %l5, %l4, %l4 /* is 3 longwords or 24 bytes. */ + sllx %l4, 3, %l4 /* So multiply by 24. */ - /* 160 is (32 * 5) or (32 * (4 + 1)) */ - sllx %l1, 2, %l3 /* 4 * (i/5120) */ - add %l1, %l3, %l3 /* 5 * (i/5120) */ - sllx %l3, 5, %l3 /* 32 * 5 * (i/5120) */ + ldx [%o1 + (10*4)], %o0 /* Load object pointer from PLT2 */ - sethi %hi(32768), %l6 - add %l3, %l4, %l5 /* %l5 = (i/5120)*160 + (i%5120)/24; */ - add %l5, %l6, %l5 - - sllx %l5, 1, %l6 /* Each element is an Elf_Rela which */ - add %l6, %l5, %l5 /* is 3 longwords or 24 bytes. */ - sllx %l5, 3, %l5 /* So multiply by 24. */ - call _rtld_bind /* Call _rtld_bind(obj, offset) */ - mov %l5, %o1 + mov %l4, %o1 jmp %o0 /* return value == function address */ restore /* Dump our stack frame */ -- cgit v1.1