diff options
Diffstat (limited to 'contrib/gcc/config/s390/s390.c')
-rw-r--r-- | contrib/gcc/config/s390/s390.c | 3534 |
1 files changed, 2907 insertions, 627 deletions
diff --git a/contrib/gcc/config/s390/s390.c b/contrib/gcc/config/s390/s390.c index 4c96dce..d6fda65 100644 --- a/contrib/gcc/config/s390/s390.c +++ b/contrib/gcc/config/s390/s390.c @@ -1,5 +1,5 @@ /* Subroutines used for code generation on IBM S/390 and zSeries - Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc. + Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc. Contributed by Hartmut Penner (hpenner@de.ibm.com) and Ulrich Weigand (uweigand@de.ibm.com). @@ -45,11 +45,22 @@ Boston, MA 02111-1307, USA. */ #include "target.h" #include "target-def.h" #include "debug.h" - +#include "langhooks.h" +#include "optabs.h" static bool s390_assemble_integer PARAMS ((rtx, unsigned int, int)); static int s390_adjust_cost PARAMS ((rtx, rtx, rtx, int)); static int s390_adjust_priority PARAMS ((rtx, int)); +static void s390_select_rtx_section PARAMS ((enum machine_mode, rtx, + unsigned HOST_WIDE_INT)); +static void s390_encode_section_info PARAMS ((tree, int)); +static const char *s390_strip_name_encoding PARAMS ((const char *)); +static bool s390_cannot_force_const_mem PARAMS ((rtx)); +static void s390_init_builtins PARAMS ((void)); +static rtx s390_expand_builtin PARAMS ((tree, rtx, rtx, + enum machine_mode, int)); +static void s390_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT, + HOST_WIDE_INT, tree)); #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" @@ -58,24 +69,43 @@ static int s390_adjust_priority PARAMS ((rtx, int)); #undef TARGET_ASM_INTEGER #define TARGET_ASM_INTEGER s390_assemble_integer -#undef TARGET_ASM_FUNCTION_PROLOGUE -#define TARGET_ASM_FUNCTION_PROLOGUE s390_function_prologue - -#undef TARGET_ASM_FUNCTION_EPILOGUE -#define TARGET_ASM_FUNCTION_EPILOGUE s390_function_epilogue - #undef TARGET_ASM_OPEN_PAREN #define TARGET_ASM_OPEN_PAREN "" #undef TARGET_ASM_CLOSE_PAREN #define TARGET_ASM_CLOSE_PAREN "" +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION s390_select_rtx_section + #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST s390_adjust_cost #undef TARGET_SCHED_ADJUST_PRIORITY #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority +#undef TARGET_ENCODE_SECTION_INFO +#define TARGET_ENCODE_SECTION_INFO s390_encode_section_info +#undef TARGET_STRIP_NAME_ENCODING +#define TARGET_STRIP_NAME_ENCODING s390_strip_name_encoding + +#ifdef HAVE_AS_TLS +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS true +#endif +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem + +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS s390_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN s390_expand_builtin + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true + struct gcc_target targetm = TARGET_INITIALIZER; extern int reload_completed; @@ -83,13 +113,13 @@ extern int reload_completed; /* The alias set for prologue/epilogue register save/restore. */ static int s390_sr_alias_set = 0; -/* Function count for creating unique internal labels in a compile unit. */ -int s390_function_count = 0; - /* Save information from a "cmpxx" operation until the branch or scc is emitted. */ rtx s390_compare_op0, s390_compare_op1; +/* The encoding characters for the four TLS models present in ELF. */ +static char const tls_model_chars[] = " GLil"; + /* Structure used to hold the components of a S/390 memory address. A legitimate address on S/390 is of the general form @@ -107,19 +137,26 @@ struct s390_address int pointer; }; -/* Structure containing information for prologue and epilogue. */ +/* Define the structure for the machine field in struct function. */ -struct s390_frame +struct machine_function GTY(()) { - int frame_pointer_p; - int return_reg_saved_p; + /* Label of start of initial literal pool. */ + rtx literal_pool_label; + + /* Set, if some of the fprs 8-15 need to be saved (64 bit abi). */ int save_fprs_p; + + /* Number of first and last gpr to be saved, restored. */ int first_save_gpr; int first_restore_gpr; int last_save_gpr; - int arg_frame_offset; + /* Size of stack frame. */ HOST_WIDE_INT frame_size; + + /* Some local-dynamic TLS symbol name. */ + const char *some_ld_name; }; static int s390_match_ccmode_set PARAMS ((rtx, enum machine_mode)); @@ -127,20 +164,29 @@ static int s390_branch_condition_mask PARAMS ((rtx)); static const char *s390_branch_condition_mnemonic PARAMS ((rtx, int)); static int check_mode PARAMS ((rtx, enum machine_mode *)); static int general_s_operand PARAMS ((rtx, enum machine_mode, int)); -static int s390_decompose_address PARAMS ((rtx, struct s390_address *, int)); +static int s390_decompose_address PARAMS ((rtx, struct s390_address *)); +static rtx get_thread_pointer PARAMS ((void)); +static rtx legitimize_tls_address PARAMS ((rtx, rtx)); +static const char *get_some_local_dynamic_name PARAMS ((void)); +static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *)); static int reg_used_in_mem_p PARAMS ((int, rtx)); static int addr_generation_dependency_p PARAMS ((rtx, rtx)); -static void s390_split_branches PARAMS ((void)); +static int s390_split_branches PARAMS ((rtx, bool *)); static void find_constant_pool_ref PARAMS ((rtx, rtx *)); static void replace_constant_pool_ref PARAMS ((rtx *, rtx, rtx)); -static void s390_chunkify_pool PARAMS ((void)); -static int save_fprs_p PARAMS ((void)); +static int find_base_register_in_addr PARAMS ((struct s390_address *)); +static bool find_base_register_ref PARAMS ((rtx)); +static void replace_base_register_ref PARAMS ((rtx *, rtx)); +static void s390_optimize_prolog PARAMS ((int)); +static bool s390_fixup_clobbered_return_reg PARAMS ((rtx)); static int find_unused_clobbered_reg PARAMS ((void)); -static void s390_frame_info PARAMS ((struct s390_frame *)); +static void s390_frame_info PARAMS ((void)); static rtx save_fpr PARAMS ((rtx, int, int)); static rtx restore_fpr PARAMS ((rtx, int, int)); +static rtx save_gprs PARAMS ((rtx, int, int, int)); +static rtx restore_gprs PARAMS ((rtx, int, int, int)); static int s390_function_arg_size PARAMS ((enum machine_mode, tree)); - +static struct machine_function * s390_init_machine_status PARAMS ((void)); /* Return true if SET either doesn't set the CC register, or else the source and destination have matching CC modes and that @@ -163,19 +209,28 @@ s390_match_ccmode_set (set, req_mode) switch (set_mode) { case CCSmode: - if (req_mode != CCSmode) - return 0; - break; + case CCSRmode: case CCUmode: - if (req_mode != CCUmode) - return 0; - break; + case CCURmode: case CCLmode: - if (req_mode != CCLmode) + case CCL1mode: + case CCL2mode: + case CCT1mode: + case CCT2mode: + case CCT3mode: + if (req_mode != set_mode) return 0; break; + case CCZmode: - if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode) + if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode + && req_mode != CCSRmode && req_mode != CCURmode) + return 0; + break; + + case CCAPmode: + case CCANmode: + if (req_mode != CCAmode) return 0; break; @@ -188,7 +243,8 @@ s390_match_ccmode_set (set, req_mode) /* Return true if every SET in INSN that sets the CC register has source and destination with matching CC modes and that - CC mode is at least as constrained as REQ_MODE. */ + CC mode is at least as constrained as REQ_MODE. + If REQ_MODE is VOIDmode, always return false. */ int s390_match_ccmode (insn, req_mode) @@ -197,6 +253,10 @@ s390_match_ccmode (insn, req_mode) { int i; + /* s390_tm_ccmode returns VOIDmode to indicate failure. */ + if (req_mode == VOIDmode) + return 0; + if (GET_CODE (PATTERN (insn)) == SET) return s390_match_ccmode_set (PATTERN (insn), req_mode); @@ -212,6 +272,45 @@ s390_match_ccmode (insn, req_mode) return 1; } +/* If a test-under-mask instruction can be used to implement + (compare (and ... OP1) OP2), return the CC mode required + to do that. Otherwise, return VOIDmode. + MIXED is true if the instruction can distinguish between + CC1 and CC2 for mixed selected bits (TMxx), it is false + if the instruction cannot (TM). */ + +enum machine_mode +s390_tm_ccmode (op1, op2, mixed) + rtx op1; + rtx op2; + int mixed; +{ + int bit0, bit1; + + /* ??? Fixme: should work on CONST_DOUBLE as well. */ + if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT) + return VOIDmode; + + /* Selected bits all zero: CC0. */ + if (INTVAL (op2) == 0) + return CCTmode; + + /* Selected bits all one: CC3. */ + if (INTVAL (op2) == INTVAL (op1)) + return CCT3mode; + + /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. */ + if (mixed) + { + bit1 = exact_log2 (INTVAL (op2)); + bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2)); + if (bit0 != -1 && bit1 != -1) + return bit0 > bit1 ? CCT1mode : CCT2mode; + } + + return VOIDmode; +} + /* Given a comparison code OP (EQ, NE, etc.) and the operands OP0 and OP1 of a COMPARE, return the mode to be used for the comparison. */ @@ -226,16 +325,49 @@ s390_select_ccmode (code, op0, op1) { case EQ: case NE: + if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT + && CONST_OK_FOR_LETTER_P (INTVAL (XEXP (op0, 1)), 'K')) + return CCAPmode; if (GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS || GET_CODE (op1) == NEG) return CCLmode; + if (GET_CODE (op0) == AND) + { + /* Check whether we can potentially do it via TM. */ + enum machine_mode ccmode; + ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1); + if (ccmode != VOIDmode) + { + /* Relax CCTmode to CCZmode to allow fall-back to AND + if that turns out to be beneficial. */ + return ccmode == CCTmode ? CCZmode : ccmode; + } + } + + if (register_operand (op0, HImode) + && GET_CODE (op1) == CONST_INT + && (INTVAL (op1) == -1 || INTVAL (op1) == 65535)) + return CCT3mode; + if (register_operand (op0, QImode) + && GET_CODE (op1) == CONST_INT + && (INTVAL (op1) == -1 || INTVAL (op1) == 255)) + return CCT3mode; + return CCZmode; case LE: case LT: case GE: case GT: + if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT + && CONST_OK_FOR_LETTER_P (INTVAL (XEXP (op0, 1)), 'K')) + { + if (INTVAL (XEXP((op0), 1)) < 0) + return CCANmode; + else + return CCAPmode; + } case UNORDERED: case ORDERED: case UNEQ: @@ -244,12 +376,29 @@ s390_select_ccmode (code, op0, op1) case UNGE: case UNGT: case LTGT: + if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) + && GET_CODE (op1) != CONST_INT) + return CCSRmode; return CCSmode; - case LEU: case LTU: case GEU: + if (GET_CODE (op0) == PLUS) + return CCL1mode; + + if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) + && GET_CODE (op1) != CONST_INT) + return CCURmode; + return CCUmode; + + case LEU: case GTU: + if (GET_CODE (op0) == MINUS) + return CCL2mode; + + if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) + && GET_CODE (op1) != CONST_INT) + return CCURmode; return CCUmode; default: @@ -286,13 +435,61 @@ s390_branch_condition_mask (code) } break; + case CCT1mode: + switch (GET_CODE (code)) + { + case EQ: return CC1; + case NE: return CC0 | CC2 | CC3; + default: + abort (); + } + break; + + case CCT2mode: + switch (GET_CODE (code)) + { + case EQ: return CC2; + case NE: return CC0 | CC1 | CC3; + default: + abort (); + } + break; + + case CCT3mode: + switch (GET_CODE (code)) + { + case EQ: return CC3; + case NE: return CC0 | CC1 | CC2; + default: + abort (); + } + break; + case CCLmode: switch (GET_CODE (code)) { case EQ: return CC0 | CC2; case NE: return CC1 | CC3; - case UNORDERED: return CC2 | CC3; /* carry */ - case ORDERED: return CC0 | CC1; /* no carry */ + default: + abort (); + } + break; + + case CCL1mode: + switch (GET_CODE (code)) + { + case LTU: return CC2 | CC3; /* carry */ + case GEU: return CC0 | CC1; /* no carry */ + default: + abort (); + } + break; + + case CCL2mode: + switch (GET_CODE (code)) + { + case GTU: return CC0 | CC1; /* borrow */ + case LEU: return CC2 | CC3; /* no borrow */ default: abort (); } @@ -312,6 +509,48 @@ s390_branch_condition_mask (code) } break; + case CCURmode: + switch (GET_CODE (code)) + { + case EQ: return CC0; + case NE: return CC2 | CC1 | CC3; + case LTU: return CC2; + case GTU: return CC1; + case LEU: return CC0 | CC2; + case GEU: return CC0 | CC1; + default: + abort (); + } + break; + + case CCAPmode: + switch (GET_CODE (code)) + { + case EQ: return CC0; + case NE: return CC1 | CC2 | CC3; + case LT: return CC1 | CC3; + case GT: return CC2; + case LE: return CC0 | CC1 | CC3; + case GE: return CC0 | CC2; + default: + abort (); + } + break; + + case CCANmode: + switch (GET_CODE (code)) + { + case EQ: return CC0; + case NE: return CC1 | CC2 | CC3; + case LT: return CC1; + case GT: return CC2 | CC3; + case LE: return CC0 | CC1; + case GE: return CC0 | CC2 | CC3; + default: + abort (); + } + break; + case CCSmode: switch (GET_CODE (code)) { @@ -332,6 +571,29 @@ s390_branch_condition_mask (code) default: abort (); } + break; + + case CCSRmode: + switch (GET_CODE (code)) + { + case EQ: return CC0; + case NE: return CC2 | CC1 | CC3; + case LT: return CC2; + case GT: return CC1; + case LE: return CC0 | CC2; + case GE: return CC0 | CC1; + case UNORDERED: return CC3; + case ORDERED: return CC0 | CC2 | CC1; + case UNEQ: return CC0 | CC3; + case UNLT: return CC2 | CC3; + case UNGT: return CC1 | CC3; + case UNLE: return CC0 | CC2 | CC3; + case UNGE: return CC0 | CC1 | CC3; + case LTGT: return CC2 | CC1; + default: + abort (); + } + break; default: abort (); @@ -347,7 +609,7 @@ s390_branch_condition_mnemonic (code, inv) rtx code; int inv; { - static const char *mnemonic[16] = + static const char *const mnemonic[16] = { NULL, "o", "h", "nle", "l", "nhe", "lh", "ne", @@ -379,7 +641,7 @@ s390_single_hi (op, mode, def) { if (GET_CODE (op) == CONST_INT) { - unsigned HOST_WIDE_INT value; + unsigned HOST_WIDE_INT value = 0; int n_parts = GET_MODE_SIZE (mode) / 2; int i, part = -1; @@ -405,7 +667,7 @@ s390_single_hi (op, mode, def) else if (GET_CODE (op) == CONST_DOUBLE && GET_MODE (op) == VOIDmode) { - unsigned HOST_WIDE_INT value; + unsigned HOST_WIDE_INT value = 0; int n_parts = GET_MODE_SIZE (mode) / 2; int i, part = -1; @@ -482,7 +744,7 @@ s390_single_qi (op, mode, def) { if (GET_CODE (op) == CONST_INT) { - unsigned HOST_WIDE_INT value; + unsigned HOST_WIDE_INT value = 0; int n_parts = GET_MODE_SIZE (mode); int i, part = -1; @@ -508,7 +770,7 @@ s390_single_qi (op, mode, def) else if (GET_CODE (op) == CONST_DOUBLE && GET_MODE (op) == VOIDmode) { - unsigned HOST_WIDE_INT value; + unsigned HOST_WIDE_INT value = 0; int n_parts = GET_MODE_SIZE (mode); int i, part = -1; @@ -572,6 +834,42 @@ s390_extract_qi (op, mode, part) abort (); } +/* Check whether we can (and want to) split a double-word + move in mode MODE from SRC to DST into two single-word + moves, moving the subword FIRST_SUBWORD first. */ + +bool +s390_split_ok_p (dst, src, mode, first_subword) + rtx dst; + rtx src; + enum machine_mode mode; + int first_subword; +{ + /* Floating point registers cannot be split. */ + if (FP_REG_P (src) || FP_REG_P (dst)) + return false; + + /* We don't need to split if operands are directly accessable. */ + if (s_operand (src, mode) || s_operand (dst, mode)) + return false; + + /* Non-offsettable memory references cannot be split. */ + if ((GET_CODE (src) == MEM && !offsettable_memref_p (src)) + || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst))) + return false; + + /* Moving the first subword must not clobber a register + needed to move the second subword. */ + if (register_operand (dst, mode)) + { + rtx subreg = operand_subword (dst, first_subword, 0, mode); + if (reg_overlap_mentioned_p (subreg, src)) + return false; + } + + return true; +} + /* Change optimizations to be performed, depending on the optimization level. @@ -579,18 +877,19 @@ s390_extract_qi (op, mode, part) LEVEL is the optimization level specified; 2 if `-O2' is specified, 1 if `-O' is specified, and 0 if neither is specified. - SIZE is non-zero if `-Os' is specified and zero otherwise. */ + SIZE is nonzero if `-Os' is specified and zero otherwise. */ void optimization_options (level, size) int level ATTRIBUTE_UNUSED; int size ATTRIBUTE_UNUSED; { -#ifdef HAVE_decrement_and_branch_on_count - /* When optimizing, enable use of BRCT instruction. */ - if (level >= 1) - flag_branch_on_count_reg = 1; -#endif + /* ??? There are apparently still problems with -fcaller-saves. */ + flag_caller_saves = 0; + + /* By default, always emit DWARF-2 unwind info. This allows debugging + without maintaining a stack frame back-chain. */ + flag_asynchronous_unwind_tables = 1; } void @@ -598,12 +897,14 @@ override_options () { /* Acquire a unique set number for our register saves and restores. */ s390_sr_alias_set = new_alias_set (); -} + /* Set up function hooks. */ + init_machine_status = s390_init_machine_status; +} /* Map for smallest class containing reg regno. */ -enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] = +const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] = { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, @@ -674,6 +975,8 @@ larl_operand (op, mode) if (GET_CODE (op) == LABEL_REF) return 1; if (GET_CODE (op) == SYMBOL_REF + && XSTR (op, 0)[0] != '@' + && !tls_symbolic_operand (op) && (!flag_pic || SYMBOL_REF_FLAG (op) || CONSTANT_POOL_ADDRESS_P (op))) return 1; @@ -696,39 +999,27 @@ larl_operand (op, mode) if (GET_CODE (op) == LABEL_REF) return 1; if (GET_CODE (op) == SYMBOL_REF + && XSTR (op, 0)[0] != '@' + && !tls_symbolic_operand (op) && (!flag_pic || SYMBOL_REF_FLAG (op) || CONSTANT_POOL_ADDRESS_P (op))) return 1; - /* Now we must have a @GOTENT offset or @PLT stub. */ + /* Now we must have a @GOTENT offset or @PLT stub + or an @INDNTPOFF TLS offset. */ if (GET_CODE (op) == UNSPEC && XINT (op, 1) == 111) return 1; if (GET_CODE (op) == UNSPEC && XINT (op, 1) == 113) return 1; + if (GET_CODE (op) == UNSPEC + && XINT (op, 1) == UNSPEC_INDNTPOFF) + return 1; return 0; } -/* Return true if OP is a valid FP-Register. - OP is the current operation. - MODE is the current operation mode. */ - -int -fp_operand (op, mode) - register rtx op; - enum machine_mode mode; -{ - register enum rtx_code code = GET_CODE (op); - if (! check_mode (op, &mode)) - return 0; - if (code == REG && REGNO_OK_FOR_FP_P (REGNO (op))) - return 1; - else - return 0; -} - /* Helper routine to implement s_operand and s_imm_operand. OP is the current operation. MODE is the current operation mode. @@ -777,7 +1068,7 @@ general_s_operand (op, mode, allow_immediate) case MEM: if (GET_CODE (XEXP (op, 0)) == ADDRESSOF) return 1; - if (s390_decompose_address (XEXP (op, 0), &addr, FALSE) + if (s390_decompose_address (XEXP (op, 0), &addr) && !addr.indx) return 1; break; @@ -815,6 +1106,41 @@ s_imm_operand (op, mode) return general_s_operand (op, mode, 1); } +/* Return true if OP is a valid operand for a 'Q' constraint. + This differs from s_operand in that only memory operands + without index register are accepted, nothing else. */ + +int +q_constraint (op) + register rtx op; +{ + struct s390_address addr; + + if (GET_CODE (op) != MEM) + return 0; + + if (!s390_decompose_address (XEXP (op, 0), &addr)) + return 0; + + if (addr.indx) + return 0; + + return 1; +} + +/* Return the cost of an address rtx ADDR. */ + +int +s390_address_cost (addr) + rtx addr; +{ + struct s390_address ad; + if (!s390_decompose_address (addr, &ad)) + return 1000; + + return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1); +} + /* Return true if OP is a valid operand for the BRAS instruction. OP is the current operation. MODE is the current operation mode. */ @@ -838,6 +1164,23 @@ bras_sym_operand (op, mode) return 0; } +/* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode, + otherwise return 0. */ + +int +tls_symbolic_operand (op) + register rtx op; +{ + const char *symbol_str; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + symbol_str = XSTR (op, 0); + + if (symbol_str[0] != '%') + return 0; + return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars; +} /* Return true if OP is a load multiple operation. It is known to be a PARALLEL and the first section will be tested. @@ -997,6 +1340,37 @@ symbolic_reference_mentioned_p (op) return 0; } +/* Return true if OP contains a reference to a thread-local symbol. */ + +int +tls_symbolic_reference_mentioned_p (op) + rtx op; +{ + register const char *fmt; + register int i; + + if (GET_CODE (op) == SYMBOL_REF) + return tls_symbolic_operand (op); + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + register int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j))) + return 1; + } + + else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i))) + return 1; + } + + return 0; +} + /* Return true if OP is a legitimate general operand when generating PIC code. It is given that flag_pic is on @@ -1011,7 +1385,7 @@ legitimate_pic_operand_p (op) return 1; /* Reject everything else; must be handled - via emit_pic_move. */ + via emit_symbolic_move. */ return 0; } @@ -1026,22 +1400,87 @@ legitimate_constant_p (op) if (!SYMBOLIC_CONST (op)) return 1; + /* Accept immediate LARL operands. */ + if (TARGET_64BIT && larl_operand (op, VOIDmode)) + return 1; + + /* Thread-local symbols are never legal constants. This is + so that emit_call knows that computing such addresses + might require a function call. */ + if (TLS_SYMBOLIC_CONST (op)) + return 0; + /* In the PIC case, symbolic constants must *not* be forced into the literal pool. We accept them here, - so that they will be handled by emit_pic_move. */ + so that they will be handled by emit_symbolic_move. */ if (flag_pic) return 1; - /* Even in the non-PIC case, we can accept immediate - LARL operands here. */ - if (TARGET_64BIT) - return larl_operand (op, VOIDmode); - /* All remaining non-PIC symbolic constants are forced into the literal pool. */ return 0; } +/* Determine if it's legal to put X into the constant pool. This + is not possible if X contains the address of a symbol that is + not constant (TLS) or not known at final link time (PIC). */ + +static bool +s390_cannot_force_const_mem (x) + rtx x; +{ + switch (GET_CODE (x)) + { + case CONST_INT: + case CONST_DOUBLE: + /* Accept all non-symbolic constants. */ + return false; + + case LABEL_REF: + /* Labels are OK iff we are non-PIC. */ + return flag_pic != 0; + + case SYMBOL_REF: + /* 'Naked' TLS symbol references are never OK, + non-TLS symbols are OK iff we are non-PIC. */ + if (tls_symbolic_operand (x)) + return true; + else + return flag_pic != 0; + + case CONST: + return s390_cannot_force_const_mem (XEXP (x, 0)); + case PLUS: + case MINUS: + return s390_cannot_force_const_mem (XEXP (x, 0)) + || s390_cannot_force_const_mem (XEXP (x, 1)); + + case UNSPEC: + switch (XINT (x, 1)) + { + /* Only lt-relative or GOT-relative UNSPECs are OK. */ + case 100: + case 104: + case 112: + case 114: + case UNSPEC_TLSGD: + case UNSPEC_TLSLDM: + case UNSPEC_NTPOFF: + case UNSPEC_DTPOFF: + case UNSPEC_GOTNTPOFF: + case UNSPEC_INDNTPOFF: + return false; + + default: + return true; + } + break; + + default: + abort (); + } +} + /* Returns true if the constant value OP is a legitimate general operand during and after reload. The difference to legitimate_constant_p is that this function will not accept @@ -1067,15 +1506,6 @@ legitimate_reload_constant_p (op) && larl_operand (op, VOIDmode)) return 1; - /* If reload is completed, and we do not already have a - literal pool, and OP must be forced to the literal - pool, then something must have gone wrong earlier. - We *cannot* force the constant any more, because the - prolog generation already decided we don't need to - set up the base register. */ - if (reload_completed && !regs_ever_live[BASE_REGISTER]) - abort (); - /* Everything else cannot be handled without reload. */ return 0; } @@ -1097,23 +1527,14 @@ s390_preferred_reload_class (op, class) switch (GET_CODE (op)) { /* Constants we cannot reload must be forced into the - literal pool. For constants we *could* handle directly, - it might still be preferable to put them in the pool and - use a memory-to-memory instruction. - - However, try to avoid needlessly allocating a literal - pool in a routine that wouldn't otherwise need any. - Heuristically, we assume that 64-bit leaf functions - typically don't need a literal pool, all others do. */ + literal pool. */ + case CONST_DOUBLE: case CONST_INT: - if (!legitimate_reload_constant_p (op)) - return NO_REGS; - - if (TARGET_64BIT && current_function_is_leaf) + if (legitimate_reload_constant_p (op)) return class; - - return NO_REGS; + else + return NO_REGS; /* If a symbolic constant or a PLUS is reloaded, it is most likely being used as an address, so @@ -1153,6 +1574,29 @@ s390_secondary_input_reload_class (class, mode, in) return NO_REGS; } +/* Return the register class of a scratch register needed to + store a register of class CLASS in MODE into OUT: + + We need a temporary when storing a double-word to a + non-offsettable memory address. */ + +enum reg_class +s390_secondary_output_reload_class (class, mode, out) + enum reg_class class; + enum machine_mode mode; + rtx out; +{ + if ((TARGET_64BIT ? mode == TImode + : (mode == DImode || mode == DFmode)) + && reg_classes_intersect_p (GENERAL_REGS, class) + && GET_CODE (out) == MEM + && !offsettable_memref_p (out) + && !s_operand (out, VOIDmode)) + return ADDR_REGS; + + return NO_REGS; +} + /* Return true if OP is a PLUS that is not a legitimate operand for the LA instruction. OP is the current operation. @@ -1180,21 +1624,13 @@ s390_plus_operand (op, mode) SCRATCH may be used as scratch register. */ void -s390_expand_plus_operand (target, src, scratch_in) +s390_expand_plus_operand (target, src, scratch) register rtx target; register rtx src; - register rtx scratch_in; + register rtx scratch; { - rtx sum1, sum2, scratch; - - /* ??? reload apparently does not ensure that the scratch register - and the target do not overlap. We absolutely require this to be - the case, however. Therefore the reload_in[sd]i patterns ask for - a double-sized scratch register, and if one part happens to be - equal to the target, we use the other one. */ - scratch = gen_rtx_REG (Pmode, REGNO (scratch_in)); - if (rtx_equal_p (scratch, target)) - scratch = gen_rtx_REG (Pmode, REGNO (scratch_in) + 1); + rtx sum1, sum2; + struct s390_address ad; /* src must be a PLUS; get its two operands. */ if (GET_CODE (src) != PLUS || GET_MODE (src) != Pmode) @@ -1205,58 +1641,49 @@ s390_expand_plus_operand (target, src, scratch_in) float registers occur in an address. */ sum1 = find_replacement (&XEXP (src, 0)); sum2 = find_replacement (&XEXP (src, 1)); + src = gen_rtx_PLUS (Pmode, sum1, sum2); - /* If one of the two operands is equal to the target, - make it the first one. If one is a constant, make - it the second one. */ - if (rtx_equal_p (target, sum2) - || GET_CODE (sum1) == CONST_INT) + /* If the address is already strictly valid, there's nothing to do. */ + if (!s390_decompose_address (src, &ad) + || (ad.base && !REG_OK_FOR_BASE_STRICT_P (ad.base)) + || (ad.indx && !REG_OK_FOR_INDEX_STRICT_P (ad.indx))) { - rtx tem = sum2; - sum2 = sum1; - sum1 = tem; - } + /* Otherwise, one of the operands cannot be an address register; + we reload its value into the scratch register. */ + if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15) + { + emit_move_insn (scratch, sum1); + sum1 = scratch; + } + if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15) + { + emit_move_insn (scratch, sum2); + sum2 = scratch; + } - /* If the first operand is not an address register, - we reload it into the target. */ - if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15) - { - emit_move_insn (target, sum1); - sum1 = target; - } + /* According to the way these invalid addresses are generated + in reload.c, it should never happen (at least on s390) that + *neither* of the PLUS components, after find_replacements + was applied, is an address register. */ + if (sum1 == scratch && sum2 == scratch) + { + debug_rtx (src); + abort (); + } - /* Likewise for the second operand. However, take - care not to clobber the target if we already used - it for the first operand. Use the scratch instead. - Also, allow an immediate offset if it is in range. */ - if ((true_regnum (sum2) < 1 || true_regnum (sum2) > 15) - && !(GET_CODE (sum2) == CONST_INT - && INTVAL (sum2) >= 0 && INTVAL (sum2) < 4096)) - { - if (!rtx_equal_p (target, sum1)) - { - emit_move_insn (target, sum2); - sum2 = target; - } - else - { - emit_move_insn (scratch, sum2); - sum2 = scratch; - } + src = gen_rtx_PLUS (Pmode, sum1, sum2); } /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS is only ever performed on addresses, so we can mark the sum as legitimate for LA in any case. */ - src = gen_rtx_PLUS (Pmode, sum1, sum2); - src = legitimize_la_operand (src); - emit_insn (gen_rtx_SET (VOIDmode, target, src)); + s390_load_address (target, src); } /* Decompose a RTL expression ADDR for a memory address into - its components, returned in OUT. The boolean STRICT - specifies whether strict register checking applies. + its components, returned in OUT. + Returns 0 if ADDR is not a valid memory address, nonzero otherwise. If OUT is NULL, don't return the components, but check for validity only. @@ -1266,10 +1693,9 @@ s390_expand_plus_operand (target, src, scratch_in) canonical form so that they will be recognized. */ static int -s390_decompose_address (addr, out, strict) +s390_decompose_address (addr, out) register rtx addr; struct s390_address *out; - int strict; { rtx base = NULL_RTX; rtx indx = NULL_RTX; @@ -1320,6 +1746,22 @@ s390_decompose_address (addr, out, strict) disp = addr; /* displacement */ + /* Prefer to use pointer as base, not index. */ + if (base && indx) + { + int base_ptr = GET_CODE (base) == UNSPEC + || (REG_P (base) && REG_POINTER (base)); + int indx_ptr = GET_CODE (indx) == UNSPEC + || (REG_P (indx) && REG_POINTER (indx)); + + if (!base_ptr && indx_ptr) + { + rtx tmp = base; + base = indx; + indx = tmp; + } + } + /* Validate base register. */ if (base) { @@ -1334,16 +1776,15 @@ s390_decompose_address (addr, out, strict) if (GET_CODE (base) != REG || GET_MODE (base) != Pmode) return FALSE; - if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base)) - || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base))) - return FALSE; - if (REGNO (base) == BASE_REGISTER || REGNO (base) == STACK_POINTER_REGNUM || REGNO (base) == FRAME_POINTER_REGNUM || ((reload_completed || reload_in_progress) && frame_pointer_needed && REGNO (base) == HARD_FRAME_POINTER_REGNUM) + || REGNO (base) == ARG_POINTER_REGNUM + || (REGNO (base) >= FIRST_VIRTUAL_REGISTER + && REGNO (base) <= LAST_VIRTUAL_REGISTER) || (flag_pic && REGNO (base) == PIC_OFFSET_TABLE_REGNUM)) pointer = TRUE; @@ -1363,16 +1804,15 @@ s390_decompose_address (addr, out, strict) if (GET_CODE (indx) != REG || GET_MODE (indx) != Pmode) return FALSE; - if ((strict && ! REG_OK_FOR_BASE_STRICT_P (indx)) - || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (indx))) - return FALSE; - if (REGNO (indx) == BASE_REGISTER || REGNO (indx) == STACK_POINTER_REGNUM || REGNO (indx) == FRAME_POINTER_REGNUM || ((reload_completed || reload_in_progress) && frame_pointer_needed && REGNO (indx) == HARD_FRAME_POINTER_REGNUM) + || REGNO (indx) == ARG_POINTER_REGNUM + || (REGNO (indx) >= FIRST_VIRTUAL_REGISTER + && REGNO (indx) <= LAST_VIRTUAL_REGISTER) || (flag_pic && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM)) pointer = TRUE; @@ -1384,15 +1824,27 @@ s390_decompose_address (addr, out, strict) /* Allow integer constant in range. */ if (GET_CODE (disp) == CONST_INT) { - if (INTVAL (disp) < 0 || INTVAL (disp) >= 4096) - return FALSE; + /* If the argument pointer is involved, the displacement will change + later anyway as the argument pointer gets eliminated. This could + make a valid displacement invalid, but it is more likely to make + an invalid displacement valid, because we sometimes access the + register save area via negative offsets to the arg pointer. + Thus we don't check the displacement for validity here. If after + elimination the displacement turns out to be invalid after all, + this is fixed up by reload in any case. */ + if (base != arg_pointer_rtx && indx != arg_pointer_rtx) + { + if (INTVAL (disp) < 0 || INTVAL (disp) >= 4096) + return FALSE; + } } /* In the small-PIC case, the linker converts @GOT12 - offsets to possible displacements. */ + and @GOTNTPOFF offsets to possible displacements. */ else if (GET_CODE (disp) == CONST && GET_CODE (XEXP (disp, 0)) == UNSPEC - && XINT (XEXP (disp, 0), 1) == 110) + && (XINT (XEXP (disp, 0), 1) == 110 + || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF)) { if (flag_pic != 1) return FALSE; @@ -1442,12 +1894,6 @@ s390_decompose_address (addr, out, strict) || !CONSTANT_POOL_ADDRESS_P (disp)) return FALSE; - /* In 64-bit PIC mode we cannot accept symbolic - constants in the constant pool. */ - if (TARGET_64BIT && flag_pic - && SYMBOLIC_CONST (get_pool_constant (disp))) - return FALSE; - /* If we have an offset, make sure it does not exceed the size of the constant pool entry. */ if (offset && offset >= GET_MODE_SIZE (get_pool_mode (disp))) @@ -1497,7 +1943,26 @@ legitimate_address_p (mode, addr, strict) register rtx addr; int strict; { - return s390_decompose_address (addr, NULL, strict); + struct s390_address ad; + if (!s390_decompose_address (addr, &ad)) + return FALSE; + + if (strict) + { + if (ad.base && !REG_OK_FOR_BASE_STRICT_P (ad.base)) + return FALSE; + if (ad.indx && !REG_OK_FOR_INDEX_STRICT_P (ad.indx)) + return FALSE; + } + else + { + if (ad.base && !REG_OK_FOR_BASE_NONSTRICT_P (ad.base)) + return FALSE; + if (ad.indx && !REG_OK_FOR_INDEX_NONSTRICT_P (ad.indx)) + return FALSE; + } + + return TRUE; } /* Return 1 if OP is a valid operand for the LA instruction. @@ -1509,7 +1974,7 @@ legitimate_la_operand_p (op) register rtx op; { struct s390_address addr; - if (!s390_decompose_address (op, &addr, FALSE)) + if (!s390_decompose_address (op, &addr)) return FALSE; if (TARGET_64BIT || addr.pointer) @@ -1518,30 +1983,43 @@ legitimate_la_operand_p (op) return FALSE; } -/* Return a modified variant of OP that is guaranteed to - be accepted by legitimate_la_operand_p. */ - -rtx -legitimize_la_operand (op) +/* Return 1 if OP is a valid operand for the LA instruction, + and we prefer to use LA over addition to compute it. */ + +int +preferred_la_operand_p (op) register rtx op; { struct s390_address addr; - if (!s390_decompose_address (op, &addr, FALSE)) - abort (); + if (!s390_decompose_address (op, &addr)) + return FALSE; - if (TARGET_64BIT || addr.pointer) - return op; + if (!TARGET_64BIT && !addr.pointer) + return FALSE; - if (!addr.base) - abort (); + if (addr.pointer) + return TRUE; - op = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr.base), 101); - if (addr.indx) - op = gen_rtx_PLUS (Pmode, op, addr.indx); - if (addr.disp) - op = gen_rtx_PLUS (Pmode, op, addr.disp); + if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base)) + || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx))) + return TRUE; - return op; + return FALSE; +} + +/* Emit a forced load-address operation to load SRC into DST. + This will use the LOAD ADDRESS instruction even in situations + where legitimate_la_operand_p (SRC) returns false. */ + +void +s390_load_address (dst, src) + rtx dst; + rtx src; +{ + if (TARGET_64BIT) + emit_move_insn (dst, src); + else + emit_insn (gen_force_la_31 (dst, src)); } /* Return a legitimate reference for ORIG (an address) using the @@ -1577,7 +2055,7 @@ legitimize_pic_address (orig, reg) || CONSTANT_POOL_ADDRESS_P (addr)))) { /* This is a local symbol. */ - if (TARGET_64BIT) + if (TARGET_64BIT && larl_operand (addr, VOIDmode)) { /* Access local symbols PC-relative via LARL. This is the same as in the non-PIC case, so it is @@ -1589,9 +2067,9 @@ legitimize_pic_address (orig, reg) rtx temp = reg? reg : gen_reg_rtx (Pmode); - addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, addr), 100); - addr = gen_rtx_CONST (SImode, addr); - addr = force_const_mem (SImode, addr); + addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 100); + addr = gen_rtx_CONST (Pmode, addr); + addr = force_const_mem (Pmode, addr); emit_move_insn (temp, addr); base = gen_rtx_REG (Pmode, BASE_REGISTER); @@ -1615,7 +2093,8 @@ legitimize_pic_address (orig, reg) /* Assume GOT offset < 4k. This is handled the same way in both 31- and 64-bit code (@GOT12). */ - current_function_uses_pic_offset_table = 1; + if (reload_in_progress || reload_completed) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 110); new = gen_rtx_CONST (Pmode, new); @@ -1648,11 +2127,12 @@ legitimize_pic_address (orig, reg) rtx temp = gen_reg_rtx (Pmode); - current_function_uses_pic_offset_table = 1; + if (reload_in_progress || reload_completed) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; - addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, addr), 112); - addr = gen_rtx_CONST (SImode, addr); - addr = force_const_mem (SImode, addr); + addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 112); + addr = gen_rtx_CONST (Pmode, addr); + addr = force_const_mem (Pmode, addr); emit_move_insn (temp, addr); new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp); @@ -1678,7 +2158,7 @@ legitimize_pic_address (orig, reg) case 100: case 112: case 114: - new = force_const_mem (SImode, orig); + new = force_const_mem (Pmode, orig); break; /* @GOTENT is OK as is. */ @@ -1693,9 +2173,9 @@ legitimize_pic_address (orig, reg) rtx temp = reg? reg : gen_reg_rtx (Pmode); addr = XVECEXP (addr, 0, 0); - addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, addr), 114); - addr = gen_rtx_CONST (SImode, addr); - addr = force_const_mem (SImode, addr); + addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 114); + addr = gen_rtx_CONST (Pmode, addr); + addr = force_const_mem (Pmode, addr); emit_move_insn (temp, addr); base = gen_rtx_REG (Pmode, BASE_REGISTER); @@ -1729,7 +2209,7 @@ legitimize_pic_address (orig, reg) || CONSTANT_POOL_ADDRESS_P (op0)))) && GET_CODE (op1) == CONST_INT) { - if (TARGET_64BIT) + if (TARGET_64BIT && larl_operand (op0, VOIDmode)) { if (INTVAL (op1) & 1) { @@ -1766,10 +2246,10 @@ legitimize_pic_address (orig, reg) rtx temp = reg? reg : gen_reg_rtx (Pmode); - addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, op0), 100); - addr = gen_rtx_PLUS (SImode, addr, op1); - addr = gen_rtx_CONST (SImode, addr); - addr = force_const_mem (SImode, addr); + addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 100); + addr = gen_rtx_PLUS (Pmode, addr, op1); + addr = gen_rtx_CONST (Pmode, addr); + addr = force_const_mem (Pmode, addr); emit_move_insn (temp, addr); base = gen_rtx_REG (Pmode, BASE_REGISTER); @@ -1795,7 +2275,7 @@ legitimize_pic_address (orig, reg) if (XINT (op0, 1) != 100) abort (); - new = force_const_mem (SImode, orig); + new = force_const_mem (Pmode, orig); } /* Otherwise, compute the sum. */ @@ -1825,18 +2305,237 @@ legitimize_pic_address (orig, reg) return new; } +/* Load the thread pointer into a register. */ + +static rtx +get_thread_pointer () +{ + rtx tp; + + tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP); + tp = force_reg (Pmode, tp); + mark_reg_pointer (tp, BITS_PER_WORD); + + return tp; +} + +/* Construct the SYMBOL_REF for the tls_get_offset function. */ + +static GTY(()) rtx s390_tls_symbol; +rtx +s390_tls_get_offset () +{ + if (!s390_tls_symbol) + s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset"); + + return s390_tls_symbol; +} + +/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute + this (thread-local) address. REG may be used as temporary. */ + +static rtx +legitimize_tls_address (addr, reg) + rtx addr; + rtx reg; +{ + rtx new, tls_call, temp, base, r2, insn; + + if (GET_CODE (addr) == SYMBOL_REF) + switch (tls_symbolic_operand (addr)) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + start_sequence (); + r2 = gen_rtx_REG (Pmode, 2); + tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD); + new = gen_rtx_CONST (Pmode, tls_call); + new = force_const_mem (Pmode, new); + emit_move_insn (r2, new); + emit_call_insn (gen_call_value_tls (r2, tls_call)); + insn = get_insns (); + end_sequence (); + + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF); + temp = gen_reg_rtx (Pmode); + emit_libcall_block (insn, temp, r2, new); + + new = gen_rtx_PLUS (Pmode, get_thread_pointer (), temp); + if (reg != 0) + { + s390_load_address (reg, new); + new = reg; + } + break; + + case TLS_MODEL_LOCAL_DYNAMIC: + start_sequence (); + r2 = gen_rtx_REG (Pmode, 2); + tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM); + new = gen_rtx_CONST (Pmode, tls_call); + new = force_const_mem (Pmode, new); + emit_move_insn (r2, new); + emit_call_insn (gen_call_value_tls (r2, tls_call)); + insn = get_insns (); + end_sequence (); + + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF); + temp = gen_reg_rtx (Pmode); + emit_libcall_block (insn, temp, r2, new); + + new = gen_rtx_PLUS (Pmode, get_thread_pointer (), temp); + base = gen_reg_rtx (Pmode); + s390_load_address (base, new); + + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF); + new = gen_rtx_CONST (Pmode, new); + new = force_const_mem (Pmode, new); + temp = gen_reg_rtx (Pmode); + emit_move_insn (temp, new); + + new = gen_rtx_PLUS (Pmode, base, temp); + if (reg != 0) + { + s390_load_address (reg, new); + new = reg; + } + break; + + case TLS_MODEL_INITIAL_EXEC: + if (flag_pic == 1) + { + /* Assume GOT offset < 4k. This is handled the same way + in both 31- and 64-bit code. */ + + if (reload_in_progress || reload_completed) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; + + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF); + new = gen_rtx_CONST (Pmode, new); + new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new); + new = gen_rtx_MEM (Pmode, new); + RTX_UNCHANGING_P (new) = 1; + temp = gen_reg_rtx (Pmode); + emit_move_insn (temp, new); + } + else if (TARGET_64BIT) + { + /* If the GOT offset might be >= 4k, we determine the position + of the GOT entry via a PC-relative LARL. */ + + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF); + new = gen_rtx_CONST (Pmode, new); + temp = gen_reg_rtx (Pmode); + emit_move_insn (temp, new); + + new = gen_rtx_MEM (Pmode, temp); + RTX_UNCHANGING_P (new) = 1; + temp = gen_reg_rtx (Pmode); + emit_move_insn (temp, new); + } + else if (flag_pic) + { + /* If the GOT offset might be >= 4k, we have to load it + from the literal pool. */ + + if (reload_in_progress || reload_completed) + regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; + + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF); + new = gen_rtx_CONST (Pmode, new); + new = force_const_mem (Pmode, new); + temp = gen_reg_rtx (Pmode); + emit_move_insn (temp, new); + + new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp); + new = gen_rtx_MEM (Pmode, new); + RTX_UNCHANGING_P (new) = 1; + + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new, addr), UNSPEC_TLS_LOAD); + temp = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (Pmode, temp, new)); + } + else + { + /* In position-dependent code, load the absolute address of + the GOT entry from the literal pool. */ + + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF); + new = gen_rtx_CONST (Pmode, new); + new = force_const_mem (Pmode, new); + temp = gen_reg_rtx (Pmode); + emit_move_insn (temp, new); + + new = temp; + new = gen_rtx_MEM (Pmode, new); + RTX_UNCHANGING_P (new) = 1; + + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new, addr), UNSPEC_TLS_LOAD); + temp = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (Pmode, temp, new)); + } + + new = gen_rtx_PLUS (Pmode, get_thread_pointer (), temp); + if (reg != 0) + { + s390_load_address (reg, new); + new = reg; + } + break; + + case TLS_MODEL_LOCAL_EXEC: + new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF); + new = gen_rtx_CONST (Pmode, new); + new = force_const_mem (Pmode, new); + temp = gen_reg_rtx (Pmode); + emit_move_insn (temp, new); + + new = gen_rtx_PLUS (Pmode, get_thread_pointer (), temp); + if (reg != 0) + { + s390_load_address (reg, new); + new = reg; + } + break; + + default: + abort (); + } + + else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC) + { + switch (XINT (XEXP (addr, 0), 1)) + { + case UNSPEC_INDNTPOFF: + if (TARGET_64BIT) + new = addr; + else + abort (); + break; + + default: + abort (); + } + } + + else + abort (); /* for now ... */ + + return new; +} + /* Emit insns to move operands[1] into operands[0]. */ void -emit_pic_move (operands, mode) +emit_symbolic_move (operands) rtx *operands; - enum machine_mode mode ATTRIBUTE_UNUSED; { rtx temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); - if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1])) + if (GET_CODE (operands[0]) == MEM) operands[1] = force_reg (Pmode, operands[1]); - else + else if (TLS_SYMBOLIC_CONST (operands[1])) + operands[1] = legitimize_tls_address (operands[1], temp); + else if (flag_pic) operands[1] = legitimize_pic_address (operands[1], temp); } @@ -1859,7 +2558,14 @@ legitimize_address (x, oldx, mode) { rtx constant_term = const0_rtx; - if (flag_pic) + if (TLS_SYMBOLIC_CONST (x)) + { + x = legitimize_tls_address (x, 0); + + if (legitimate_address_p (mode, x, FALSE)) + return x; + } + else if (flag_pic) { if (SYMBOLIC_CONST (x) || (GET_CODE (x) == PLUS @@ -1873,6 +2579,31 @@ legitimize_address (x, oldx, mode) x = eliminate_constant_term (x, &constant_term); + /* Optimize loading of large displacements by splitting them + into the multiple of 4K and the rest; this allows the + former to be CSE'd if possible. + + Don't do this if the displacement is added to a register + pointing into the stack frame, as the offsets will + change later anyway. */ + + if (GET_CODE (constant_term) == CONST_INT + && (INTVAL (constant_term) < 0 + || INTVAL (constant_term) >= 4096) + && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x)))) + { + HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff; + HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower; + + rtx temp = gen_reg_rtx (Pmode); + rtx val = force_operand (GEN_INT (upper), temp); + if (val != temp) + emit_move_insn (temp, val); + + x = gen_rtx_PLUS (Pmode, x, temp); + constant_term = GEN_INT (lower); + } + if (GET_CODE (x) == PLUS) { if (GET_CODE (XEXP (x, 0)) == REG) @@ -1902,6 +2633,316 @@ legitimize_address (x, oldx, mode) return x; } +/* Emit code to move LEN bytes from DST to SRC. */ + +void +s390_expand_movstr (dst, src, len) + rtx dst; + rtx src; + rtx len; +{ + rtx (*gen_short) PARAMS ((rtx, rtx, rtx)) = + TARGET_64BIT ? gen_movstr_short_64 : gen_movstr_short_31; + rtx (*gen_long) PARAMS ((rtx, rtx, rtx, rtx)) = + TARGET_64BIT ? gen_movstr_long_64 : gen_movstr_long_31; + + + if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256) + { + if (INTVAL (len) > 0) + emit_insn ((*gen_short) (dst, src, GEN_INT (INTVAL (len) - 1))); + } + + else if (TARGET_MVCLE) + { + enum machine_mode double_mode = TARGET_64BIT ? TImode : DImode; + enum machine_mode single_mode = TARGET_64BIT ? DImode : SImode; + rtx reg0 = gen_reg_rtx (double_mode); + rtx reg1 = gen_reg_rtx (double_mode); + + emit_move_insn (gen_highpart (single_mode, reg0), + force_operand (XEXP (dst, 0), NULL_RTX)); + emit_move_insn (gen_highpart (single_mode, reg1), + force_operand (XEXP (src, 0), NULL_RTX)); + + convert_move (gen_lowpart (single_mode, reg0), len, 1); + convert_move (gen_lowpart (single_mode, reg1), len, 1); + + emit_insn ((*gen_long) (reg0, reg1, reg0, reg1)); + } + + else + { + rtx dst_addr, src_addr, count, blocks, temp; + rtx end_label = gen_label_rtx (); + enum machine_mode mode; + tree type; + + mode = GET_MODE (len); + if (mode == VOIDmode) + mode = word_mode; + + type = (*lang_hooks.types.type_for_mode) (mode, 1); + if (!type) + abort (); + + dst_addr = gen_reg_rtx (Pmode); + src_addr = gen_reg_rtx (Pmode); + count = gen_reg_rtx (mode); + blocks = gen_reg_rtx (mode); + + convert_move (count, len, 1); + emit_cmp_and_jump_insns (count, const0_rtx, + EQ, NULL_RTX, mode, 1, end_label); + + emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX)); + emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX)); + dst = change_address (dst, VOIDmode, dst_addr); + src = change_address (src, VOIDmode, src_addr); + + temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, 0); + if (temp != count) + emit_move_insn (count, temp); + + temp = expand_binop (mode, ashr_optab, count, GEN_INT (8), blocks, 1, 0); + if (temp != blocks) + emit_move_insn (blocks, temp); + + expand_start_loop (1); + expand_exit_loop_top_cond (0, build (NE_EXPR, type, + make_tree (type, blocks), + make_tree (type, const0_rtx))); + + emit_insn ((*gen_short) (dst, src, GEN_INT (255))); + s390_load_address (dst_addr, + gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256))); + s390_load_address (src_addr, + gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256))); + + temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, 0); + if (temp != blocks) + emit_move_insn (blocks, temp); + + expand_end_loop (); + + emit_insn ((*gen_short) (dst, src, convert_to_mode (word_mode, count, 1))); + emit_label (end_label); + } +} + +/* Emit code to clear LEN bytes at DST. */ + +void +s390_expand_clrstr (dst, len) + rtx dst; + rtx len; +{ + rtx (*gen_short) PARAMS ((rtx, rtx)) = + TARGET_64BIT ? gen_clrstr_short_64 : gen_clrstr_short_31; + rtx (*gen_long) PARAMS ((rtx, rtx, rtx)) = + TARGET_64BIT ? gen_clrstr_long_64 : gen_clrstr_long_31; + + + if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256) + { + if (INTVAL (len) > 0) + emit_insn ((*gen_short) (dst, GEN_INT (INTVAL (len) - 1))); + } + + else if (TARGET_MVCLE) + { + enum machine_mode double_mode = TARGET_64BIT ? TImode : DImode; + enum machine_mode single_mode = TARGET_64BIT ? DImode : SImode; + rtx reg0 = gen_reg_rtx (double_mode); + rtx reg1 = gen_reg_rtx (double_mode); + + emit_move_insn (gen_highpart (single_mode, reg0), + force_operand (XEXP (dst, 0), NULL_RTX)); + convert_move (gen_lowpart (single_mode, reg0), len, 1); + + emit_move_insn (gen_highpart (single_mode, reg1), const0_rtx); + emit_move_insn (gen_lowpart (single_mode, reg1), const0_rtx); + + emit_insn ((*gen_long) (reg0, reg1, reg0)); + } + + else + { + rtx dst_addr, src_addr, count, blocks, temp; + rtx end_label = gen_label_rtx (); + enum machine_mode mode; + tree type; + + mode = GET_MODE (len); + if (mode == VOIDmode) + mode = word_mode; + + type = (*lang_hooks.types.type_for_mode) (mode, 1); + if (!type) + abort (); + + dst_addr = gen_reg_rtx (Pmode); + src_addr = gen_reg_rtx (Pmode); + count = gen_reg_rtx (mode); + blocks = gen_reg_rtx (mode); + + convert_move (count, len, 1); + emit_cmp_and_jump_insns (count, const0_rtx, + EQ, NULL_RTX, mode, 1, end_label); + + emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX)); + dst = change_address (dst, VOIDmode, dst_addr); + + temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, 0); + if (temp != count) + emit_move_insn (count, temp); + + temp = expand_binop (mode, ashr_optab, count, GEN_INT (8), blocks, 1, 0); + if (temp != blocks) + emit_move_insn (blocks, temp); + + expand_start_loop (1); + expand_exit_loop_top_cond (0, build (NE_EXPR, type, + make_tree (type, blocks), + make_tree (type, const0_rtx))); + + emit_insn ((*gen_short) (dst, GEN_INT (255))); + s390_load_address (dst_addr, + gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256))); + + temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, 0); + if (temp != blocks) + emit_move_insn (blocks, temp); + + expand_end_loop (); + + emit_insn ((*gen_short) (dst, convert_to_mode (word_mode, count, 1))); + emit_label (end_label); + } +} + +/* Emit code to compare LEN bytes at OP0 with those at OP1, + and return the result in TARGET. */ + +void +s390_expand_cmpstr (target, op0, op1, len) + rtx target; + rtx op0; + rtx op1; + rtx len; +{ + rtx (*gen_short) PARAMS ((rtx, rtx, rtx)) = + TARGET_64BIT ? gen_cmpstr_short_64 : gen_cmpstr_short_31; + rtx (*gen_long) PARAMS ((rtx, rtx, rtx, rtx)) = + TARGET_64BIT ? gen_cmpstr_long_64 : gen_cmpstr_long_31; + rtx (*gen_result) PARAMS ((rtx)) = + GET_MODE (target) == DImode ? gen_cmpint_di : gen_cmpint_si; + + op0 = protect_from_queue (op0, 0); + op1 = protect_from_queue (op1, 0); + len = protect_from_queue (len, 0); + + if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256) + { + if (INTVAL (len) > 0) + { + emit_insn ((*gen_short) (op0, op1, GEN_INT (INTVAL (len) - 1))); + emit_insn ((*gen_result) (target)); + } + else + emit_move_insn (target, const0_rtx); + } + + else /* if (TARGET_MVCLE) */ + { + enum machine_mode double_mode = TARGET_64BIT ? TImode : DImode; + enum machine_mode single_mode = TARGET_64BIT ? DImode : SImode; + rtx reg0 = gen_reg_rtx (double_mode); + rtx reg1 = gen_reg_rtx (double_mode); + + emit_move_insn (gen_highpart (single_mode, reg0), + force_operand (XEXP (op0, 0), NULL_RTX)); + emit_move_insn (gen_highpart (single_mode, reg1), + force_operand (XEXP (op1, 0), NULL_RTX)); + + convert_move (gen_lowpart (single_mode, reg0), len, 1); + convert_move (gen_lowpart (single_mode, reg1), len, 1); + + emit_insn ((*gen_long) (reg0, reg1, reg0, reg1)); + emit_insn ((*gen_result) (target)); + } + +#if 0 + /* Deactivate for now as profile code cannot cope with + CC being live across basic block boundaries. */ + else + { + rtx addr0, addr1, count, blocks, temp; + rtx end_label = gen_label_rtx (); + enum machine_mode mode; + tree type; + + mode = GET_MODE (len); + if (mode == VOIDmode) + mode = word_mode; + + type = (*lang_hooks.types.type_for_mode) (mode, 1); + if (!type) + abort (); + + addr0 = gen_reg_rtx (Pmode); + addr1 = gen_reg_rtx (Pmode); + count = gen_reg_rtx (mode); + blocks = gen_reg_rtx (mode); + + convert_move (count, len, 1); + emit_cmp_and_jump_insns (count, const0_rtx, + EQ, NULL_RTX, mode, 1, end_label); + + emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX)); + emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX)); + op0 = change_address (op0, VOIDmode, addr0); + op1 = change_address (op1, VOIDmode, addr1); + + temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, 0); + if (temp != count) + emit_move_insn (count, temp); + + temp = expand_binop (mode, ashr_optab, count, GEN_INT (8), blocks, 1, 0); + if (temp != blocks) + emit_move_insn (blocks, temp); + + expand_start_loop (1); + expand_exit_loop_top_cond (0, build (NE_EXPR, type, + make_tree (type, blocks), + make_tree (type, const0_rtx))); + + emit_insn ((*gen_short) (op0, op1, GEN_INT (255))); + temp = gen_rtx_NE (VOIDmode, gen_rtx_REG (CCSmode, 33), const0_rtx); + temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, + gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx); + temp = gen_rtx_SET (VOIDmode, pc_rtx, temp); + emit_jump_insn (temp); + + s390_load_address (addr0, + gen_rtx_PLUS (Pmode, addr0, GEN_INT (256))); + s390_load_address (addr1, + gen_rtx_PLUS (Pmode, addr1, GEN_INT (256))); + + temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, 0); + if (temp != blocks) + emit_move_insn (blocks, temp); + + expand_end_loop (); + + emit_insn ((*gen_short) (op0, op1, convert_to_mode (word_mode, count, 1))); + emit_label (end_label); + + emit_insn ((*gen_result) (target)); + } +#endif +} + /* In the name of slightly smaller debug output, and to cater to general assembler losage, recognize various UNSPEC sequences and turn them back into a direct symbol reference. */ @@ -1940,6 +2981,48 @@ s390_simplify_dwarf_addr (orig_x) return orig_x; } +/* Locate some local-dynamic symbol still in use by this function + so that we can print its name in local-dynamic base patterns. */ + +static const char * +get_some_local_dynamic_name () +{ + rtx insn; + + if (cfun->machine->some_ld_name) + return cfun->machine->some_ld_name; + + for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) + if (INSN_P (insn) + && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) + return cfun->machine->some_ld_name; + + abort (); +} + +static int +get_some_local_dynamic_name_1 (px, data) + rtx *px; + void *data ATTRIBUTE_UNUSED; +{ + rtx x = *px; + + if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) + { + x = get_pool_constant (x); + return for_each_rtx (&x, get_some_local_dynamic_name_1, 0); + } + + if (GET_CODE (x) == SYMBOL_REF + && tls_symbolic_operand (x) == TLS_MODEL_LOCAL_DYNAMIC) + { + cfun->machine->some_ld_name = XSTR (x, 0); + return 1; + } + + return 0; +} + /* Output symbolic constant X in assembler syntax to stdio stream FILE. */ @@ -1981,8 +3064,15 @@ s390_output_symbolic_const (file, x) switch (XINT (x, 1)) { case 100: + case 104: + s390_output_symbolic_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "-"); + s390_output_symbolic_const (file, cfun->machine->literal_pool_label); + break; + case 105: + s390_output_symbolic_const (file, cfun->machine->literal_pool_label); + fprintf (file, "-"); s390_output_symbolic_const (file, XVECEXP (x, 0, 0)); - fprintf (file, "-.LT%X", s390_function_count); break; case 110: s390_output_symbolic_const (file, XVECEXP (x, 0, 0)); @@ -2002,7 +3092,32 @@ s390_output_symbolic_const (file, x) break; case 114: s390_output_symbolic_const (file, XVECEXP (x, 0, 0)); - fprintf (file, "@PLT-.LT%X", s390_function_count); + fprintf (file, "@PLT-"); + s390_output_symbolic_const (file, cfun->machine->literal_pool_label); + break; + case UNSPEC_TLSGD: + s390_output_symbolic_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@TLSGD"); + break; + case UNSPEC_TLSLDM: + assemble_name (file, get_some_local_dynamic_name ()); + fprintf (file, "@TLSLDM"); + break; + case UNSPEC_DTPOFF: + s390_output_symbolic_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@DTPOFF"); + break; + case UNSPEC_NTPOFF: + s390_output_symbolic_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@NTPOFF"); + break; + case UNSPEC_GOTNTPOFF: + s390_output_symbolic_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@GOTNTPOFF"); + break; + case UNSPEC_INDNTPOFF: + s390_output_symbolic_const (file, XVECEXP (x, 0, 0)); + fprintf (file, "@INDNTPOFF"); break; default: output_operand_lossage ("invalid UNSPEC as operand (2)"); @@ -2026,7 +3141,9 @@ print_operand_address (file, addr) { struct s390_address ad; - if (!s390_decompose_address (addr, &ad, TRUE)) + if (!s390_decompose_address (addr, &ad) + || (ad.base && !REG_OK_FOR_BASE_STRICT_P (ad.base)) + || (ad.indx && !REG_OK_FOR_INDEX_STRICT_P (ad.indx))) output_operand_lossage ("Cannot decompose address."); if (ad.disp) @@ -2047,6 +3164,7 @@ print_operand_address (file, addr) 'C': print opcode suffix for branch condition. 'D': print opcode suffix for inverse branch condition. + 'J': print tls_load/tls_gdcall/tls_ldcall suffix 'O': print only the displacement of a memory reference. 'R': print only the base register of a memory reference. 'N': print the second word of a DImode operand. @@ -2072,12 +3190,33 @@ print_operand (file, x, code) fprintf (file, s390_branch_condition_mnemonic (x, TRUE)); return; + case 'J': + if (GET_CODE (x) == SYMBOL_REF) + { + fprintf (file, "%s", ":tls_load:"); + output_addr_const (file, x); + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD) + { + fprintf (file, "%s", ":tls_gdcall:"); + output_addr_const (file, XVECEXP (x, 0, 0)); + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM) + { + fprintf (file, "%s", ":tls_ldcall:"); + assemble_name (file, get_some_local_dynamic_name ()); + } + else + abort (); + return; + case 'O': { struct s390_address ad; if (GET_CODE (x) != MEM - || !s390_decompose_address (XEXP (x, 0), &ad, TRUE) + || !s390_decompose_address (XEXP (x, 0), &ad) + || (ad.base && !REG_OK_FOR_BASE_STRICT_P (ad.base)) || ad.indx) abort (); @@ -2093,7 +3232,8 @@ print_operand (file, x, code) struct s390_address ad; if (GET_CODE (x) != MEM - || !s390_decompose_address (XEXP (x, 0), &ad, TRUE) + || !s390_decompose_address (XEXP (x, 0), &ad) + || (ad.base && !REG_OK_FOR_BASE_STRICT_P (ad.base)) || ad.indx) abort (); @@ -2248,7 +3388,11 @@ addr_generation_dependency_p (dep_rtx, insn) if (GET_CODE (dep_rtx) == SET) { target = SET_DEST (dep_rtx); - + if (GET_CODE (target) == STRICT_LOW_PART) + target = XEXP (target, 0); + while (GET_CODE (target) == SUBREG) + target = SUBREG_REG (target); + if (GET_CODE (target) == REG) { int regno = REGNO (target); @@ -2389,18 +3533,25 @@ s390_adjust_priority (insn, priority) } -/* Split all branches that exceed the maximum distance. */ +/* Split all branches that exceed the maximum distance. + Returns true if this created a new literal pool entry. + + Code generated by this routine is allowed to use + TEMP_REG as temporary scratch register. If this is + done, TEMP_USED is set to true. */ -static void -s390_split_branches (void) +static int +s390_split_branches (temp_reg, temp_used) + rtx temp_reg; + bool *temp_used; { - rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM); - rtx insn, pat, label, target, jump, tmp; + int new_literal = 0; + rtx insn, pat, tmp, target; + rtx *label; - /* In 64-bit mode we can jump +- 4GB. */ + /* We need correct insn addresses. */ - if (TARGET_64BIT) - return; + shorten_branches (get_insns ()); /* Find all branches that exceed 64KB, and split them. */ @@ -2410,61 +3561,66 @@ s390_split_branches (void) continue; pat = PATTERN (insn); - if (GET_CODE (pat) != SET) + if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2) + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx) continue; if (GET_CODE (SET_SRC (pat)) == LABEL_REF) { - label = SET_SRC (pat); + label = &SET_SRC (pat); } else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE) { if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF) - label = XEXP (SET_SRC (pat), 1); + label = &XEXP (SET_SRC (pat), 1); else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF) - label = XEXP (SET_SRC (pat), 2); + label = &XEXP (SET_SRC (pat), 2); else continue; } else continue; - if (get_attr_length (insn) == 4) + if (get_attr_length (insn) <= (TARGET_64BIT ? 6 : 4)) continue; - if (flag_pic) + *temp_used = 1; + + if (TARGET_64BIT) { - target = gen_rtx_UNSPEC (SImode, gen_rtvec (1, label), 100); - target = gen_rtx_CONST (SImode, target); - target = force_const_mem (SImode, target); - jump = gen_rtx_REG (Pmode, BASE_REGISTER); - jump = gen_rtx_PLUS (Pmode, jump, temp_reg); + tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, *label), insn); + INSN_ADDRESSES_NEW (tmp, -1); + + target = temp_reg; } - else + else if (!flag_pic) { - target = force_const_mem (Pmode, label); - jump = temp_reg; - } + new_literal = 1; + tmp = force_const_mem (Pmode, *label); + tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, tmp), insn); + INSN_ADDRESSES_NEW (tmp, -1); - if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE) + target = temp_reg; + } + else { - if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF) - jump = gen_rtx_IF_THEN_ELSE (VOIDmode, XEXP (SET_SRC (pat), 0), - jump, pc_rtx); - else - jump = gen_rtx_IF_THEN_ELSE (VOIDmode, XEXP (SET_SRC (pat), 0), - pc_rtx, jump); + new_literal = 1; + tmp = gen_rtx_UNSPEC (SImode, gen_rtvec (1, *label), 104); + tmp = gen_rtx_CONST (SImode, tmp); + tmp = force_const_mem (SImode, tmp); + tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, tmp), insn); + INSN_ADDRESSES_NEW (tmp, -1); + + target = gen_rtx_REG (Pmode, BASE_REGISTER); + target = gen_rtx_PLUS (Pmode, target, temp_reg); } - tmp = emit_insn_before (gen_rtx_SET (Pmode, temp_reg, target), insn); - INSN_ADDRESSES_NEW (tmp, -1); - - tmp = emit_jump_insn_before (gen_rtx_SET (VOIDmode, pc_rtx, jump), insn); - INSN_ADDRESSES_NEW (tmp, -1); - - remove_insn (insn); - insn = tmp; + if (!validate_change (insn, label, target, 0)) + abort (); } + + return new_literal; } @@ -2582,6 +3738,153 @@ replace_constant_pool_ref (x, ref, addr) } } +/* Check whether ADDR is an address that uses the base register, + without actually constituting a literal pool access. (This happens + in 31-bit PIC mode, where the base register is used as anchor for + relative addressing of local symbols.) + + Returns 1 if the base register occupies the base slot, + returns 2 if the base register occupies the index slot, + returns 0 if the address is not of this form. */ + +static int +find_base_register_in_addr (addr) + struct s390_address *addr; +{ + /* If DISP is complex, we might have a literal pool reference. */ + if (addr->disp && GET_CODE (addr->disp) != CONST_INT) + return 0; + + if (addr->base && REG_P (addr->base) && REGNO (addr->base) == BASE_REGISTER) + return 1; + + if (addr->indx && REG_P (addr->indx) && REGNO (addr->indx) == BASE_REGISTER) + return 2; + + return 0; +} + +/* Return true if X contains an address that uses the base register, + without actually constituting a literal pool access. */ + +static bool +find_base_register_ref (x) + rtx x; +{ + bool retv = FALSE; + struct s390_address addr; + int i, j; + const char *fmt; + + /* Addresses can only occur inside a MEM ... */ + if (GET_CODE (x) == MEM) + { + if (s390_decompose_address (XEXP (x, 0), &addr) + && find_base_register_in_addr (&addr)) + return TRUE; + } + + /* ... or a load-address type pattern. */ + if (GET_CODE (x) == SET && GET_CODE (SET_DEST (x)) == REG) + { + if (s390_decompose_address (SET_SRC (x), &addr) + && find_base_register_in_addr (&addr)) + return TRUE; + } + + fmt = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + { + retv |= find_base_register_ref (XEXP (x, i)); + } + else if (fmt[i] == 'E') + { + for (j = 0; j < XVECLEN (x, i); j++) + retv |= find_base_register_ref (XVECEXP (x, i, j)); + } + } + + return retv; +} + +/* If X contains an address that uses the base register, + without actually constituting a literal pool access, + replace the base register with REPL in all such cases. + + Handles both MEMs and load address patterns. */ + +static void +replace_base_register_ref (x, repl) + rtx *x; + rtx repl; +{ + struct s390_address addr; + rtx new_addr; + int i, j, pos; + const char *fmt; + + /* Addresses can only occur inside a MEM ... */ + if (GET_CODE (*x) == MEM) + { + if (s390_decompose_address (XEXP (*x, 0), &addr) + && (pos = find_base_register_in_addr (&addr))) + { + if (pos == 1) + addr.base = repl; + else + addr.indx = repl; + + new_addr = addr.base; + if (addr.indx) + new_addr = gen_rtx_PLUS (Pmode, new_addr, addr.indx); + if (addr.disp) + new_addr = gen_rtx_PLUS (Pmode, new_addr, addr.disp); + + *x = replace_equiv_address (*x, new_addr); + return; + } + } + + /* ... or a load-address type pattern. */ + if (GET_CODE (*x) == SET && GET_CODE (SET_DEST (*x)) == REG) + { + if (s390_decompose_address (SET_SRC (*x), &addr) + && (pos = find_base_register_in_addr (&addr))) + { + if (pos == 1) + addr.base = repl; + else + addr.indx = repl; + + new_addr = addr.base; + if (addr.indx) + new_addr = gen_rtx_PLUS (Pmode, new_addr, addr.indx); + if (addr.disp) + new_addr = gen_rtx_PLUS (Pmode, new_addr, addr.disp); + + SET_SRC (*x) = new_addr; + return; + } + } + + fmt = GET_RTX_FORMAT (GET_CODE (*x)); + for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--) + { + if (fmt[i] == 'e') + { + replace_base_register_ref (&XEXP (*x, i), repl); + } + else if (fmt[i] == 'E') + { + for (j = 0; j < XVECLEN (*x, i); j++) + replace_base_register_ref (&XVECEXP (*x, i, j), repl); + } + } +} + + /* We keep a list of constants we which we have to add to internal constant tables in the middle of large functions. */ @@ -2613,17 +3916,26 @@ struct constant_pool { struct constant_pool *next; rtx first_insn; - rtx last_insn; + rtx pool_insn; + bitmap insns; struct constant *constants[NR_C_MODES]; rtx label; int size; + bool anchor; }; +static struct constant_pool * s390_chunkify_start PARAMS ((rtx, bool *)); +static void s390_chunkify_finish PARAMS ((struct constant_pool *, rtx)); +static void s390_chunkify_cancel PARAMS ((struct constant_pool *)); + static struct constant_pool *s390_start_pool PARAMS ((struct constant_pool **, rtx)); static void s390_end_pool PARAMS ((struct constant_pool *, rtx)); +static void s390_add_pool_insn PARAMS ((struct constant_pool *, rtx)); static struct constant_pool *s390_find_pool PARAMS ((struct constant_pool *, rtx)); -static rtx s390_add_pool PARAMS ((struct constant_pool *, rtx, enum machine_mode)); +static void s390_add_constant PARAMS ((struct constant_pool *, rtx, enum machine_mode)); +static rtx s390_find_constant PARAMS ((struct constant_pool *, rtx, enum machine_mode)); +static void s390_add_anchor PARAMS ((struct constant_pool *)); static rtx s390_dump_pool PARAMS ((struct constant_pool *)); static void s390_free_pool PARAMS ((struct constant_pool *)); @@ -2645,9 +3957,11 @@ s390_start_pool (pool_list, insn) pool->label = gen_label_rtx (); pool->first_insn = insn; - pool->last_insn = NULL_RTX; + pool->pool_insn = NULL_RTX; + pool->insns = BITMAP_XMALLOC (); pool->size = 0; - + pool->anchor = FALSE; + for (prev = pool_list; *prev; prev = &(*prev)->next) ; *prev = pool; @@ -2655,14 +3969,31 @@ s390_start_pool (pool_list, insn) return pool; } -/* End range of instructions covered by POOL at INSN. */ +/* End range of instructions covered by POOL at INSN and emit + placeholder insn representing the pool. */ static void s390_end_pool (pool, insn) struct constant_pool *pool; rtx insn; { - pool->last_insn = insn; + rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */); + + if (!insn) + insn = get_last_insn (); + + pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn); + INSN_ADDRESSES_NEW (pool->pool_insn, -1); +} + +/* Add INSN to the list of insns covered by POOL. */ + +static void +s390_add_pool_insn (pool, insn) + struct constant_pool *pool; + rtx insn; +{ + bitmap_set_bit (pool->insns, INSN_UID (insn)); } /* Return pool out of POOL_LIST that covers INSN. */ @@ -2672,33 +4003,24 @@ s390_find_pool (pool_list, insn) struct constant_pool *pool_list; rtx insn; { - int addr = INSN_ADDRESSES (INSN_UID (insn)); struct constant_pool *pool; - if (addr == -1) - return NULL; - for (pool = pool_list; pool; pool = pool->next) - if (INSN_ADDRESSES (INSN_UID (pool->first_insn)) <= addr - && (pool->last_insn == NULL_RTX - || INSN_ADDRESSES (INSN_UID (pool->last_insn)) > addr)) + if (bitmap_bit_p (pool->insns, INSN_UID (insn))) break; return pool; } -/* Add constant VAL of mode MODE to the constant pool POOL. - Return an RTX describing the distance from the start of - the pool to the location of the new constant. */ +/* Add constant VAL of mode MODE to the constant pool POOL. */ -static rtx -s390_add_pool (pool, val, mode) +static void +s390_add_constant (pool, val, mode) struct constant_pool *pool; rtx val; enum machine_mode mode; { struct constant *c; - rtx offset; int i; for (i = 0; i < NR_C_MODES; i++) @@ -2720,13 +4042,54 @@ s390_add_pool (pool, val, mode) pool->constants[i] = c; pool->size += GET_MODE_SIZE (mode); } +} - offset = gen_rtx_MINUS (Pmode, gen_rtx_LABEL_REF (Pmode, c->label), - gen_rtx_LABEL_REF (Pmode, pool->label)); +/* Find constant VAL of mode MODE in the constant pool POOL. + Return an RTX describing the distance from the start of + the pool to the location of the new constant. */ + +static rtx +s390_find_constant (pool, val, mode) + struct constant_pool *pool; + rtx val; + enum machine_mode mode; +{ + struct constant *c; + rtx offset; + int i; + + for (i = 0; i < NR_C_MODES; i++) + if (constant_modes[i] == mode) + break; + if (i == NR_C_MODES) + abort (); + + for (c = pool->constants[i]; c != NULL; c = c->next) + if (rtx_equal_p (val, c->value)) + break; + + if (c == NULL) + abort (); + + offset = gen_rtx_MINUS (Pmode, gen_rtx_LABEL_REF (Pmode, c->label), + gen_rtx_LABEL_REF (Pmode, pool->label)); offset = gen_rtx_CONST (Pmode, offset); return offset; } +/* Set 'anchor' flag in POOL. */ + +static void +s390_add_anchor (pool) + struct constant_pool *pool; +{ + if (!pool->anchor) + { + pool->anchor = TRUE; + pool->size += 4; + } +} + /* Dump out the constants in POOL. */ static rtx @@ -2737,31 +4100,47 @@ s390_dump_pool (pool) rtx insn; int i; - /* Select location to put literal pool. */ - if (TARGET_64BIT) - insn = get_last_insn (); - else - insn = pool->last_insn? pool->last_insn : get_last_insn (); - /* Pool start insn switches to proper section and guarantees necessary alignment. */ if (TARGET_64BIT) - insn = emit_insn_after (gen_pool_start_64 (), insn); + insn = emit_insn_after (gen_pool_start_64 (), pool->pool_insn); else - insn = emit_insn_after (gen_pool_start_31 (), insn); + insn = emit_insn_after (gen_pool_start_31 (), pool->pool_insn); INSN_ADDRESSES_NEW (insn, -1); insn = emit_label_after (pool->label, insn); INSN_ADDRESSES_NEW (insn, -1); + /* Emit anchor if we need one. */ + if (pool->anchor) + { + rtx anchor = gen_rtx_LABEL_REF (VOIDmode, pool->label); + anchor = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, anchor), 105); + anchor = gen_rtx_CONST (VOIDmode, anchor); + insn = emit_insn_after (gen_consttable_si (anchor), insn); + INSN_ADDRESSES_NEW (insn, -1); + } + /* Dump constants in descending alignment requirement order, ensuring proper alignment for every constant. */ for (i = 0; i < NR_C_MODES; i++) for (c = pool->constants[i]; c; c = c->next) { + /* Convert 104 unspecs to pool-relative references. */ + rtx value = c->value; + if (GET_CODE (value) == CONST + && GET_CODE (XEXP (value, 0)) == UNSPEC + && XINT (XEXP (value, 0), 1) == 104 + && XVECLEN (XEXP (value, 0), 0) == 1) + { + value = gen_rtx_MINUS (Pmode, XVECEXP (XEXP (value, 0), 0, 0), + gen_rtx_LABEL_REF (VOIDmode, pool->label)); + value = gen_rtx_CONST (VOIDmode, value); + } + insn = emit_label_after (c->label, insn); INSN_ADDRESSES_NEW (insn, -1); - insn = emit_insn_after (gen_consttable[i] (c->value), insn); + insn = emit_insn_after (gen_consttable[i] (value), insn); INSN_ADDRESSES_NEW (insn, -1); } @@ -2776,6 +4155,9 @@ s390_dump_pool (pool) insn = emit_barrier_after (insn); INSN_ADDRESSES_NEW (insn, -1); + /* Remove placeholder insn. */ + remove_insn (pool->pool_insn); + return insn; } @@ -2798,57 +4180,86 @@ s390_free_pool (pool) } } + BITMAP_XFREE (pool->insns); free (pool); } -/* Used in s390.md for branch length calculation. */ -int s390_pool_overflow = 0; -/* Chunkify the literal pool if required. */ +/* Chunkify the literal pool if required. + + Code generated by this routine is allowed to use + TEMP_REG as temporary scratch register. If this is + done, TEMP_USED is set to true. */ #define S390_POOL_CHUNK_MIN 0xc00 #define S390_POOL_CHUNK_MAX 0xe00 -static void -s390_chunkify_pool (void) +static struct constant_pool * +s390_chunkify_start (temp_reg, temp_used) + rtx temp_reg; + bool *temp_used; { - rtx base_reg = gen_rtx_REG (Pmode, - TARGET_64BIT? BASE_REGISTER : RETURN_REGNUM); + rtx base_reg = gen_rtx_REG (Pmode, BASE_REGISTER); struct constant_pool *curr_pool = NULL, *pool_list = NULL; int extra_size = 0; bitmap far_labels; rtx insn; + rtx (*gen_reload_base) PARAMS ((rtx, rtx)) = + TARGET_64BIT? gen_reload_base_64 : gen_reload_base_31; + + /* Do we need to chunkify the literal pool? */ if (get_pool_size () < S390_POOL_CHUNK_MAX) - return; + return NULL; + + /* We need correct insn addresses. */ + + shorten_branches (get_insns ()); /* Scan all insns and move literals to pool chunks. - Replace all occurrances of literal pool references - by explicit references to pool chunk entries. */ + Also, emit anchor reload insns before every insn that uses + the literal pool base register as anchor pointer. */ for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) { - if (GET_CODE (insn) == INSN) + if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN) { - rtx addr, pool_ref = NULL_RTX; + rtx pool_ref = NULL_RTX; find_constant_pool_ref (PATTERN (insn), &pool_ref); if (pool_ref) { if (!curr_pool) curr_pool = s390_start_pool (&pool_list, insn); - addr = s390_add_pool (curr_pool, get_pool_constant (pool_ref), - get_pool_mode (pool_ref)); + s390_add_constant (curr_pool, get_pool_constant (pool_ref), + get_pool_mode (pool_ref)); + s390_add_pool_insn (curr_pool, insn); + } - addr = gen_rtx_PLUS (Pmode, base_reg, addr); - replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr); - INSN_CODE (insn) = -1; + else if (!TARGET_64BIT && flag_pic + && find_base_register_ref (PATTERN (insn))) + { + rtx new = gen_reload_anchor (temp_reg, base_reg); + new = emit_insn_before (new, insn); + INSN_ADDRESSES_NEW (new, INSN_ADDRESSES (INSN_UID (insn))); + extra_size += 8; + *temp_used = 1; + + if (!curr_pool) + curr_pool = s390_start_pool (&pool_list, new); + + s390_add_anchor (curr_pool); + s390_add_pool_insn (curr_pool, insn); } } + if (GET_CODE (insn) == JUMP_INSN || GET_CODE (insn) == CODE_LABEL) + if (curr_pool) + s390_add_pool_insn (curr_pool, insn); + if (!curr_pool || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn) || INSN_ADDRESSES (INSN_UID (insn)) == -1) @@ -2859,7 +4270,7 @@ s390_chunkify_pool (void) if (curr_pool->size < S390_POOL_CHUNK_MAX) continue; - s390_end_pool (curr_pool, insn); + s390_end_pool (curr_pool, NULL_RTX); curr_pool = NULL; } else @@ -2872,11 +4283,8 @@ s390_chunkify_pool (void) Those will have an effect on code size, which we need to consider here. This calculation makes rather pessimistic worst-case assumptions. */ - if (GET_CODE (insn) == CODE_LABEL - || GET_CODE (insn) == JUMP_INSN) + if (GET_CODE (insn) == CODE_LABEL) extra_size += 6; - else if (GET_CODE (insn) == CALL_INSN) - extra_size += 4; if (chunk_size < S390_POOL_CHUNK_MIN && curr_pool->size < S390_POOL_CHUNK_MIN) @@ -2892,12 +4300,22 @@ s390_chunkify_pool (void) /* ... so if we don't find one in time, create one. */ else if ((chunk_size > S390_POOL_CHUNK_MAX - || curr_pool->size > S390_POOL_CHUNK_MAX) - && (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN)) + || curr_pool->size > S390_POOL_CHUNK_MAX)) { - int addr = INSN_ADDRESSES (INSN_UID (insn)); rtx label, jump, barrier; + /* We can insert the barrier only after a 'real' insn. */ + if (GET_CODE (insn) != INSN && GET_CODE (insn) != CALL_INSN) + continue; + if (get_attr_length (insn) == 0) + continue; + + /* Don't separate insns created by s390_split_branches. */ + if (GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) == SET + && rtx_equal_p (SET_DEST (PATTERN (insn)), temp_reg)) + continue; + label = gen_label_rtx (); jump = emit_jump_insn_after (gen_jump (label), insn); barrier = emit_barrier_after (jump); @@ -2905,8 +4323,8 @@ s390_chunkify_pool (void) JUMP_LABEL (jump) = label; LABEL_NUSES (label) = 1; - INSN_ADDRESSES_NEW (jump, addr+1); - INSN_ADDRESSES_NEW (barrier, addr+1); + INSN_ADDRESSES_NEW (jump, -1); + INSN_ADDRESSES_NEW (barrier, -1); INSN_ADDRESSES_NEW (insn, -1); s390_end_pool (curr_pool, barrier); @@ -2916,10 +4334,8 @@ s390_chunkify_pool (void) } } - /* Dump out all literal pools. */ - - for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next) - s390_dump_pool (curr_pool); + if (curr_pool) + s390_end_pool (curr_pool, NULL_RTX); /* Find all labels that are branched into @@ -2953,22 +4369,12 @@ s390_chunkify_pool (void) else if (GET_CODE (insn) == JUMP_INSN) { rtx pat = PATTERN (insn); + if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2) + pat = XVECEXP (pat, 0, 0); + if (GET_CODE (pat) == SET) { - rtx label = 0; - - if (GET_CODE (SET_SRC (pat)) == LABEL_REF) - { - label = XEXP (SET_SRC (pat), 0); - } - else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE) - { - if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF) - label = XEXP (XEXP (SET_SRC (pat), 1), 0); - else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF) - label = XEXP (XEXP (SET_SRC (pat), 2), 0); - } - + rtx label = JUMP_LABEL (insn); if (label) { if (s390_find_pool (pool_list, label) @@ -3009,19 +4415,11 @@ s390_chunkify_pool (void) /* Insert base register reload insns before every pool. */ for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next) - if (TARGET_64BIT) - { - rtx pool_ref = gen_rtx_LABEL_REF (Pmode, curr_pool->label); - rtx new_insn = gen_rtx_SET (Pmode, base_reg, pool_ref); - rtx insn = curr_pool->first_insn; - INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1); - } - else - { - rtx new_insn = gen_reload_base (base_reg, curr_pool->label); - rtx insn = curr_pool->first_insn; - INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1); - } + { + rtx new_insn = gen_reload_base (base_reg, curr_pool->label); + rtx insn = curr_pool->first_insn; + INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1); + } /* Insert base register reload insns at every far label. */ @@ -3032,60 +4430,137 @@ s390_chunkify_pool (void) struct constant_pool *pool = s390_find_pool (pool_list, insn); if (pool) { - if (TARGET_64BIT) - { - rtx pool_ref = gen_rtx_LABEL_REF (Pmode, pool->label); - rtx new_insn = gen_rtx_SET (Pmode, base_reg, pool_ref); - INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1); - } - else - { - rtx new_insn = gen_reload_base (base_reg, pool->label); - INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1); - } + rtx new_insn = gen_reload_base (base_reg, pool->label); + INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1); } } - /* Insert base register reload insns after every call if necessary. */ - - if (REGNO (base_reg) == RETURN_REGNUM) - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - if (GET_CODE (insn) == CALL_INSN) - { - struct constant_pool *pool = s390_find_pool (pool_list, insn); - if (pool) - { - rtx new_insn = gen_reload_base2 (base_reg, pool->label); - INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1); - } - } + + BITMAP_XFREE (far_labels); /* Recompute insn addresses. */ - s390_pool_overflow = 1; init_insn_lengths (); shorten_branches (get_insns ()); - s390_pool_overflow = 0; - /* Insert base register reload insns after far branches. */ + return pool_list; +} - if (!TARGET_64BIT) - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - if (GET_CODE (insn) == JUMP_INSN - && GET_CODE (PATTERN (insn)) == SET - && get_attr_length (insn) >= 12) - { - struct constant_pool *pool = s390_find_pool (pool_list, insn); - if (pool) +/* POOL_LIST is a chunk list as prepared by s390_chunkify_start. + After we have decided to use this list, finish implementing + all changes to the current function as required. + + Code generated by this routine is allowed to use + TEMP_REG as temporary scratch register. */ + +static void +s390_chunkify_finish (pool_list, temp_reg) + struct constant_pool *pool_list; + rtx temp_reg; +{ + rtx base_reg = gen_rtx_REG (Pmode, BASE_REGISTER); + struct constant_pool *curr_pool = NULL; + rtx insn; + + + /* Replace all literal pool references. */ + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + curr_pool = s390_find_pool (pool_list, insn); + if (!curr_pool) + continue; + + if (GET_CODE (insn) == INSN || GET_CODE (insn) == CALL_INSN) + { + rtx addr, pool_ref = NULL_RTX; + find_constant_pool_ref (PATTERN (insn), &pool_ref); + if (pool_ref) + { + addr = s390_find_constant (curr_pool, get_pool_constant (pool_ref), + get_pool_mode (pool_ref)); + addr = gen_rtx_PLUS (Pmode, base_reg, addr); + replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr); + INSN_CODE (insn) = -1; + } + + else if (!TARGET_64BIT && flag_pic + && find_base_register_ref (PATTERN (insn))) { - rtx new_insn = gen_reload_base (base_reg, pool->label); - INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1); + replace_base_register_ref (&PATTERN (insn), temp_reg); } + } + } + + /* Dump out all literal pools. */ + + for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next) + s390_dump_pool (curr_pool); + + /* Free pool list. */ + + while (pool_list) + { + struct constant_pool *next = pool_list->next; + s390_free_pool (pool_list); + pool_list = next; + } +} + +/* POOL_LIST is a chunk list as prepared by s390_chunkify_start. + We have decided we cannot use this list, so revert all changes + to the current function that were done by s390_chunkify_start. */ + +static void +s390_chunkify_cancel (pool_list) + struct constant_pool *pool_list; +{ + struct constant_pool *curr_pool = NULL; + rtx insn; + + /* Remove all pool placeholder insns. */ + + for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next) + { + /* Did we insert an extra barrier? Remove it. */ + rtx barrier = PREV_INSN (curr_pool->pool_insn); + rtx jump = barrier? PREV_INSN (barrier) : NULL_RTX; + rtx label = NEXT_INSN (curr_pool->pool_insn); + + if (jump && GET_CODE (jump) == JUMP_INSN + && barrier && GET_CODE (barrier) == BARRIER + && label && GET_CODE (label) == CODE_LABEL + && GET_CODE (PATTERN (jump)) == SET + && SET_DEST (PATTERN (jump)) == pc_rtx + && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF + && XEXP (SET_SRC (PATTERN (jump)), 0) == label) + { + remove_insn (jump); + remove_insn (barrier); + remove_insn (label); } + remove_insn (curr_pool->pool_insn); + } - /* Free all memory. */ + /* Remove all base/anchor register reload insns. */ + + for (insn = get_insns (); insn; ) + { + rtx next_insn = NEXT_INSN (insn); + + if (GET_CODE (insn) == INSN + && GET_CODE (PATTERN (insn)) == SET + && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC + && (XINT (SET_SRC (PATTERN (insn)), 1) == 210 + || XINT (SET_SRC (PATTERN (insn)), 1) == 211)) + remove_insn (insn); + + insn = next_insn; + } + + /* Free pool list. */ while (pool_list) { @@ -3093,8 +4568,6 @@ s390_chunkify_pool (void) s390_free_pool (pool_list); pool_list = next; } - - BITMAP_XFREE (far_labels); } @@ -3108,54 +4581,359 @@ int s390_nr_constants; /* Output main constant pool to stdio stream FILE. */ void -s390_output_constant_pool (file) - FILE *file; +s390_output_constant_pool (start_label, end_label) + rtx start_label; + rtx end_label; { - /* Output constant pool. */ - if (s390_nr_constants) + if (TARGET_64BIT) { - if (TARGET_64BIT) + readonly_data_section (); + ASM_OUTPUT_ALIGN (asm_out_file, 3); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (start_label)); + } + else + { + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", + CODE_LABEL_NUMBER (start_label)); + ASM_OUTPUT_ALIGN (asm_out_file, 2); + } + + s390_pool_count = 0; + output_constant_pool (current_function_name, current_function_decl); + s390_pool_count = -1; + if (TARGET_64BIT) + function_section (current_function_decl); + else + { + ASM_OUTPUT_ALIGN (asm_out_file, 1); + ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (end_label)); + } +} + +/* Rework the prolog/epilog to avoid saving/restoring + registers unnecessarily. If TEMP_REGNO is nonnegative, + it specifies the number of a caller-saved register used + as temporary scratch register by code emitted during + machine dependent reorg. */ + +static void +s390_optimize_prolog (temp_regno) + int temp_regno; +{ + int save_first, save_last, restore_first, restore_last; + int i, j; + rtx insn, new_insn, next_insn; + + /* Recompute regs_ever_live data for special registers. */ + regs_ever_live[BASE_REGISTER] = 0; + regs_ever_live[RETURN_REGNUM] = 0; + regs_ever_live[STACK_POINTER_REGNUM] = cfun->machine->frame_size > 0; + + /* If there is (possibly) any pool entry, we need to + load the base register. + ??? FIXME: this should be more precise. */ + if (get_pool_size ()) + regs_ever_live[BASE_REGISTER] = 1; + + /* In non-leaf functions, the prolog/epilog code relies + on RETURN_REGNUM being saved in any case. */ + if (!current_function_is_leaf) + regs_ever_live[RETURN_REGNUM] = 1; + + /* We need to save/restore the temporary register. */ + if (temp_regno >= 0) + regs_ever_live[temp_regno] = 1; + + + /* Find first and last gpr to be saved. */ + + for (i = 6; i < 16; i++) + if (regs_ever_live[i]) + if (!global_regs[i] + || i == STACK_POINTER_REGNUM + || i == RETURN_REGNUM + || i == BASE_REGISTER + || (flag_pic && i == (int)PIC_OFFSET_TABLE_REGNUM)) + break; + + for (j = 15; j > i; j--) + if (regs_ever_live[j]) + if (!global_regs[j] + || j == STACK_POINTER_REGNUM + || j == RETURN_REGNUM + || j == BASE_REGISTER + || (flag_pic && j == (int)PIC_OFFSET_TABLE_REGNUM)) + break; + + if (i == 16) + { + /* Nothing to save/restore. */ + save_first = restore_first = -1; + save_last = restore_last = -1; + } + else + { + /* Save/restore from i to j. */ + save_first = restore_first = i; + save_last = restore_last = j; + } + + /* Varargs functions need to save gprs 2 to 6. */ + if (current_function_stdarg) + { + save_first = 2; + if (save_last < 6) + save_last = 6; + } + + + /* If all special registers are in fact used, there's nothing we + can do, so no point in walking the insn list. */ + if (i <= BASE_REGISTER && j >= BASE_REGISTER + && i <= RETURN_REGNUM && j >= RETURN_REGNUM) + return; + + + /* Search for prolog/epilog insns and replace them. */ + + for (insn = get_insns (); insn; insn = next_insn) + { + int first, last, off; + rtx set, base, offset; + + next_insn = NEXT_INSN (insn); + + if (GET_CODE (insn) != INSN) + continue; + if (GET_CODE (PATTERN (insn)) != PARALLEL) + continue; + + if (store_multiple_operation (PATTERN (insn), VOIDmode)) { - fprintf (file, "\tlarl\t%s,.LT%X\n", reg_names[BASE_REGISTER], - s390_function_count); - readonly_data_section (); - ASM_OUTPUT_ALIGN (file, 3); + set = XVECEXP (PATTERN (insn), 0, 0); + first = REGNO (SET_SRC (set)); + last = first + XVECLEN (PATTERN (insn), 0) - 1; + offset = const0_rtx; + base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset); + off = INTVAL (offset) - first * UNITS_PER_WORD; + + if (GET_CODE (base) != REG || off < 0) + continue; + if (first > BASE_REGISTER && first > RETURN_REGNUM) + continue; + if (last < BASE_REGISTER && last < RETURN_REGNUM) + continue; + + if (save_first != -1) + { + new_insn = save_gprs (base, off, save_first, save_last); + new_insn = emit_insn_before (new_insn, insn); + INSN_ADDRESSES_NEW (new_insn, -1); + } + + remove_insn (insn); } - else + + if (load_multiple_operation (PATTERN (insn), VOIDmode)) { - fprintf (file, "\tbras\t%s,.LTN%X\n", reg_names[BASE_REGISTER], - s390_function_count); + set = XVECEXP (PATTERN (insn), 0, 0); + first = REGNO (SET_DEST (set)); + last = first + XVECLEN (PATTERN (insn), 0) - 1; + offset = const0_rtx; + base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset); + off = INTVAL (offset) - first * UNITS_PER_WORD; + + if (GET_CODE (base) != REG || off < 0) + continue; + if (first > BASE_REGISTER && first > RETURN_REGNUM) + continue; + if (last < BASE_REGISTER && last < RETURN_REGNUM) + continue; + + if (restore_first != -1) + { + new_insn = restore_gprs (base, off, restore_first, restore_last); + new_insn = emit_insn_before (new_insn, insn); + INSN_ADDRESSES_NEW (new_insn, -1); + } + + remove_insn (insn); } - fprintf (file, ".LT%X:\n", s390_function_count); + } +} - s390_pool_count = 0; - output_constant_pool (current_function_name, current_function_decl); - s390_pool_count = -1; +/* Check whether any insn in the function makes use of the original + value of RETURN_REG (e.g. for __builtin_return_address). + If so, insert an insn reloading that value. - if (TARGET_64BIT) - function_section (current_function_decl); + Return true if any such insn was found. */ + +static bool +s390_fixup_clobbered_return_reg (return_reg) + rtx return_reg; +{ + bool replacement_done = 0; + rtx insn; + + /* If we never called __builtin_return_address, register 14 + might have been used as temp during the prolog; we do + not want to touch those uses. */ + if (!has_hard_reg_initial_val (Pmode, REGNO (return_reg))) + return false; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + rtx reg, off, new_insn; + + if (GET_CODE (insn) != INSN) + continue; + if (!reg_referenced_p (return_reg, PATTERN (insn))) + continue; + if (GET_CODE (PATTERN (insn)) == PARALLEL + && store_multiple_operation (PATTERN (insn), VOIDmode)) + continue; + + if (frame_pointer_needed) + reg = hard_frame_pointer_rtx; else - fprintf (file, ".LTN%X:\n", s390_function_count); + reg = stack_pointer_rtx; + + off = GEN_INT (cfun->machine->frame_size + REGNO (return_reg) * UNITS_PER_WORD); + if (INTVAL (off) >= 4096) + { + off = force_const_mem (Pmode, off); + new_insn = gen_rtx_SET (Pmode, return_reg, off); + new_insn = emit_insn_before (new_insn, insn); + INSN_ADDRESSES_NEW (new_insn, -1); + off = return_reg; + } + + new_insn = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, reg, off)); + new_insn = gen_rtx_SET (Pmode, return_reg, new_insn); + new_insn = emit_insn_before (new_insn, insn); + INSN_ADDRESSES_NEW (new_insn, -1); + + replacement_done = 1; } -} + return replacement_done; +} -/* Return true if floating point registers need to be saved. */ +/* Perform machine-dependent processing. */ -static int -save_fprs_p () +void +s390_machine_dependent_reorg (first) + rtx first ATTRIBUTE_UNUSED; { - int i; - if (!TARGET_64BIT) - return 0; - for (i=24; i<=31; i++) + bool fixed_up_clobbered_return_reg = 0; + rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM); + bool temp_used = 0; + + /* Make sure all splits have been performed; splits after + machine_dependent_reorg might confuse insn length counts. */ + split_all_insns_noflow (); + + + /* There are two problematic situations we need to correct: + + - the literal pool might be > 4096 bytes in size, so that + some of its elements cannot be directly accessed + + - a branch target might be > 64K away from the branch, so that + it is not possible to use a PC-relative instruction. + + To fix those, we split the single literal pool into multiple + pool chunks, reloading the pool base register at various + points throughout the function to ensure it always points to + the pool chunk the following code expects, and / or replace + PC-relative branches by absolute branches. + + However, the two problems are interdependent: splitting the + literal pool can move a branch further away from its target, + causing the 64K limit to overflow, and on the other hand, + replacing a PC-relative branch by an absolute branch means + we need to put the branch target address into the literal + pool, possibly causing it to overflow. + + So, we loop trying to fix up both problems until we manage + to satisfy both conditions at the same time. Note that the + loop is guaranteed to terminate as every pass of the loop + strictly decreases the total number of PC-relative branches + in the function. (This is not completely true as there + might be branch-over-pool insns introduced by chunkify_start. + Those never need to be split however.) */ + + for (;;) { - if (regs_ever_live[i] == 1) - return 1; + struct constant_pool *pool_list; + + /* Try to chunkify the literal pool. */ + pool_list = s390_chunkify_start (temp_reg, &temp_used); + + /* Split out-of-range branches. If this has created new + literal pool entries, cancel current chunk list and + recompute it. */ + if (s390_split_branches (temp_reg, &temp_used)) + { + if (pool_list) + s390_chunkify_cancel (pool_list); + + continue; + } + + /* Check whether we have clobbered a use of the return + register (e.g. for __builtin_return_address). If so, + add insns reloading the register where necessary. */ + if (temp_used && !fixed_up_clobbered_return_reg + && s390_fixup_clobbered_return_reg (temp_reg)) + { + fixed_up_clobbered_return_reg = 1; + + /* The fixup insns might have caused a jump to overflow. */ + if (pool_list) + s390_chunkify_cancel (pool_list); + + continue; + } + + /* If we made it up to here, both conditions are satisfied. + Finish up pool chunkification if required. */ + if (pool_list) + s390_chunkify_finish (pool_list, temp_reg); + + break; } - return 0; + + s390_optimize_prolog (temp_used? RETURN_REGNUM : -1); } + +/* Return an RTL expression representing the value of the return address + for the frame COUNT steps up from the current frame. FRAME is the + frame pointer of that frame. */ + +rtx +s390_return_addr_rtx (count, frame) + int count; + rtx frame; +{ + rtx addr; + + /* For the current frame, we use the initial value of RETURN_REGNUM. + This works both in leaf and non-leaf functions. */ + + if (count == 0) + return get_hard_reg_initial_val (Pmode, RETURN_REGNUM); + + /* For frames farther back, we read the stack slot where the + corresponding RETURN_REGNUM value was saved. */ + + addr = plus_constant (frame, RETURN_REGNUM * UNITS_PER_WORD); + addr = memory_address (Pmode, addr); + return gen_rtx_MEM (Pmode, addr); +} + /* Find first call clobbered register unsused in a function. This could be used as base register in a leaf function or for holding the return address before epilogue. */ @@ -3173,9 +4951,9 @@ find_unused_clobbered_reg () /* Fill FRAME with info about frame of current function. */ static void -s390_frame_info (frame) - struct s390_frame *frame; +s390_frame_info () { + char gprs_ever_live[16]; int i, j; HOST_WIDE_INT fsize = get_frame_size (); @@ -3183,79 +4961,59 @@ s390_frame_info (frame) fatal_error ("Total size of local variables exceeds architecture limit."); /* fprs 8 - 15 are caller saved for 64 Bit ABI. */ - frame->save_fprs_p = save_fprs_p (); + cfun->machine->save_fprs_p = 0; + if (TARGET_64BIT) + for (i = 24; i < 32; i++) + if (regs_ever_live[i] && !global_regs[i]) + { + cfun->machine->save_fprs_p = 1; + break; + } - frame->frame_size = fsize + frame->save_fprs_p * 64; + cfun->machine->frame_size = fsize + cfun->machine->save_fprs_p * 64; /* Does function need to setup frame and save area. */ if (! current_function_is_leaf - || frame->frame_size > 0 + || cfun->machine->frame_size > 0 || current_function_calls_alloca - || current_function_stdarg - || current_function_varargs) - frame->frame_size += STARTING_FRAME_OFFSET; - - /* If we need to allocate a frame, the stack pointer is changed. */ + || current_function_stdarg) + cfun->machine->frame_size += STARTING_FRAME_OFFSET; - if (frame->frame_size > 0) - regs_ever_live[STACK_POINTER_REGNUM] = 1; + /* Find first and last gpr to be saved. Note that at this point, + we assume the return register and the base register always + need to be saved. This is done because the usage of these + register might change even after the prolog was emitted. + If it turns out later that we really don't need them, the + prolog/epilog code is modified again. */ - /* If the literal pool might overflow, the return register might - be used as temp literal pointer. */ - - if (!TARGET_64BIT && get_pool_size () >= S390_POOL_CHUNK_MAX / 2) - regs_ever_live[RETURN_REGNUM] = 1; + for (i = 0; i < 16; i++) + gprs_ever_live[i] = regs_ever_live[i] && !global_regs[i]; - /* If there is (possibly) any pool entry, we need to - load base register. */ - - if (get_pool_size () - || !CONST_OK_FOR_LETTER_P (frame->frame_size, 'K') - || (!TARGET_64BIT && current_function_uses_pic_offset_table)) - regs_ever_live[BASE_REGISTER] = 1; - - /* If we need the GOT pointer, remember to save/restore it. */ - - if (current_function_uses_pic_offset_table) - regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1; - - /* Frame pointer needed. */ - - frame->frame_pointer_p = frame_pointer_needed; - - /* Find first and last gpr to be saved. */ + if (flag_pic) + gprs_ever_live[PIC_OFFSET_TABLE_REGNUM] = + regs_ever_live[PIC_OFFSET_TABLE_REGNUM]; + gprs_ever_live[BASE_REGISTER] = 1; + gprs_ever_live[RETURN_REGNUM] = 1; + gprs_ever_live[STACK_POINTER_REGNUM] = cfun->machine->frame_size > 0; for (i = 6; i < 16; i++) - if (regs_ever_live[i]) + if (gprs_ever_live[i]) break; for (j = 15; j > i; j--) - if (regs_ever_live[j]) + if (gprs_ever_live[j]) break; - - if (i == 16) - { - /* Nothing to save / restore. */ - frame->first_save_gpr = -1; - frame->first_restore_gpr = -1; - frame->last_save_gpr = -1; - frame->return_reg_saved_p = 0; - } - else - { - /* Save / Restore from gpr i to j. */ - frame->first_save_gpr = i; - frame->first_restore_gpr = i; - frame->last_save_gpr = j; - frame->return_reg_saved_p = (j >= RETURN_REGNUM && i <= RETURN_REGNUM); - } - if (current_function_stdarg || current_function_varargs) - { - /* Varargs function need to save from gpr 2 to gpr 15. */ - frame->first_save_gpr = 2; - } + + /* Save / Restore from gpr i to j. */ + cfun->machine->first_save_gpr = i; + cfun->machine->first_restore_gpr = i; + cfun->machine->last_save_gpr = j; + + /* Varargs functions need to save gprs 2 to 6. */ + if (current_function_stdarg) + cfun->machine->first_save_gpr = 2; } /* Return offset between argument pointer and frame pointer @@ -3264,13 +5022,29 @@ s390_frame_info (frame) int s390_arg_frame_offset () { - struct s390_frame frame; + HOST_WIDE_INT fsize = get_frame_size (); + int save_fprs_p, i; - /* Compute frame_info. */ + /* fprs 8 - 15 are caller saved for 64 Bit ABI. */ + save_fprs_p = 0; + if (TARGET_64BIT) + for (i = 24; i < 32; i++) + if (regs_ever_live[i] && !global_regs[i]) + { + save_fprs_p = 1; + break; + } - s390_frame_info (&frame); + fsize = fsize + save_fprs_p * 64; - return frame.frame_size + STACK_POINTER_OFFSET; + /* Does function need to setup frame and save area. */ + + if (! current_function_is_leaf + || fsize > 0 + || current_function_calls_alloca + || current_function_stdarg) + fsize += STARTING_FRAME_OFFSET; + return fsize + STACK_POINTER_OFFSET; } /* Emit insn to save fpr REGNUM at offset OFFSET relative @@ -3305,30 +5079,118 @@ restore_fpr (base, offset, regnum) return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr); } -/* Output the function prologue assembly code to the - stdio stream FILE. The local frame size is passed - in LSIZE. */ +/* Generate insn to save registers FIRST to LAST into + the register save area located at offset OFFSET + relative to register BASE. */ -void -s390_function_prologue (file, lsize) - FILE *file ATTRIBUTE_UNUSED; - HOST_WIDE_INT lsize ATTRIBUTE_UNUSED; +static rtx +save_gprs (base, offset, first, last) + rtx base; + int offset; + int first; + int last; { - s390_chunkify_pool (); - s390_split_branches (); + rtx addr, insn, note; + int i; + + addr = plus_constant (base, offset + first * UNITS_PER_WORD); + addr = gen_rtx_MEM (Pmode, addr); + set_mem_alias_set (addr, s390_sr_alias_set); + + /* Special-case single register. */ + if (first == last) + { + if (TARGET_64BIT) + insn = gen_movdi (addr, gen_rtx_REG (Pmode, first)); + else + insn = gen_movsi (addr, gen_rtx_REG (Pmode, first)); + + RTX_FRAME_RELATED_P (insn) = 1; + return insn; + } + + + insn = gen_store_multiple (addr, + gen_rtx_REG (Pmode, first), + GEN_INT (last - first + 1)); + + + /* We need to set the FRAME_RELATED flag on all SETs + inside the store-multiple pattern. + + However, we must not emit DWARF records for registers 2..5 + if they are stored for use by variable arguments ... + + ??? Unfortunately, it is not enough to simply not the the + FRAME_RELATED flags for those SETs, because the first SET + of the PARALLEL is always treated as if it had the flag + set, even if it does not. Therefore we emit a new pattern + without those registers as REG_FRAME_RELATED_EXPR note. */ + + if (first >= 6) + { + rtx pat = PATTERN (insn); + + for (i = 0; i < XVECLEN (pat, 0); i++) + if (GET_CODE (XVECEXP (pat, 0, i)) == SET) + RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1; + + RTX_FRAME_RELATED_P (insn) = 1; + } + else if (last >= 6) + { + addr = plus_constant (base, offset + 6 * UNITS_PER_WORD); + note = gen_store_multiple (gen_rtx_MEM (Pmode, addr), + gen_rtx_REG (Pmode, 6), + GEN_INT (last - 6 + 1)); + note = PATTERN (note); + + REG_NOTES (insn) = + gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, + note, REG_NOTES (insn)); + + for (i = 0; i < XVECLEN (note, 0); i++) + if (GET_CODE (XVECEXP (note, 0, i)) == SET) + RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1; + + RTX_FRAME_RELATED_P (insn) = 1; + } + + return insn; } -/* Output the function epilogue assembly code to the - stdio stream FILE. The local frame size is passed - in LSIZE. */ +/* Generate insn to restore registers FIRST to LAST from + the register save area located at offset OFFSET + relative to register BASE. */ -void -s390_function_epilogue (file, lsize) - FILE *file ATTRIBUTE_UNUSED; - HOST_WIDE_INT lsize ATTRIBUTE_UNUSED; +static rtx +restore_gprs (base, offset, first, last) + rtx base; + int offset; + int first; + int last; { - current_function_uses_pic_offset_table = 0; - s390_function_count++; + rtx addr, insn; + + addr = plus_constant (base, offset + first * UNITS_PER_WORD); + addr = gen_rtx_MEM (Pmode, addr); + set_mem_alias_set (addr, s390_sr_alias_set); + + /* Special-case single register. */ + if (first == last) + { + if (TARGET_64BIT) + insn = gen_movdi (gen_rtx_REG (Pmode, first), addr); + else + insn = gen_movsi (gen_rtx_REG (Pmode, first), addr); + + return insn; + } + + insn = gen_load_multiple (gen_rtx_REG (Pmode, first), + addr, + GEN_INT (last - first + 1)); + return insn; } /* Expand the prologue into a bunch of separate insns. */ @@ -3336,18 +5198,18 @@ s390_function_epilogue (file, lsize) void s390_emit_prologue () { - struct s390_frame frame; rtx insn, addr; rtx temp_reg; + rtx pool_start_label, pool_end_label; int i; /* Compute frame_info. */ - s390_frame_info (&frame); + s390_frame_info (); /* Choose best register to use for temp use within prologue. */ - if (frame.return_reg_saved_p + if (!current_function_is_leaf && !has_hard_reg_initial_val (Pmode, RETURN_REGNUM) && get_pool_size () < S390_POOL_CHUNK_MAX / 2) temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM); @@ -3356,75 +5218,26 @@ s390_emit_prologue () /* Save call saved gprs. */ - if (frame.first_save_gpr != -1) - { - addr = plus_constant (stack_pointer_rtx, - frame.first_save_gpr * UNITS_PER_WORD); - addr = gen_rtx_MEM (Pmode, addr); - set_mem_alias_set (addr, s390_sr_alias_set); - - if (frame.first_save_gpr != frame.last_save_gpr ) - { - insn = emit_insn (gen_store_multiple (addr, - gen_rtx_REG (Pmode, frame.first_save_gpr), - GEN_INT (frame.last_save_gpr - - frame.first_save_gpr + 1))); - - /* We need to set the FRAME_RELATED flag on all SETs - inside the store-multiple pattern. + insn = save_gprs (stack_pointer_rtx, 0, + cfun->machine->first_save_gpr, cfun->machine->last_save_gpr); + emit_insn (insn); - However, we must not emit DWARF records for registers 2..5 - if they are stored for use by variable arguments ... + /* Dump constant pool and set constant pool register. */ - ??? Unfortunately, it is not enough to simply not the the - FRAME_RELATED flags for those SETs, because the first SET - of the PARALLEL is always treated as if it had the flag - set, even if it does not. Therefore we emit a new pattern - without those registers as REG_FRAME_RELATED_EXPR note. */ - - if (frame.first_save_gpr >= 6) - { - rtx pat = PATTERN (insn); - - for (i = 0; i < XVECLEN (pat, 0); i++) - if (GET_CODE (XVECEXP (pat, 0, i)) == SET) - RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1; - - RTX_FRAME_RELATED_P (insn) = 1; - } - else if (frame.last_save_gpr >= 6) - { - rtx note, naddr; - naddr = plus_constant (stack_pointer_rtx, 6 * UNITS_PER_WORD); - note = gen_store_multiple (gen_rtx_MEM (Pmode, naddr), - gen_rtx_REG (Pmode, 6), - GEN_INT (frame.last_save_gpr - 6 + 1)); - REG_NOTES (insn) = - gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, - note, REG_NOTES (insn)); - - for (i = 0; i < XVECLEN (note, 0); i++) - if (GET_CODE (XVECEXP (note, 0, i)) == SET) - RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1; - - RTX_FRAME_RELATED_P (insn) = 1; - } - } - else - { - insn = emit_move_insn (addr, - gen_rtx_REG (Pmode, frame.first_save_gpr)); - RTX_FRAME_RELATED_P (insn) = 1; - } - } - - /* Dump constant pool and set constant pool register (13). */ - - insn = emit_insn (gen_lit ()); + pool_start_label = gen_label_rtx(); + pool_end_label = gen_label_rtx(); + cfun->machine->literal_pool_label = pool_start_label; + + if (TARGET_64BIT) + insn = emit_insn (gen_literal_pool_64 (gen_rtx_REG (Pmode, BASE_REGISTER), + pool_start_label, pool_end_label)); + else + insn = emit_insn (gen_literal_pool_31 (gen_rtx_REG (Pmode, BASE_REGISTER), + pool_start_label, pool_end_label)); /* Save fprs for variable args. */ - if (current_function_stdarg || current_function_varargs) + if (current_function_stdarg) { /* Save fpr 0 and 2. */ @@ -3445,12 +5258,12 @@ s390_emit_prologue () if (!TARGET_64BIT) { /* Save fpr 4 and 6. */ - if (regs_ever_live[18]) + if (regs_ever_live[18] && !global_regs[18]) { insn = save_fpr (stack_pointer_rtx, STACK_POINTER_OFFSET - 16, 18); RTX_FRAME_RELATED_P (insn) = 1; } - if (regs_ever_live[19]) + if (regs_ever_live[19] && !global_regs[19]) { insn = save_fpr (stack_pointer_rtx, STACK_POINTER_OFFSET - 8, 19); RTX_FRAME_RELATED_P (insn) = 1; @@ -3459,21 +5272,21 @@ s390_emit_prologue () /* Decrement stack pointer. */ - if (frame.frame_size > 0) + if (cfun->machine->frame_size > 0) { - rtx frame_off = GEN_INT (-frame.frame_size); + rtx frame_off = GEN_INT (-cfun->machine->frame_size); /* Save incoming stack pointer into temp reg. */ - if (TARGET_BACKCHAIN || frame.save_fprs_p) + if (TARGET_BACKCHAIN || cfun->machine->save_fprs_p) { insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx)); } /* Substract frame size from stack pointer. */ - frame_off = GEN_INT (-frame.frame_size); - if (!CONST_OK_FOR_LETTER_P (-frame.frame_size, 'K')) + frame_off = GEN_INT (-cfun->machine->frame_size); + if (!CONST_OK_FOR_LETTER_P (-cfun->machine->frame_size, 'K')) frame_off = force_const_mem (Pmode, frame_off); insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off)); @@ -3482,7 +5295,7 @@ s390_emit_prologue () gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, gen_rtx_SET (VOIDmode, stack_pointer_rtx, gen_rtx_PLUS (Pmode, stack_pointer_rtx, - GEN_INT (-frame.frame_size))), + GEN_INT (-cfun->machine->frame_size))), REG_NOTES (insn)); /* Set backchain. */ @@ -3493,19 +5306,29 @@ s390_emit_prologue () set_mem_alias_set (addr, s390_sr_alias_set); insn = emit_insn (gen_move_insn (addr, temp_reg)); } + + /* If we support asynchronous exceptions (e.g. for Java), + we need to make sure the backchain pointer is set up + before any possibly trapping memory access. */ + + if (TARGET_BACKCHAIN && flag_non_call_exceptions) + { + addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode)); + emit_insn (gen_rtx_CLOBBER (VOIDmode, addr)); + } } /* Save fprs 8 - 15 (64 bit ABI). */ - if (frame.save_fprs_p) + if (cfun->machine->save_fprs_p) { insn = emit_insn (gen_add2_insn (temp_reg, GEN_INT(-64))); for (i = 24; i < 32; i++) - if (regs_ever_live[i]) + if (regs_ever_live[i] && !global_regs[i]) { rtx addr = plus_constant (stack_pointer_rtx, - frame.frame_size - 64 + (i-24)*8); + cfun->machine->frame_size - 64 + (i-24)*8); insn = save_fpr (temp_reg, (i-24)*8, i); RTX_FRAME_RELATED_P (insn) = 1; @@ -3520,7 +5343,7 @@ s390_emit_prologue () /* Set frame pointer, if needed. */ - if (frame.frame_pointer_p) + if (frame_pointer_needed) { insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); RTX_FRAME_RELATED_P (insn) = 1; @@ -3528,7 +5351,7 @@ s390_emit_prologue () /* Set up got pointer, if needed. */ - if (current_function_uses_pic_offset_table) + if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM]) { rtx got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); SYMBOL_REF_FLAG (got_symbol) = 1; @@ -3553,8 +5376,10 @@ s390_emit_prologue () REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, NULL_RTX, REG_NOTES (insn)); - insn = emit_insn (gen_add2_insn (pic_offset_table_rtx, - gen_rtx_REG (Pmode, BASE_REGISTER))); + got_symbol = gen_rtx_REG (Pmode, BASE_REGISTER); + got_symbol = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol), 101); + got_symbol = gen_rtx_PLUS (Pmode, got_symbol, pic_offset_table_rtx); + insn = emit_move_insn (pic_offset_table_rtx, got_symbol); REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, NULL_RTX, REG_NOTES (insn)); } @@ -3566,26 +5391,21 @@ s390_emit_prologue () void s390_emit_epilogue () { - struct s390_frame frame; rtx frame_pointer, return_reg; - int area_bottom, area_top, offset; + int area_bottom, area_top, offset = 0; rtvec p; - /* Compute frame_info. */ - - s390_frame_info (&frame); - /* Check whether to use frame or stack pointer for restore. */ - frame_pointer = frame.frame_pointer_p ? + frame_pointer = frame_pointer_needed ? hard_frame_pointer_rtx : stack_pointer_rtx; /* Compute which parts of the save area we need to access. */ - if (frame.first_restore_gpr != -1) + if (cfun->machine->first_restore_gpr != -1) { - area_bottom = frame.first_restore_gpr * UNITS_PER_WORD; - area_top = (frame.last_save_gpr + 1) * UNITS_PER_WORD; + area_bottom = cfun->machine->first_restore_gpr * UNITS_PER_WORD; + area_top = (cfun->machine->last_save_gpr + 1) * UNITS_PER_WORD; } else { @@ -3595,7 +5415,7 @@ s390_emit_epilogue () if (TARGET_64BIT) { - if (frame.save_fprs_p) + if (cfun->machine->save_fprs_p) { if (area_bottom > -64) area_bottom = -64; @@ -3605,14 +5425,14 @@ s390_emit_epilogue () } else { - if (regs_ever_live[18]) + if (regs_ever_live[18] && !global_regs[18]) { if (area_bottom > STACK_POINTER_OFFSET - 16) area_bottom = STACK_POINTER_OFFSET - 16; if (area_top < STACK_POINTER_OFFSET - 8) area_top = STACK_POINTER_OFFSET - 8; } - if (regs_ever_live[19]) + if (regs_ever_live[19] && !global_regs[19]) { if (area_bottom > STACK_POINTER_OFFSET - 8) area_bottom = STACK_POINTER_OFFSET - 8; @@ -3628,18 +5448,18 @@ s390_emit_epilogue () { /* Nothing to restore. */ } - else if (frame.frame_size + area_bottom >= 0 - && frame.frame_size + area_top <= 4096) + else if (cfun->machine->frame_size + area_bottom >= 0 + && cfun->machine->frame_size + area_top <= 4096) { /* Area is in range. */ - offset = frame.frame_size; + offset = cfun->machine->frame_size; } else { rtx insn, frame_off; offset = area_bottom < 0 ? -area_bottom : 0; - frame_off = GEN_INT (frame.frame_size - offset); + frame_off = GEN_INT (cfun->machine->frame_size - offset); if (!CONST_OK_FOR_LETTER_P (INTVAL (frame_off), 'K')) frame_off = force_const_mem (Pmode, frame_off); @@ -3653,7 +5473,7 @@ s390_emit_epilogue () { int i; - if (frame.save_fprs_p) + if (cfun->machine->save_fprs_p) for (i = 24; i < 32; i++) if (regs_ever_live[i] && !global_regs[i]) restore_fpr (frame_pointer, @@ -3673,16 +5493,16 @@ s390_emit_epilogue () /* Restore call saved gprs. */ - if (frame.first_restore_gpr != -1) + if (cfun->machine->first_restore_gpr != -1) { - rtx addr; + rtx insn, addr; int i; /* Check for global register and save them to stack location from where they get restored. */ - for (i = frame.first_restore_gpr; - i <= frame.last_save_gpr; + for (i = cfun->machine->first_restore_gpr; + i <= cfun->machine->last_save_gpr; i++) { /* These registers are special and need to be @@ -3690,7 +5510,7 @@ s390_emit_epilogue () if (i == STACK_POINTER_REGNUM || i == RETURN_REGNUM || i == BASE_REGISTER - || (flag_pic && i == PIC_OFFSET_TABLE_REGNUM)) + || (flag_pic && i == (int)PIC_OFFSET_TABLE_REGNUM)) continue; if (global_regs[i]) @@ -3706,8 +5526,7 @@ s390_emit_epilogue () /* Fetch return address from stack before load multiple, this will do good for scheduling. */ - if (frame.last_save_gpr >= RETURN_REGNUM - && frame.first_restore_gpr < RETURN_REGNUM) + if (!current_function_is_leaf) { int return_regnum = find_unused_clobbered_reg(); if (!return_regnum) @@ -3727,23 +5546,10 @@ s390_emit_epilogue () emit_insn (gen_blockage()); - addr = plus_constant (frame_pointer, - offset + frame.first_restore_gpr * UNITS_PER_WORD); - addr = gen_rtx_MEM (Pmode, addr); - set_mem_alias_set (addr, s390_sr_alias_set); - - if (frame.first_restore_gpr != frame.last_save_gpr) - { - emit_insn (gen_load_multiple ( - gen_rtx_REG (Pmode, frame.first_restore_gpr), - addr, - GEN_INT (frame.last_save_gpr - frame.first_restore_gpr + 1))); - } - else - { - emit_move_insn (gen_rtx_REG (Pmode, frame.first_restore_gpr), - addr); - } + insn = restore_gprs (frame_pointer, offset, + cfun->machine->first_restore_gpr, + cfun->machine->last_save_gpr); + emit_insn (insn); } /* Return to caller. */ @@ -3906,7 +5712,7 @@ s390_build_va_list () { tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; - record = make_lang_type (RECORD_TYPE); + record = (*lang_hooks.types.make_type) (RECORD_TYPE); type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); @@ -3939,9 +5745,8 @@ s390_build_va_list () } /* Implement va_start by filling the va_list structure VALIST. - STDARG_P is true if implementing __builtin_stdarg_va_start, - false if implementing __builtin_varargs_va_start. NEXTARG - points to the first anonymous stack argument. + STDARG_P is always true, and ignored. + NEXTARG points to the first anonymous stack argument. The following global variables are used to initialize the va_list structure: @@ -3953,8 +5758,7 @@ s390_build_va_list () (relative to the virtual arg pointer). */ void -s390_va_start (stdarg_p, valist, nextarg) - int stdarg_p; +s390_va_start (valist, nextarg) tree valist; rtx nextarg ATTRIBUTE_UNUSED; { @@ -3992,8 +5796,6 @@ s390_va_start (stdarg_p, valist, nextarg) off = INTVAL (current_function_arg_offset_rtx); off = off < 0 ? 0 : off; - if (! stdarg_p) - off = off > 0 ? off - UNITS_PER_WORD : off; if (TARGET_DEBUG_ARG) fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n", (int)n_gpr, (int)n_fpr, off); @@ -4200,6 +6002,134 @@ s390_va_arg (valist, type) } +/* Builtins. */ + +enum s390_builtin +{ + S390_BUILTIN_THREAD_POINTER, + S390_BUILTIN_SET_THREAD_POINTER, + + S390_BUILTIN_max +}; + +static unsigned int const code_for_builtin_64[S390_BUILTIN_max] = { + CODE_FOR_get_tp_64, + CODE_FOR_set_tp_64 +}; + +static unsigned int const code_for_builtin_31[S390_BUILTIN_max] = { + CODE_FOR_get_tp_31, + CODE_FOR_set_tp_31 +}; + +static void +s390_init_builtins () +{ + tree ftype; + + ftype = build_function_type (ptr_type_node, void_list_node); + builtin_function ("__builtin_thread_pointer", ftype, + S390_BUILTIN_THREAD_POINTER, BUILT_IN_MD, + NULL, NULL_TREE); + + ftype = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE); + builtin_function ("__builtin_set_thread_pointer", ftype, + S390_BUILTIN_SET_THREAD_POINTER, BUILT_IN_MD, + NULL, NULL_TREE); +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +s390_expand_builtin (exp, target, subtarget, mode, ignore) + tree exp; + rtx target; + rtx subtarget ATTRIBUTE_UNUSED; + enum machine_mode mode ATTRIBUTE_UNUSED; + int ignore ATTRIBUTE_UNUSED; +{ +#define MAX_ARGS 2 + + unsigned int const *code_for_builtin = + TARGET_64BIT ? code_for_builtin_64 : code_for_builtin_31; + + tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); + unsigned int fcode = DECL_FUNCTION_CODE (fndecl); + tree arglist = TREE_OPERAND (exp, 1); + enum insn_code icode; + rtx op[MAX_ARGS], pat; + int arity; + bool nonvoid; + + if (fcode >= S390_BUILTIN_max) + internal_error ("bad builtin fcode"); + icode = code_for_builtin[fcode]; + if (icode == 0) + internal_error ("bad builtin fcode"); + + nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + + for (arglist = TREE_OPERAND (exp, 1), arity = 0; + arglist; + arglist = TREE_CHAIN (arglist), arity++) + { + const struct insn_operand_data *insn_op; + + tree arg = TREE_VALUE (arglist); + if (arg == error_mark_node) + return NULL_RTX; + if (arity > MAX_ARGS) + return NULL_RTX; + + insn_op = &insn_data[icode].operand[arity + nonvoid]; + + op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, 0); + + if (!(*insn_op->predicate) (op[arity], insn_op->mode)) + op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); + } + + if (nonvoid) + { + enum machine_mode tmode = insn_data[icode].operand[0].mode; + if (!target + || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + } + + switch (arity) + { + case 0: + pat = GEN_FCN (icode) (target); + break; + case 1: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0]); + else + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + default: + abort (); + } + if (!pat) + return NULL_RTX; + emit_insn (pat); + + if (nonvoid) + return target; + else + return const0_rtx; +} + + /* Output assembly code for the trampoline template to stdio stream FILE. @@ -4285,7 +6215,7 @@ s390_function_profiler (file, labelno) rtx op[7]; char label[128]; - sprintf (label, "%sP%d", LPREFIX, labelno); + ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno); fprintf (file, "# function profiler \n"); @@ -4344,3 +6274,353 @@ s390_function_profiler (file, labelno) } } +/* Select section for constant in constant pool. In 32-bit mode, + constants go in the function section; in 64-bit mode in .rodata. */ + +static void +s390_select_rtx_section (mode, x, align) + enum machine_mode mode ATTRIBUTE_UNUSED; + rtx x ATTRIBUTE_UNUSED; + unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED; +{ + if (TARGET_64BIT) + readonly_data_section (); + else + function_section (current_function_decl); +} + +/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF + into its name and SYMBOL_REF_FLAG. */ + +static void +s390_encode_section_info (decl, first) + tree decl; + int first ATTRIBUTE_UNUSED; +{ + bool local_p = (*targetm.binds_local_p) (decl); + rtx rtl, symbol; + + rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl); + if (GET_CODE (rtl) != MEM) + return; + symbol = XEXP (rtl, 0); + if (GET_CODE (symbol) != SYMBOL_REF) + return; + + /* When using PIC, SYMBOL_REF_FLAG marks non-global symbols + that can be accessed directly. */ + if (flag_pic) + SYMBOL_REF_FLAG (symbol) = local_p; + + /* Encode thread-local data with %[GLil] for "global dynamic", + "local dynamic", "initial exec" or "local exec" TLS models, + respectively. */ + + if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl)) + { + const char *symbol_str = XSTR (symbol, 0); + char *newstr; + size_t len; + enum tls_model kind = decl_tls_model (decl); + + if (!flag_pic) + { + /* We don't allow non-pic code for shared libraries, + so don't generate GD/LD TLS models for non-pic code. */ + switch (kind) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + kind = TLS_MODEL_INITIAL_EXEC; break; + case TLS_MODEL_LOCAL_DYNAMIC: + kind = TLS_MODEL_LOCAL_EXEC; break; + default: + break; + } + } + + if (symbol_str[0] == '%') + { + if (symbol_str[1] == tls_model_chars[kind]) + return; + symbol_str += 2; + } + len = strlen (symbol_str) + 1; + newstr = alloca (len + 2); + + newstr[0] = '%'; + newstr[1] = tls_model_chars[kind]; + memcpy (newstr + 2, symbol_str, len); + + XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1); + } + + /* If a variable has a forced alignment to < 2 bytes, mark it + with '@' to prevent it from being used as LARL operand. */ + + else if (TREE_CODE (decl) == VAR_DECL + && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16 + && XSTR (symbol, 0)[0] != '@') + { + const char *symbol_str = XSTR (symbol, 0); + size_t len = strlen (symbol_str) + 1; + char *newstr = alloca (len + 1); + + newstr[0] = '@'; + memcpy (newstr + 1, symbol_str, len); + + XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 1 - 1); + } +} + +/* Undo the above when printing symbol names. */ + +static const char * +s390_strip_name_encoding (str) + const char *str; +{ + if (str[0] == '%') + str += 2; + if (str[0] == '@') + str += 1; + if (str[0] == '*') + str += 1; + return str; +} + +/* Output thunk to FILE that implements a C++ virtual function call (with + multiple inheritance) to FUNCTION. The thunk adjusts the this pointer + by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment + stored at VCALL_OFFSET in the vtable whose address is located at offset 0 + relative to the resulting this pointer. */ + +static void +s390_output_mi_thunk (file, thunk, delta, vcall_offset, function) + FILE *file; + tree thunk ATTRIBUTE_UNUSED; + HOST_WIDE_INT delta; + HOST_WIDE_INT vcall_offset; + tree function; +{ + rtx op[10]; + int nonlocal = 0; + + /* Operand 0 is the target function. */ + op[0] = XEXP (DECL_RTL (function), 0); + if (flag_pic && !SYMBOL_REF_FLAG (op[0])) + { + nonlocal = 1; + op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]), + TARGET_64BIT ? 113 : flag_pic == 2 ? 112 : 110); + op[0] = gen_rtx_CONST (Pmode, op[0]); + } + + /* Operand 1 is the 'this' pointer. */ + if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)))) + op[1] = gen_rtx_REG (Pmode, 3); + else + op[1] = gen_rtx_REG (Pmode, 2); + + /* Operand 2 is the delta. */ + op[2] = GEN_INT (delta); + + /* Operand 3 is the vcall_offset. */ + op[3] = GEN_INT (vcall_offset); + + /* Operand 4 is the temporary register. */ + op[4] = gen_rtx_REG (Pmode, 1); + + /* Operands 5 to 8 can be used as labels. */ + op[5] = NULL_RTX; + op[6] = NULL_RTX; + op[7] = NULL_RTX; + op[8] = NULL_RTX; + + /* Operand 9 can be used for temporary register. */ + op[9] = NULL_RTX; + + /* Generate code. */ + if (TARGET_64BIT) + { + /* Setup literal pool pointer if required. */ + if (!CONST_OK_FOR_LETTER_P (delta, 'K') + || !CONST_OK_FOR_LETTER_P (vcall_offset, 'K')) + { + op[5] = gen_label_rtx (); + output_asm_insn ("larl\t%4,%5", op); + } + + /* Add DELTA to this pointer. */ + if (delta) + { + if (CONST_OK_FOR_LETTER_P (delta, 'J')) + output_asm_insn ("la\t%1,%2(%1)", op); + else if (CONST_OK_FOR_LETTER_P (delta, 'K')) + output_asm_insn ("aghi\t%1,%2", op); + else + { + op[6] = gen_label_rtx (); + output_asm_insn ("agf\t%1,%6-%5(%4)", op); + } + } + + /* Perform vcall adjustment. */ + if (vcall_offset) + { + if (CONST_OK_FOR_LETTER_P (vcall_offset, 'J')) + { + output_asm_insn ("lg\t%4,0(%1)", op); + output_asm_insn ("ag\t%1,%3(%4)", op); + } + else if (CONST_OK_FOR_LETTER_P (vcall_offset, 'K')) + { + output_asm_insn ("lghi\t%4,%3", op); + output_asm_insn ("ag\t%4,0(%1)", op); + output_asm_insn ("ag\t%1,0(%4)", op); + } + else + { + op[7] = gen_label_rtx (); + output_asm_insn ("llgf\t%4,%7-%5(%4)", op); + output_asm_insn ("ag\t%4,0(%1)", op); + output_asm_insn ("ag\t%1,0(%4)", op); + } + } + + /* Jump to target. */ + output_asm_insn ("jg\t%0", op); + + /* Output literal pool if required. */ + if (op[5]) + { + output_asm_insn (".align\t4", op); + ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (op[5])); + } + if (op[6]) + { + ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (op[6])); + output_asm_insn (".long\t%2", op); + } + if (op[7]) + { + ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (op[7])); + output_asm_insn (".long\t%3", op); + } + } + else + { + /* Setup base pointer if required. */ + if (!vcall_offset + || !CONST_OK_FOR_LETTER_P (delta, 'K') + || !CONST_OK_FOR_LETTER_P (vcall_offset, 'K')) + { + op[5] = gen_label_rtx (); + output_asm_insn ("basr\t%4,0", op); + ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (op[5])); + } + + /* Add DELTA to this pointer. */ + if (delta) + { + if (CONST_OK_FOR_LETTER_P (delta, 'J')) + output_asm_insn ("la\t%1,%2(%1)", op); + else if (CONST_OK_FOR_LETTER_P (delta, 'K')) + output_asm_insn ("ahi\t%1,%2", op); + else + { + op[6] = gen_label_rtx (); + output_asm_insn ("a\t%1,%6-%5(%4)", op); + } + } + + /* Perform vcall adjustment. */ + if (vcall_offset) + { + if (CONST_OK_FOR_LETTER_P (vcall_offset, 'J')) + { + output_asm_insn ("lg\t%4,0(%1)", op); + output_asm_insn ("a\t%1,%3(%4)", op); + } + else if (CONST_OK_FOR_LETTER_P (vcall_offset, 'K')) + { + output_asm_insn ("lhi\t%4,%3", op); + output_asm_insn ("a\t%4,0(%1)", op); + output_asm_insn ("a\t%1,0(%4)", op); + } + else + { + op[7] = gen_label_rtx (); + output_asm_insn ("l\t%4,%7-%5(%4)", op); + output_asm_insn ("a\t%4,0(%1)", op); + output_asm_insn ("a\t%1,0(%4)", op); + } + + /* We had to clobber the base pointer register. + Re-setup the base pointer (with a different base). */ + op[5] = gen_label_rtx (); + output_asm_insn ("basr\t%4,0", op); + ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (op[5])); + } + + /* Jump to target. */ + op[8] = gen_label_rtx (); + + if (!flag_pic) + output_asm_insn ("l\t%4,%8-%5(%4)", op); + else if (!nonlocal) + output_asm_insn ("a\t%4,%8-%5(%4)", op); + /* We cannot call through .plt, since .plt requires %r12 loaded. */ + else if (flag_pic == 1) + { + output_asm_insn ("a\t%4,%8-%5(%4)", op); + output_asm_insn ("l\t%4,%0(%4)", op); + } + else if (flag_pic == 2) + { + op[9] = gen_rtx_REG (Pmode, 0); + output_asm_insn ("l\t%9,%8-4-%5(%4)", op); + output_asm_insn ("a\t%4,%8-%5(%4)", op); + output_asm_insn ("ar\t%4,%9", op); + output_asm_insn ("l\t%4,0(%4)", op); + } + + output_asm_insn ("br\t%4", op); + + /* Output literal pool. */ + output_asm_insn (".align\t4", op); + + if (nonlocal && flag_pic == 2) + output_asm_insn (".long\t%0", op); + if (nonlocal) + { + op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); + SYMBOL_REF_FLAG (op[0]) = 1; + } + + ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (op[8])); + if (!flag_pic) + output_asm_insn (".long\t%0", op); + else + output_asm_insn (".long\t%0-%5", op); + + if (op[6]) + { + ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (op[6])); + output_asm_insn (".long\t%2", op); + } + if (op[7]) + { + ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (op[7])); + output_asm_insn (".long\t%3", op); + } + } +} + +/* How to allocate a 'struct machine_function'. */ + +static struct machine_function * +s390_init_machine_status () +{ + return ggc_alloc_cleared (sizeof (struct machine_function)); +} + +#include "gt-s390.h" |