Diffstat (limited to 'contrib/gcc/config/sparc/sparc.c')
-rw-r--r--   contrib/gcc/config/sparc/sparc.c | 1564
1 file changed, 538 insertions, 1026 deletions
diff --git a/contrib/gcc/config/sparc/sparc.c b/contrib/gcc/config/sparc/sparc.c
index c23cbef..7c25bc5 100644
--- a/contrib/gcc/config/sparc/sparc.c
+++ b/contrib/gcc/config/sparc/sparc.c
@@ -1,6 +1,6 @@
 /* Subroutines for insn-output.c for Sun SPARC.
    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
-   1999, 2000, 2001 Free Software Foundation, Inc.
+   1999, 2000, 2001, 2002 Free Software Foundation, Inc.
    Contributed by Michael Tiemann (tiemann@cygnus.com)
    64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
    at Cygnus Support.
@@ -80,8 +80,6 @@ rtx sparc_compare_op0, sparc_compare_op1;
    sparc_nonflat_function_epilogue.  */
 bool sparc_emitting_epilogue;
 
-#ifdef LEAF_REGISTERS
-
 /* Vector to say how input registers are mapped to output registers.
    HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
    eliminate it.  You must use -fomit-frame-pointer to get that.  */
@@ -119,8 +117,6 @@ char sparc_leaf_regs[] =
   1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1};
 
-#endif
-
 /* Name of where we pretend to think the frame pointer points.
    Normally, this is "%fp", but if we are in a leaf procedure, this is
    "%sp+something".  We record "something" separately as it may be
@@ -140,22 +136,13 @@ static int function_arg_slotno PARAMS ((const CUMULATIVE_ARGS *,
 static int supersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
 static int hypersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
-static int ultrasparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
 static void sparc_output_addr_vec PARAMS ((rtx));
 static void sparc_output_addr_diff_vec PARAMS ((rtx));
 static void sparc_output_deferred_case_vectors PARAMS ((void));
-static void sparc_add_gc_roots PARAMS ((void));
-static void mark_ultrasparc_pipeline_state PARAMS ((void *));
 static int check_return_regs PARAMS ((rtx));
 static int epilogue_renumber PARAMS ((rtx *, int));
 static bool sparc_assemble_integer PARAMS ((rtx, unsigned int, int));
-static int ultra_cmove_results_ready_p PARAMS ((rtx));
-static int ultra_fpmode_conflict_exists PARAMS ((enum machine_mode));
-static rtx *ultra_find_type PARAMS ((int, rtx *, int));
-static void ultra_build_types_avail PARAMS ((rtx *, int));
-static void ultra_flush_pipeline PARAMS ((void));
-static void ultra_rescan_pipeline_state PARAMS ((rtx *, int));
 static int set_extends PARAMS ((rtx));
 static void output_restore_regs PARAMS ((FILE *, int));
 static void sparc_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
@@ -169,21 +156,28 @@ static void sparc_nonflat_function_prologue PARAMS ((FILE *, HOST_WIDE_INT,
 #ifdef OBJECT_FORMAT_ELF
 static void sparc_elf_asm_named_section PARAMS ((const char *, unsigned int));
 #endif
-static void ultrasparc_sched_reorder PARAMS ((FILE *, int, rtx *, int));
-static int ultrasparc_variable_issue PARAMS ((rtx));
-static void ultrasparc_sched_init PARAMS ((void));
+static void sparc_aout_select_section PARAMS ((tree, int,
+                                               unsigned HOST_WIDE_INT))
+     ATTRIBUTE_UNUSED;
+static void sparc_aout_select_rtx_section PARAMS ((enum machine_mode, rtx,
+                                                   unsigned HOST_WIDE_INT))
+     ATTRIBUTE_UNUSED;
 static int sparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
 static int sparc_issue_rate PARAMS ((void));
-static int sparc_variable_issue PARAMS ((FILE *, int, rtx, int));
 static void sparc_sched_init PARAMS ((FILE *, int, int));
-static int sparc_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
+static int sparc_use_dfa_pipeline_interface PARAMS ((void));
+static int sparc_use_sched_lookahead PARAMS ((void));
 static void emit_soft_tfmode_libcall
        PARAMS ((const char *, int, rtx *));
 static void emit_soft_tfmode_binop PARAMS ((enum rtx_code, rtx *));
 static void emit_soft_tfmode_unop PARAMS ((enum rtx_code, rtx *));
 static void emit_soft_tfmode_cvt PARAMS ((enum rtx_code, rtx *));
 static void emit_hard_tfmode_operation PARAMS ((enum rtx_code, rtx *));
+
+static void sparc_encode_section_info PARAMS ((tree, int));
+static void sparc_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
+                                           HOST_WIDE_INT, tree));
 
 /* Option handling.  */
@@ -237,12 +231,20 @@ enum processor_type sparc_cpu;
 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
 #undef TARGET_SCHED_ISSUE_RATE
 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
-#undef TARGET_SCHED_VARIABLE_ISSUE
-#define TARGET_SCHED_VARIABLE_ISSUE sparc_variable_issue
 #undef TARGET_SCHED_INIT
 #define TARGET_SCHED_INIT sparc_sched_init
-#undef TARGET_SCHED_REORDER
-#define TARGET_SCHED_REORDER sparc_sched_reorder
+#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
+#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE sparc_use_dfa_pipeline_interface
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
+
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO sparc_encode_section_info
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
 
 struct gcc_target targetm = TARGET_INITIALIZER;
@@ -279,6 +281,7 @@ sparc_override_options ()
     { TARGET_CPU_supersparc, "supersparc" },
     { TARGET_CPU_v9, "v9" },
     { TARGET_CPU_ultrasparc, "ultrasparc" },
+    { TARGET_CPU_ultrasparc3, "ultrasparc3" },
     { 0, 0 }
   };
   const struct cpu_default *def;
@@ -311,6 +314,9 @@ sparc_override_options ()
     /* Although insns using %y are deprecated, it is a clear win on current
        ultrasparcs.  */
                                                     |MASK_DEPRECATED_V8_INSNS},
+    /* TI ultrasparc III */
+    /* ??? Check if %y issue still holds true in ultra3.  */
+    { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
     { 0, 0, 0, 0 }
   };
   const struct cpu_table *cpu;
@@ -423,7 +429,9 @@ sparc_override_options ()
     target_flags &= ~MASK_STACK_BIAS;
 
   /* Supply a default value for align_functions.  */
-  if (align_functions == 0 && sparc_cpu == PROCESSOR_ULTRASPARC)
+  if (align_functions == 0
+      && (sparc_cpu == PROCESSOR_ULTRASPARC
+          || sparc_cpu == PROCESSOR_ULTRASPARC3))
     align_functions = 32;
 
   /* Validate PCC_STRUCT_RETURN.  */
@@ -436,9 +444,6 @@ sparc_override_options ()
 
   /* Do various machine dependent initializations.  */
   sparc_init_modes ();
-
-  /* Register global variables with the garbage collector.  */
-  sparc_add_gc_roots ();
 }
 
 /* Miscellaneous utilities.  */
@@ -457,7 +462,7 @@ v9_regcmp_p (code)
 
 /* Operand constraints.  */
 
-/* Return non-zero only if OP is a register of mode MODE,
+/* Return nonzero only if OP is a register of mode MODE,
    or const0_rtx.  */
 
 int
@@ -478,6 +483,16 @@ reg_or_0_operand (op, mode)
   return 0;
 }
 
+/* Return nonzero only if OP is const1_rtx.  */
+
+int
+const1_operand (op, mode)
+     rtx op;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+  return op == const1_rtx;
+}
+
 /* Nonzero if OP is a floating point value with value 0.0.  */
 
 int
@@ -1378,9 +1393,8 @@ sparc_emit_set_const32 (op0, op1)
           && (INTVAL (op1) & 0x80000000) != 0)
         emit_insn (gen_rtx_SET (VOIDmode, temp,
-                                gen_rtx_CONST_DOUBLE (VOIDmode,
-                                                      INTVAL (op1) & ~(HOST_WIDE_INT)0x3ff,
-                                                      0)));
+                                immed_double_const (INTVAL (op1) & ~(HOST_WIDE_INT)0x3ff,
+                                                    0, DImode)));
       else
         emit_insn (gen_rtx_SET (VOIDmode, temp,
                                 GEN_INT (INTVAL (op1)
@@ -1403,7 +1417,7 @@ sparc_emit_set_const32 (op0, op1)
 }
 
-/* Sparc-v9 code-model support.  */
+/* SPARC-v9 code-model support.  */
 void
 sparc_emit_set_symbolic_const64 (op0, op1, temp1)
      rtx op0;
@@ -1558,11 +1572,10 @@ static rtx gen_safe_XOR64 PARAMS ((rtx, HOST_WIDE_INT));
 #define GEN_INT64(__x) GEN_INT (__x)
 #else
 #define GEN_HIGHINT64(__x) \
-        gen_rtx_CONST_DOUBLE (VOIDmode, (__x) & ~(HOST_WIDE_INT)0x3ff, 0)
+        immed_double_const ((__x) & ~(HOST_WIDE_INT)0x3ff, 0, DImode)
 #define GEN_INT64(__x) \
-        gen_rtx_CONST_DOUBLE (VOIDmode, (__x) & 0xffffffff, \
-                              ((__x) & 0x80000000 \
-                               ? -1 : 0))
+        immed_double_const ((__x) & 0xffffffff, \
+                            ((__x) & 0x80000000 ? -1 : 0), DImode)
 #endif
 
 /* The optimizer is not to assume anything about exactly
@@ -2132,9 +2145,9 @@ sparc_emit_set_const64 (op0, op1)
           negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
                                    (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
 #else
-          negated_const = gen_rtx_CONST_DOUBLE (DImode,
-                                                (~low_bits) & 0xfffffc00,
-                                                (~high_bits) & 0xffffffff);
+          negated_const = immed_double_const ((~low_bits) & 0xfffffc00,
+                                              (~high_bits) & 0xffffffff,
+                                              DImode);
 #endif
           sparc_emit_set_const64 (temp, negated_const);
         }
@@ -3055,17 +3068,6 @@ check_return_regs (x)
 }
 
-/* Return 1 if TRIAL references only in and global registers.  */
-int
-eligible_for_return_delay (trial)
-     rtx trial;
-{
-  if (GET_CODE (PATTERN (trial)) != SET)
-    return 0;
-
-  return check_return_regs (PATTERN (trial));
-}
-
 int
 short_branch (uid1, uid2)
      int uid1, uid2;
@@ -3079,7 +3081,7 @@ short_branch (uid1, uid2)
   return 0;
 }
 
-/* Return non-zero if REG is not used after INSN.
+/* Return nonzero if REG is not used after INSN.
    We assume REG is a reload reg, and therefore does
    not live past labels or calls or jumps.  */
 int
@@ -3115,10 +3117,10 @@ reg_unused_after (reg, insn)
 }
 
 /* The table we use to reference PIC data.  */
-static rtx global_offset_table;
+static GTY(()) rtx global_offset_table;
 
 /* The function we use to get at it.  */
-static rtx get_pc_symbol;
+static GTY(()) rtx get_pc_symbol;
 static char get_pc_symbol_name[256];
 
 /* Ensure that we are not using patterns that are not OK with PIC.  */
@@ -3163,7 +3165,7 @@ pic_address_needs_scratch (x)
 
 /* Legitimize PIC addresses.  If the address is already
    position-independent, we return ORIG.  Newly generated
    position-independent addresses go into a
-   reg.  This is REG if non zero, otherwise we allocate register(s) as
+   reg.  This is REG if nonzero, otherwise we allocate register(s) as
    necessary.  */
 
 rtx
@@ -3391,7 +3393,7 @@ mem_min_alignment (mem, desired)
 
 /* Vectors to keep interesting information about registers where it can easily
-   be got.  We use to use the actual mode value as the bit number, but there
+   be got.  We used to use the actual mode value as the bit number, but there
    are more than 32 modes now.  Instead we use two tables: one indexed by
    hard register number, and one indexed by mode.  */
@@ -4520,10 +4522,13 @@ function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
 
 struct function_arg_record_value_parms
 {
-  rtx ret;
-  int slotno, named, regbase;
-  unsigned int nregs;
-  int intoffset;
+  rtx ret;              /* return expression being built.  */
+  int slotno;           /* slot number of the argument.  */
+  int named;            /* whether the argument is named.  */
+  int regbase;          /* regno of the base register.  */
+  int stack;            /* 1 if part of the argument is on the stack.  */
+  int intoffset;        /* offset of the pending integer field.  */
+  unsigned int nregs;   /* number of words passed in registers.  */
 };
 
 static void function_arg_record_value_3
@@ -4598,8 +4603,13 @@ function_arg_record_value_1 (type, startbitpos, parms)
          this_slotno = parms->slotno + parms->intoffset
            / BITS_PER_WORD;
 
-          intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
-          intslots = MAX (intslots, 0);
+          if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
+            {
+              intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
+              /* We need to pass this field on the stack.  */
+              parms->stack = 1;
+            }
+
          parms->nregs += intslots;
          parms->intoffset = -1;
        }
@@ -4664,7 +4674,7 @@ function_arg_record_value_3 (bitpos, parms)
     {
       regno = parms->regbase + this_slotno;
       reg = gen_rtx_REG (mode, regno);
-      XVECEXP (parms->ret, 0, parms->nregs)
+      XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
        = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
 
       this_slotno += 1;
@@ -4737,7 +4747,7 @@ function_arg_record_value_2 (type, startbitpos, parms)
            default: break;
            }
          reg = gen_rtx_REG (mode, regno);
-          XVECEXP (parms->ret, 0, parms->nregs)
+          XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
            = gen_rtx_EXPR_LIST (VOIDmode, reg,
                                 GEN_INT (bitpos / BITS_PER_UNIT));
          parms->nregs += 1;
@@ -4745,7 +4755,7 @@
            {
              regno += GET_MODE_SIZE (mode) / 4;
              reg = gen_rtx_REG (mode, regno);
-              XVECEXP (parms->ret, 0, parms->nregs)
+              XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
                = gen_rtx_EXPR_LIST (VOIDmode, reg,
                                     GEN_INT ((bitpos + GET_MODE_BITSIZE (mode))
                                              / BITS_PER_UNIT));
@@ -4762,8 +4772,19 @@
 }
 
 /* Used by function_arg and function_value to implement the complex
-   Sparc64 structure calling conventions.  */
+   conventions of the 64-bit ABI for passing and returning structures.
+   Return an expression valid as a return value for the two macros
+   FUNCTION_ARG and FUNCTION_VALUE.
+   TYPE is the data type of the argument (as a tree).
+    This is null for libcalls where that information may
+    not be available.
+   MODE is the argument's machine mode.
+   SLOTNO is the index number of the argument's slot in the parameter array.
+   NAMED is nonzero if this argument is a named parameter
+    (otherwise it is an extra parameter matching an ellipsis).
+   REGBASE is the regno of the base register for the parameter array.  */
+
 static rtx
 function_arg_record_value (type, mode, slotno, named, regbase)
      tree type;
@@ -4778,6 +4799,7 @@ function_arg_record_value (type, mode, slotno, named, regbase)
   parms.slotno = slotno;
   parms.named = named;
   parms.regbase = regbase;
+  parms.stack = 0;
 
   /* Compute how many registers we need.  */
   parms.nregs = 0;
@@ -4794,8 +4816,12 @@
      intslots = (endbit - startbit) / BITS_PER_WORD;
      this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
 
-      intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
-      intslots = MAX (intslots, 0);
+      if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
+        {
+          intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
+          /* We need to pass this field on the stack.  */
+          parms.stack = 1;
+        }
 
      parms.nregs += intslots;
    }
@@ -4825,7 +4851,17 @@
   if (nregs == 0)
     abort ();
 
-  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
+  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
+
+  /* If at least one field must be passed on the stack, generate
+     (parallel [(expr_list (nil) ...) ...]) so that all fields will
+     also be passed on the stack.  We can't do much better because the
+     semantics of FUNCTION_ARG_PARTIAL_NREGS doesn't handle the case
+     of structures for which the fields passed exclusively in registers
+     are not at the beginning of the structure.  */
+  if (parms.stack)
+    XVECEXP (parms.ret, 0, 0)
+      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
 
   /* Fill in the entries.  */
   parms.nregs = 0;
@@ -5249,13 +5285,12 @@ sparc_builtin_saveregs ()
 
 /* Implement `va_start' for varargs and stdarg.  */
 
 void
-sparc_va_start (stdarg_p, valist, nextarg)
-     int stdarg_p ATTRIBUTE_UNUSED;
+sparc_va_start (valist, nextarg)
      tree valist;
     rtx nextarg;
 {
   nextarg = expand_builtin_saveregs ();
-  std_expand_builtin_va_start (1, valist, nextarg);
+  std_expand_builtin_va_start (valist, nextarg);
 }
 
 /* Implement `va_arg'.  */
@@ -5353,7 +5388,8 @@ sparc_va_arg (valist, type)
          PUT_MODE (tmp, BLKmode);
          set_mem_alias_set (tmp, 0);
 
-          dest_addr = emit_block_move (tmp, addr_rtx, GEN_INT (rsize));
+          dest_addr = emit_block_move (tmp, addr_rtx, GEN_INT (rsize),
+                                       BLOCK_OP_NORMAL);
          if (dest_addr != NULL_RTX)
            addr_rtx = dest_addr;
          else
@@ -5375,11 +5411,11 @@
    XEXP (OP, 0) is assumed to be a condition code register (integer or
    floating point) and its mode specifies what kind of comparison we made.
 
-   REVERSED is non-zero if we should reverse the sense of the comparison.
+   REVERSED is nonzero if we should reverse the sense of the comparison.
 
-   ANNUL is non-zero if we should generate an annulling branch.
+   ANNUL is nonzero if we should generate an annulling branch.
 
-   NOOP is non-zero if we have to follow this branch by a noop.
+   NOOP is nonzero if we have to follow this branch by a noop.
 
    INSN, if set, is the insn.  */
@@ -5808,11 +5844,11 @@ sparc_emit_floatunsdi (operands)
    operand number of the reg.  OP is the conditional expression.  The mode
    of REG says what kind of comparison we made.
 
-   REVERSED is non-zero if we should reverse the sense of the comparison.
+   REVERSED is nonzero if we should reverse the sense of the comparison.
 
-   ANNUL is non-zero if we should generate an annulling branch.
+   ANNUL is nonzero if we should generate an annulling branch.
 
-   NOOP is non-zero if we have to follow this branch by a noop.  */
+   NOOP is nonzero if we have to follow this branch by a noop.  */
 
 char *
 output_v9branch (op, dest, reg, label, reversed, annul, noop, insn)
@@ -6462,6 +6498,24 @@ print_operand (file, x, code)
       output_address (XEXP (x, 0));
       return;
 
+    case 's':
+      {
+        /* Print a sign-extended 32-bit value.  */
+        HOST_WIDE_INT i;
+        if (GET_CODE(x) == CONST_INT)
+          i = INTVAL (x);
+        else if (GET_CODE(x) == CONST_DOUBLE)
+          i = CONST_DOUBLE_LOW (x);
+        else
+          {
+            output_operand_lossage ("invalid %%s operand");
+            return;
+          }
+        i = trunc_int_for_mode (i, SImode);
+        fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
+        return;
+      }
+
     case 0:
       /* Do nothing special.  */
       break;
@@ -6548,10 +6602,6 @@ sparc_assemble_integer (x, size, aligned_p)
    what kind of result this function returns.  For non-C types, we pick
    the closest C type.  */
 
-#ifndef CHAR_TYPE_SIZE
-#define CHAR_TYPE_SIZE BITS_PER_UNIT
-#endif
-
 #ifndef SHORT_TYPE_SIZE
 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
 #endif
@@ -6757,7 +6807,8 @@ sparc_initialize_trampoline (tramp, fnaddr, cxt)
   /* On UltraSPARC a flush flushes an entire cache line.  The trampoline is
      aligned on a 16 byte boundary so one flush clears it all.  */
   emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
-  if (sparc_cpu != PROCESSOR_ULTRASPARC)
+  if (sparc_cpu != PROCESSOR_ULTRASPARC
+      && sparc_cpu != PROCESSOR_ULTRASPARC3)
     emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
                                                      plus_constant (tramp, 8)))));
 }
@@ -6795,7 +6846,8 @@ sparc64_initialize_trampoline (tramp, fnaddr, cxt)
   emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
   emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp))));
-  if (sparc_cpu != PROCESSOR_ULTRASPARC)
+  if (sparc_cpu != PROCESSOR_ULTRASPARC
+      && sparc_cpu != PROCESSOR_ULTRASPARC3)
     emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode,
                                                        plus_constant (tramp, 8)))));
 }
@@ -7648,157 +7700,6 @@ hypersparc_adjust_cost (insn, link, dep_insn, cost)
 }
 
 static int
-ultrasparc_adjust_cost (insn, link, dep_insn, cost)
-     rtx insn;
-     rtx link;
-     rtx dep_insn;
-     int cost;
-{
-  enum attr_type insn_type, dep_type;
-  rtx pat = PATTERN(insn);
-  rtx dep_pat = PATTERN (dep_insn);
-
-  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
-    return cost;
-
-  insn_type = get_attr_type (insn);
-  dep_type = get_attr_type (dep_insn);
-
-  /* Nothing issues in parallel with integer multiplies, so
-     mark as zero cost since the scheduler can not do anything
-     about it.  */
-  if (insn_type == TYPE_IMUL || insn_type == TYPE_IDIV)
-    return 0;
-
-#define SLOW_FP(dep_type) \
-(dep_type == TYPE_FPSQRTS || dep_type == TYPE_FPSQRTD || \
- dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
-
-  switch (REG_NOTE_KIND (link))
-    {
-    case 0:
-      /* Data dependency; DEP_INSN writes a register that INSN reads some
-         cycles later.  */
-
-      if (dep_type == TYPE_CMOVE)
-        {
-          /* Instructions that read the result of conditional moves cannot
-             be in the same group or the following group.  */
-          return cost + 1;
-        }
-
-      switch (insn_type)
-        {
-          /* UltraSPARC can dual issue a store and an instruction setting
-             the value stored, except for divide and square root.  */
-        case TYPE_FPSTORE:
-          if (! SLOW_FP (dep_type))
-            return 0;
-          return cost;
-
-        case TYPE_STORE:
-          if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
-            return cost;
-
-          if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
-            /* The dependency between the two instructions is on the data
-               that is being stored.  Assume that the address of the store
-               is not also dependent.  */
-            return 0;
-          return cost;
-
-        case TYPE_LOAD:
-        case TYPE_SLOAD:
-        case TYPE_FPLOAD:
-          /* A load does not return data until at least 11 cycles after
-             a store to the same location.  3 cycles are accounted for
-             in the load latency; add the other 8 here.  */
-          if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
-            {
-              /* If the addresses are not equal this may be a false
-                 dependency because pointer aliasing could not be
-                 determined.  Add only 2 cycles in that case.  2 is
-                 an arbitrary compromise between 8, which would cause
-                 the scheduler to generate worse code elsewhere to
-                 compensate for a dependency which might not really
-                 exist, and 0.  */
-              if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
-                  || GET_CODE (SET_SRC (pat)) != MEM
-                  || GET_CODE (SET_DEST (dep_pat)) != MEM
-                  || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
-                                    XEXP (SET_DEST (dep_pat), 0)))
-                return cost + 2;
-
-              return cost + 8;
-            }
-          return cost;
-
-        case TYPE_BRANCH:
-          /* Compare to branch latency is 0.  There is no benefit from
-             separating compare and branch.  */
-          if (dep_type == TYPE_COMPARE)
-            return 0;
-          /* Floating point compare to branch latency is less than
-             compare to conditional move.  */
-          if (dep_type == TYPE_FPCMP)
-            return cost - 1;
-          return cost;
-
-        case TYPE_FPCMOVE:
-          /* FMOVR class instructions can not issue in the same cycle
-             or the cycle after an instruction which writes any
-             integer register.  Model this as cost 2 for dependent
-             instructions.  */
-          if (dep_type == TYPE_IALU
-              && cost < 2)
-            return 2;
-          /* Otherwise check as for integer conditional moves.  */
-
-        case TYPE_CMOVE:
-          /* Conditional moves involving integer registers wait until
-             3 cycles after loads return data.  The interlock applies
-             to all loads, not just dependent loads, but that is hard
-             to model.  */
-          if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
-            return cost + 3;
-          return cost;
-
-        default:
-          break;
-        }
-      break;
-
-    case REG_DEP_ANTI:
-      /* Divide and square root lock destination registers for full latency.  */
-      if (! SLOW_FP (dep_type))
-        return 0;
-      break;
-
-    case REG_DEP_OUTPUT:
-      /* IEU and FPU instruction that have the same destination
-         register cannot be grouped together.  */
-      return cost + 1;
-
-    default:
-      break;
-    }
-
-  /* Other costs not accounted for:
-     - Single precision floating point loads lock the other half of
-       the even/odd register pair.
-     - Several hazards associated with ldd/std are ignored because these
-       instructions are rarely generated for V9.
-     - The floating point pipeline can not have both a single and double
-       precision operation active at the same time.  Format conversions
-       and graphics instructions are given honorary double precision status.
-     - call and jmpl are always the first instruction in a group.  */
-
-  return cost;
-
-#undef SLOW_FP
-}
-
-static int
 sparc_adjust_cost(insn, link, dep, cost)
     rtx insn;
     rtx link;
@@ -7814,792 +7715,63 @@ sparc_adjust_cost(insn, link, dep, cost)
     case PROCESSOR_SPARCLITE86X:
       cost = hypersparc_adjust_cost (insn, link, dep, cost);
       break;
-    case PROCESSOR_ULTRASPARC:
-      cost = ultrasparc_adjust_cost (insn, link, dep, cost);
-      break;
     default:
      break;
    }
 
  return cost;
 }
 
-/* This describes the state of the UltraSPARC pipeline during
-   instruction scheduling.  */
-
-#define TMASK(__x) ((unsigned)1 << ((int)(__x)))
-#define UMASK(__x) ((unsigned)1 << ((int)(__x)))
-
-enum ultra_code { NONE=0, /* no insn at all */
-                  IEU0,   /* shifts and conditional moves */
-                  IEU1,   /* condition code setting insns, calls+jumps */
-                  IEUN,   /* all other single cycle ieu insns */
-                  LSU,    /* loads and stores */
-                  CTI,    /* branches */
-                  FPM,    /* FPU pipeline 1, multiplies and divides */
-                  FPA,    /* FPU pipeline 2, all other operations */
-                  SINGLE, /* single issue instructions */
-                  NUM_ULTRA_CODES };
-
-static enum ultra_code ultra_code_from_mask PARAMS ((int));
-static void ultra_schedule_insn PARAMS ((rtx *, rtx *, int, enum ultra_code));
-
-static const char *const ultra_code_names[NUM_ULTRA_CODES] = {
-  "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
-  "FPM", "FPA", "SINGLE" };
-
-struct ultrasparc_pipeline_state {
-  /* The insns in this group.  */
-  rtx group[4];
-
-  /* The code for each insn.  */
-  enum ultra_code codes[4];
-
-  /* Which insns in this group have been committed by the
-     scheduler.  This is how we determine how many more
-     can issue this cycle.  */
-  char commit[4];
-
-  /* How many insns in this group.  */
-  char group_size;
-
-  /* Mask of free slots still in this group.  */
-  char free_slot_mask;
-
-  /* The slotter uses the following to determine what other
-     insn types can still make their way into this group.  */
-  char contents [NUM_ULTRA_CODES];
-  char num_ieu_insns;
-};
-
-#define ULTRA_NUM_HIST 8
-static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
-static int ultra_cur_hist;
-static int ultra_cycles_elapsed;
-
-#define ultra_pipe (ultra_pipe_hist[ultra_cur_hist])
-
-/* Given TYPE_MASK compute the ultra_code it has.  */
-static enum ultra_code
-ultra_code_from_mask (type_mask)
-     int type_mask;
-{
-  if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
-    return IEU0;
-  else if (type_mask & (TMASK (TYPE_COMPARE) |
-                        TMASK (TYPE_CALL) |
-                        TMASK (TYPE_SIBCALL) |
-                        TMASK (TYPE_UNCOND_BRANCH)))
-    return IEU1;
-  else if (type_mask & TMASK (TYPE_IALU))
-    return IEUN;
-  else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
-                        TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
-                        TMASK (TYPE_FPSTORE)))
-    return LSU;
-  else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
-                        TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRTS) |
-                        TMASK (TYPE_FPSQRTD)))
-    return FPM;
-  else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
-                        TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
-    return FPA;
-  else if (type_mask & TMASK (TYPE_BRANCH))
-    return CTI;
-
-  return SINGLE;
-}
-
-/* Check INSN (a conditional move) and make sure that it's
-   results are available at this cycle.  Return 1 if the
-   results are in fact ready.  */
-static int
-ultra_cmove_results_ready_p (insn)
-     rtx insn;
-{
-  struct ultrasparc_pipeline_state *up;
-  int entry, slot;
-
-  /* If this got dispatched in the previous
-     group, the results are not ready.  */
-  entry = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
-  up = &ultra_pipe_hist[entry];
-  slot = 4;
-  while (--slot >= 0)
-    if (up->group[slot] == insn)
-      return 0;
-
-  return 1;
-}
-
-/* Walk backwards in pipeline history looking for FPU
-   operations which use a mode different than FPMODE and
-   will create a stall if an insn using FPMODE were to be
-   dispatched this cycle.  */
-static int
-ultra_fpmode_conflict_exists (fpmode)
-     enum machine_mode fpmode;
-{
-  int hist_ent;
-  int hist_lim;
-
-  hist_ent = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
-  if (ultra_cycles_elapsed < 4)
-    hist_lim = ultra_cycles_elapsed;
-  else
-    hist_lim = 4;
-  while (hist_lim > 0)
-    {
-      struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
-      int slot = 4;
-
-      while (--slot >= 0)
-        {
-          rtx insn = up->group[slot];
-          enum machine_mode this_mode;
-          rtx pat;
-
-          if (! insn
-              || GET_CODE (insn) != INSN
-              || (pat = PATTERN (insn)) == 0
-              || GET_CODE (pat) != SET)
-            continue;
-
-          this_mode = GET_MODE (SET_DEST (pat));
-          if ((this_mode != SFmode
-               && this_mode != DFmode)
-              || this_mode == fpmode)
-            continue;
-
-          /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
-             we will get a stall.  Loads and stores are independent
-             of these rules.  */
-          if (GET_CODE (SET_SRC (pat)) != ABS
-              && GET_CODE (SET_SRC (pat)) != NEG
-              && ((TMASK (get_attr_type (insn)) &
-                   (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
-                    TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRTS) |
-                    TMASK (TYPE_FPSQRTD) |
-                    TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
-            return 1;
-        }
-      hist_lim--;
-      hist_ent = (hist_ent - 1) & (ULTRA_NUM_HIST - 1);
-    }
-
-  /* No conflicts, safe to dispatch.  */
-  return 0;
-}
-
-/* Find an instruction in LIST which has one of the
-   type attributes enumerated in TYPE_MASK.  START
-   says where to begin the search.
-
-   NOTE: This scheme depends upon the fact that we
-   have less than 32 distinct type attributes.  */
-
-static int ultra_types_avail;
-
-static rtx *
-ultra_find_type (type_mask, list, start)
-     int type_mask;
-     rtx *list;
-     int start;
-{
-  int i;
-
-  /* Short circuit if no such insn exists in the ready
-     at the moment.  */
-  if ((type_mask & ultra_types_avail) == 0)
-    return 0;
-
-  for (i = start; i >= 0; i--)
-    {
-      rtx insn = list[i];
-
-      if (recog_memoized (insn) >= 0
-          && (TMASK(get_attr_type (insn)) & type_mask))
-        {
-          enum machine_mode fpmode = SFmode;
-          rtx pat = 0;
-          int slot;
-          int check_depend = 0;
-          int check_fpmode_conflict = 0;
-
-          if (GET_CODE (insn) == INSN
-              && (pat = PATTERN(insn)) != 0
-              && GET_CODE (pat) == SET
-              && !(type_mask & (TMASK (TYPE_STORE) |
-                                TMASK (TYPE_FPSTORE))))
-            {
-              check_depend = 1;
-              if (GET_MODE (SET_DEST (pat)) == SFmode
-                  || GET_MODE (SET_DEST (pat)) == DFmode)
-                {
-                  fpmode = GET_MODE (SET_DEST (pat));
-                  check_fpmode_conflict = 1;
-                }
-            }
-
-          slot = 4;
-          while(--slot >= 0)
-            {
-              rtx slot_insn = ultra_pipe.group[slot];
-              rtx slot_pat;
-
-              /* Already issued, bad dependency, or FPU
-                 mode conflict.  */
-              if (slot_insn != 0
-                  && (slot_pat = PATTERN (slot_insn)) != 0
-                  && ((insn == slot_insn)
-                      || (check_depend == 1
-                          && GET_CODE (slot_insn) == INSN
-                          && GET_CODE (slot_pat) == SET
-                          && ((GET_CODE (SET_DEST (slot_pat)) == REG
-                               && GET_CODE (SET_SRC (pat)) == REG
-                               && REGNO (SET_DEST (slot_pat)) ==
-                                  REGNO (SET_SRC (pat)))
-                              || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
-                                  && GET_CODE (SET_SRC (pat)) == SUBREG
-                                  && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
-                                     REGNO (SUBREG_REG (SET_SRC (pat)))
-                                  && SUBREG_BYTE (SET_DEST (slot_pat)) ==
-                                     SUBREG_BYTE (SET_SRC (pat)))))
-                      || (check_fpmode_conflict == 1
-                          && GET_CODE (slot_insn) == INSN
-                          && GET_CODE (slot_pat) == SET
-                          && (GET_MODE (SET_DEST (slot_pat)) == SFmode
-                              || GET_MODE (SET_DEST (slot_pat)) == DFmode)
-                          && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
-                goto next;
-            }
-
-          /* Check for peculiar result availability and dispatch
-             interference situations.  */
-          if (pat != 0
-              && ultra_cycles_elapsed > 0)
-            {
-              rtx link;
-
-              for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
-                {
-                  rtx link_insn = XEXP (link, 0);
-                  if (GET_CODE (link_insn) == INSN
-                      && recog_memoized (link_insn) >= 0
-                      && (TMASK (get_attr_type (link_insn)) &
-                          (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
-                      && ! ultra_cmove_results_ready_p (link_insn))
-                    goto next;
-                }
-
-              if (check_fpmode_conflict
-                  && ultra_fpmode_conflict_exists (fpmode))
-                goto next;
-            }
-
-          return &list[i];
-        }
-    next:
-      ;
-    }
-
-  return 0;
-}
-
-static void
-ultra_build_types_avail (ready, n_ready)
-     rtx *ready;
-     int n_ready;
-{
-  int i = n_ready - 1;
-
-  ultra_types_avail = 0;
-  while(i >= 0)
-    {
-      rtx insn = ready[i];
-
-      if (recog_memoized (insn) >= 0)
-        ultra_types_avail |= TMASK (get_attr_type (insn));
-
-      i -= 1;
-    }
-}
-
-/* Place insn pointed to my IP into the pipeline.
-   Make element THIS of READY be that insn if it
-   is not already.  TYPE indicates the pipeline class
-   this insn falls into.  */
-static void
-ultra_schedule_insn (ip, ready, this, type)
-     rtx *ip;
-     rtx *ready;
-     int this;
-     enum ultra_code type;
-{
-  int pipe_slot;
-  char mask = ultra_pipe.free_slot_mask;
-  rtx temp;
-
-  /* Obtain free slot.  */
-  for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
-    if ((mask & (1 << pipe_slot)) != 0)
-      break;
-  if (pipe_slot == 4)
-    abort ();
-
-  /* In it goes, and it hasn't been committed yet.  */
-  ultra_pipe.group[pipe_slot] = *ip;
-  ultra_pipe.codes[pipe_slot] = type;
-  ultra_pipe.contents[type] = 1;
-  if (UMASK (type) &
-      (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
-    ultra_pipe.num_ieu_insns += 1;
-
-  ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
-  ultra_pipe.group_size += 1;
-  ultra_pipe.commit[pipe_slot] = 0;
-
-  /* Update ready list.  */
-  temp = *ip;
-  while (ip != &ready[this])
-    {
-      ip[0] = ip[1];
-      ++ip;
-    }
-  *ip = temp;
-}
-
-/* Advance to the next pipeline group.  */
-static void
-ultra_flush_pipeline ()
-{
-  ultra_cur_hist = (ultra_cur_hist + 1) & (ULTRA_NUM_HIST - 1);
-  ultra_cycles_elapsed += 1;
-  memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
-  ultra_pipe.free_slot_mask = 0xf;
-}
-
-/* Init our data structures for this current block.  */
-static void
-ultrasparc_sched_init ()
-{
-  memset ((char *) ultra_pipe_hist, 0, sizeof ultra_pipe_hist);
-  ultra_cur_hist = 0;
-  ultra_cycles_elapsed = 0;
-  ultra_pipe.free_slot_mask = 0xf;
-}
-
 static void
 sparc_sched_init (dump, sched_verbose, max_ready)
      FILE *dump ATTRIBUTE_UNUSED;
     int sched_verbose ATTRIBUTE_UNUSED;
     int max_ready ATTRIBUTE_UNUSED;
 {
-  if (sparc_cpu == PROCESSOR_ULTRASPARC)
-    ultrasparc_sched_init ();
 }
 
-/* INSN has been scheduled, update pipeline commit state
-   and return how many instructions are still to be
-   scheduled in this group.  */
 static int
-ultrasparc_variable_issue (insn)
-     rtx insn;
+sparc_use_dfa_pipeline_interface ()
 {
-  struct ultrasparc_pipeline_state *up = &ultra_pipe;
-  int i, left_to_fire;
-
-  left_to_fire = 0;
-  for (i = 0; i < 4; i++)
-    {
-      if (up->group[i] == 0)
-        continue;
-
-      if (up->group[i] == insn)
-        {
-          up->commit[i] = 1;
-        }
-      else if (! up->commit[i])
-        left_to_fire++;
-    }
-
-  return left_to_fire;
+  if ((1 << sparc_cpu) &
+      ((1 << PROCESSOR_ULTRASPARC) | (1 << PROCESSOR_CYPRESS) |
+       (1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
+       (1 << PROCESSOR_SPARCLITE86X) | (1 << PROCESSOR_TSC701) |
+       (1 << PROCESSOR_ULTRASPARC3)))
+    return 1;
+  return 0;
 }
 
 static int
-sparc_variable_issue (dump, sched_verbose, insn, cim)
-     FILE *dump ATTRIBUTE_UNUSED;
-     int sched_verbose ATTRIBUTE_UNUSED;
-     rtx insn;
-     int cim;
-{
-  if (sparc_cpu == PROCESSOR_ULTRASPARC)
-    return ultrasparc_variable_issue (insn);
-  else
-    return cim - 1;
-}
-
-/* In actual_hazard_this_instance, we may have yanked some
-   instructions from the ready list due to conflict cost
-   adjustments.  If so, and such an insn was in our pipeline
-   group, remove it and update state.  */
-static void
-ultra_rescan_pipeline_state (ready, n_ready)
-     rtx *ready;
-     int n_ready;
-{
-  struct ultrasparc_pipeline_state *up = &ultra_pipe;
-  int i;
-
-  for (i = 0; i < 4; i++)
-    {
-      rtx insn = up->group[i];
-      int j;
-
-      if (! insn)
-        continue;
-
-      /* If it has been committed, then it was removed from
-         the ready list because it was actually scheduled,
-         and that is not the case we are searching for here.  */
-      if (up->commit[i] != 0)
-        continue;
-
-      for (j = n_ready - 1; j >= 0; j--)
-        if (ready[j] == insn)
-          break;
-
-      /* If we didn't find it, toss it.  */
-      if (j < 0)
-        {
-          enum ultra_code ucode = up->codes[i];
-
-          up->group[i] = 0;
-          up->codes[i] = NONE;
-          up->contents[ucode] = 0;
-          if (UMASK (ucode) &
-              (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
-            up->num_ieu_insns -= 1;
-
-          up->free_slot_mask |= (1 << i);
-          up->group_size -= 1;
-          up->commit[i] = 0;
-        }
-    }
-}
-
-static void
-ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
-     FILE *dump;
-     int sched_verbose;
-     rtx *ready;
-     int n_ready;
-{
-  struct ultrasparc_pipeline_state *up = &ultra_pipe;
-  int i, this_insn;
-
-  if (sched_verbose)
-    {
-      int n;
-
-      fprintf (dump, "\n;;\tUltraSPARC Looking at [");
-      for (n = n_ready - 1; n >= 0; n--)
-        {
-          rtx insn = ready[n];
-          enum ultra_code ucode;
-
-          if (recog_memoized (insn) < 0)
-            continue;
-          ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
-          if (n != 0)
-            fprintf (dump, "%s(%d) ",
-                     ultra_code_names[ucode],
-                     INSN_UID (insn));
-          else
-            fprintf (dump, "%s(%d)",
-                     ultra_code_names[ucode],
-                     INSN_UID (insn));
-        }
-      fprintf (dump, "]\n");
-    }
-
-  this_insn = n_ready - 1;
-
-  /* Skip over junk we don't understand.  */
-  while ((this_insn >= 0)
-         && recog_memoized (ready[this_insn]) < 0)
-    this_insn--;
-
-  ultra_build_types_avail (ready, this_insn + 1);
-
-  while (this_insn >= 0) {
-    int old_group_size = up->group_size;
-
-    if (up->group_size != 0)
-      {
-        int num_committed;
-
-        num_committed = (up->commit[0] + up->commit[1] +
-                         up->commit[2] + up->commit[3]);
-        /* If nothing has been commited from our group, or all of
-           them have.  Clear out the (current cycle's) pipeline
-           state and start afresh.  */
-        if (num_committed == 0
-            || num_committed == up->group_size)
-          {
-            ultra_flush_pipeline ();
-            up = &ultra_pipe;
-            old_group_size = 0;
-          }
-        else
-          {
-            /* OK, some ready list insns got requeued and thus removed
-               from the ready list.  Account for this fact.  */
-            ultra_rescan_pipeline_state (ready, n_ready);
-
-            /* Something "changed", make this look like a newly
-               formed group so the code at the end of the loop
-               knows that progress was in fact made.  */
-            if (up->group_size != old_group_size)
-              old_group_size = 0;
-          }
-      }
-
-    if (up->group_size == 0)
-      {
-        /* If the pipeline is (still) empty and we have any single
-           group insns, get them out now as this is a good time.  */
-        rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_IDIV) |
-                                    TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
-                                    TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
-                                   ready, this_insn);
-        if (ip)
-          {
-            ultra_schedule_insn (ip, ready, this_insn, SINGLE);
-            break;
-          }
-
-        /* If we are not in the process of emptying out the pipe, try to
-           obtain an instruction which must be the first in it's group.  */
-        ip = ultra_find_type ((TMASK (TYPE_CALL) |
-                               TMASK (TYPE_SIBCALL) |
-                               TMASK (TYPE_CALL_NO_DELAY_SLOT) |
-                               TMASK (TYPE_UNCOND_BRANCH)),
-                              ready, this_insn);
-        if (ip)
-          {
-            ultra_schedule_insn (ip, ready, this_insn, IEU1);
-            this_insn--;
-          }
-        else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
-                                         TMASK (TYPE_FPDIVD) |
-                                         TMASK (TYPE_FPSQRTS) |
-                                         TMASK (TYPE_FPSQRTD)),
-                                        ready, this_insn)) != 0)
-          {
-            ultra_schedule_insn (ip, ready, this_insn, FPM);
-            this_insn--;
-          }
-      }
-
-    /* Try to fill the integer pipeline.  First, look for an IEU0 specific
-       operation.  We can't do more IEU operations if the first 3 slots are
-       all full or we have dispatched two IEU insns already.  */
-    if ((up->free_slot_mask & 0x7) != 0
-        && up->num_ieu_insns < 2
-        && up->contents[IEU0] == 0
-        && up->contents[IEUN] == 0)
-      {
-        rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
-        if (ip)
-          {
-            ultra_schedule_insn (ip, ready, this_insn, IEU0);
-            this_insn--;
-          }
-      }
-
-    /* If we can, try to find an IEU1 specific or an unnamed
-       IEU instruction.  */
-    if ((up->free_slot_mask & 0x7) != 0
-        && up->num_ieu_insns < 2)
-      {
-        rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) |
-                                    (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
-                                   ready, this_insn);
-        if (ip)
-          {
-            rtx insn = *ip;
-
-            ultra_schedule_insn (ip, ready, this_insn,
-                                 (!up->contents[IEU1]
-                                  && get_attr_type (insn) == TYPE_COMPARE)
-                                 ? IEU1 : IEUN);
-            this_insn--;
-          }
-      }
-
-    /* If only one IEU insn has been found, try to find another unnamed
-       IEU operation or an IEU1 specific one.  */
-    if ((up->free_slot_mask & 0x7) != 0
-        && up->num_ieu_insns < 2)
-      {
-        rtx *ip;
-        int tmask = TMASK (TYPE_IALU);
-
-        if (!up->contents[IEU1])
-          tmask |= TMASK (TYPE_COMPARE);
-        ip = ultra_find_type (tmask, ready, this_insn);
-        if (ip)
-          {
-            rtx insn = *ip;
-
-            ultra_schedule_insn (ip, ready, this_insn,
-                                 (!up->contents[IEU1]
-                                  && get_attr_type (insn) == TYPE_COMPARE)
-                                 ? IEU1 : IEUN);
-            this_insn--;
-          }
-      }
-
-    /* Try for a load or store, but such an insn can only be issued
-       if it is within' one of the first 3 slots.  */
-    if ((up->free_slot_mask & 0x7) != 0
-        && up->contents[LSU] == 0)
-      {
-        rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
-                                    TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
-                                    TMASK (TYPE_FPSTORE)), ready, this_insn);
-        if (ip)
-          {
-            ultra_schedule_insn (ip, ready, this_insn, LSU);
-            this_insn--;
-          }
-      }
-
-    /* Now find FPU operations, first FPM class.  But not divisions or
-       square-roots because those will break the group up.  Unlike all
-       the previous types, these can go in any slot.  */
-    if (up->free_slot_mask != 0
-        && up->contents[FPM] == 0)
-      {
-        rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
-        if (ip)
-          {
-            ultra_schedule_insn (ip, ready, this_insn, FPM);
-            this_insn--;
-          }
-      }
-
-    /* Continue on with FPA class if we have not filled the group already.  */
-    if (up->free_slot_mask != 0
-        && up->contents[FPA] == 0)
-      {
-        rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
-                                    TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
-                                   ready, this_insn);
-        if (ip)
-          {
-            ultra_schedule_insn (ip, ready, this_insn, FPA);
-            this_insn--;
-          }
-      }
-
-    /* Finally, maybe stick a branch in here.  */
-    if (up->free_slot_mask != 0
-        && up->contents[CTI] == 0)
-      {
-        rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
-
-        /* Try to slip in a branch only if it is one of the
-           next 2 in the ready list.  */
-        if (ip && ((&ready[this_insn] - ip) < 2))
-          {
-            ultra_schedule_insn (ip, ready, this_insn, CTI);
-            this_insn--;
-          }
-      }
-
-    up->group_size = 0;
-    for (i = 0; i < 4; i++)
-      if ((up->free_slot_mask & (1 << i)) == 0)
-        up->group_size++;
-
-    /* See if we made any progress...  */
-    if (old_group_size != up->group_size)
-      break;
-
-    /* Clean out the (current cycle's) pipeline state
-       and try once more.  If we placed no instructions
-       into the pipeline at all, it means a real hard
-       conflict exists with some earlier issued instruction
-       so we must advance to the next cycle to clear it up.  */
-    if (up->group_size == 0)
-      {
-        ultra_flush_pipeline ();
-        up = &ultra_pipe;
-      }
-    else
-      {
-        memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
-        ultra_pipe.free_slot_mask = 0xf;
-      }
-  }
-
-  if (sched_verbose)
-    {
-      int n, gsize;
-
-      fprintf (dump, ";;\tUltraSPARC Launched [");
-      gsize = up->group_size;
-      for (n = 0; n < 4; n++)
-        {
-          rtx insn = up->group[n];
-
-          if (! insn)
-            continue;
-
-          gsize -= 1;
-          if (gsize != 0)
-            fprintf (dump, "%s(%d) ",
-                     ultra_code_names[up->codes[n]],
-                     INSN_UID (insn));
-          else
-            fprintf (dump, "%s(%d)",
-                     ultra_code_names[up->codes[n]],
-                     INSN_UID (insn));
-        }
-      fprintf (dump, "]\n");
-    }
-}
 
 static int
+sparc_use_sched_lookahead ()
+{
+  if (sparc_cpu == PROCESSOR_ULTRASPARC
+      || sparc_cpu == PROCESSOR_ULTRASPARC3)
+    return 4;
+  if ((1 << sparc_cpu) &
+      ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
+       (1 << PROCESSOR_SPARCLITE86X)))
+    return 3;
+  return 0;
+}
+
+static int
-sparc_sched_reorder (dump, sched_verbose, ready, n_readyp, clock)
-     FILE *dump;
-     int sched_verbose;
-     rtx *ready;
-     int *n_readyp;
-     int clock ATTRIBUTE_UNUSED;
-{
-  if (sparc_cpu == PROCESSOR_ULTRASPARC)
-    ultrasparc_sched_reorder (dump, sched_verbose, ready, *n_readyp);
-  return sparc_issue_rate ();
-}
-
-static int
 sparc_issue_rate ()
 {
   switch (sparc_cpu)
     {
-      default:
-        return 1;
-      case PROCESSOR_V9:
+    default:
+      return 1;
+    case PROCESSOR_V9:
       /* Assume V9 processors are capable of at least dual-issue.  */
       return 2;
-      case PROCESSOR_SUPERSPARC:
-        return 3;
+    case PROCESSOR_SUPERSPARC:
+      return 3;
     case PROCESSOR_HYPERSPARC:
     case PROCESSOR_SPARCLITE86X:
       return 2;
-      case PROCESSOR_ULTRASPARC:
-        return 4;
+    case PROCESSOR_ULTRASPARC:
+    case PROCESSOR_ULTRASPARC3:
+      return 4;
     }
 }
@@ -8668,8 +7840,8 @@ set_extends (insn)
 }
 
 /* We _ought_ to have only one kind per function, but...  */
-static rtx sparc_addr_diff_list;
-static rtx sparc_addr_list;
+static GTY(()) rtx sparc_addr_diff_list;
+static GTY(()) rtx sparc_addr_list;
 
 void
 sparc_defer_case_vector (lab, vec, diff)
@@ -8833,6 +8005,8 @@ sparc_check_64 (x, insn)
   return 0;
 }
 
+/* Returns assembly code to perform a DImode shift using
+   a 64-bit global or out register on SPARC-V8+.  */
 char *
 sparc_v8plus_shift (operands, insn, opcode)
     rtx *operands;
@@ -8841,8 +8015,11 @@
 {
   static char asm_code[60];
 
-  if (GET_CODE (operands[3]) == SCRATCH)
+  /* The scratch register is only required when the destination
+     register is not a 64-bit global or out register.  */
+  if (which_alternative != 2)
     operands[3] = operands[0];
+
   if (GET_CODE (operands[1]) == CONST_INT)
     {
       output_asm_insn ("mov\t%1, %3", operands);
@@ -8856,6 +8033,7 @@
     }
 
   strcpy(asm_code, opcode);
+
   if (which_alternative != 2)
     return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
   else
@@ -8879,37 +8057,6 @@ sparc_profile_hook (labelno)
   emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
 }
 
-/* Mark ARG, which is really a struct ultrasparc_pipline_state *, for
-   GC.  */
-
-static void
-mark_ultrasparc_pipeline_state (arg)
-     void *arg;
-{
-  struct ultrasparc_pipeline_state *ups;
-  size_t i;
-
-  ups = (struct ultrasparc_pipeline_state *) arg;
-  for (i = 0; i < sizeof (ups->group) / sizeof (rtx); ++i)
-    ggc_mark_rtx (ups->group[i]);
-}
-
-/* Called to register all of our global variables with the garbage
-   collector.  */
-
-static void
-sparc_add_gc_roots ()
-{
-  ggc_add_rtx_root (&sparc_compare_op0, 1);
-  ggc_add_rtx_root (&sparc_compare_op1, 1);
-  ggc_add_rtx_root (&global_offset_table, 1);
-  ggc_add_rtx_root (&get_pc_symbol, 1);
-  ggc_add_rtx_root (&sparc_addr_diff_list, 1);
-  ggc_add_rtx_root (&sparc_addr_list, 1);
-  ggc_add_root (ultra_pipe_hist, ARRAY_SIZE (ultra_pipe_hist),
-                sizeof (ultra_pipe_hist[0]), &mark_ultrasparc_pipeline_state);
-}
-
 #ifdef OBJECT_FORMAT_ELF
 static void
 sparc_elf_asm_named_section (name, flags)
@@ -8939,6 +8086,36 @@ sparc_elf_asm_named_section (name, flags)
 }
 #endif /* OBJECT_FORMAT_ELF */
 
+/* ??? Similar to the standard section selection, but force reloc-y-ness
+   if SUNOS4_SHARED_LIBRARIES.  Unclear why this helps (as opposed to
+   pretending PIC always on), but that's what the old code did.  */
+
+static void
+sparc_aout_select_section (t, reloc, align)
+     tree t;
+     int reloc;
+     unsigned HOST_WIDE_INT align;
+{
+  default_select_section (t, reloc | SUNOS4_SHARED_LIBRARIES, align);
+}
+
+/* Use text section for a constant unless we need more alignment than
+   that offers.  */
+
+static void
+sparc_aout_select_rtx_section (mode, x, align)
+     enum machine_mode mode;
+     rtx x;
+     unsigned HOST_WIDE_INT align;
+{
+  if (align <= MAX_TEXT_ALIGN
+      && ! (flag_pic && (symbolic_operand (x, mode)
+                         || SUNOS4_SHARED_LIBRARIES)))
+    readonly_data_section ();
+  else
+    data_section ();
+}
+
 int
 sparc_extra_constraint_check (op, c, strict)
@@ -9001,14 +8178,347 @@ sparc_extra_constraint_check (op, c, strict)
   return reload_ok_mem;
 }
 
+/* ??? This duplicates information provided to the compiler by the
+   ??? scheduler description.  Some day, teach genautomata to output
+   ??? the latencies and then CSE will just use that.  */
+
+int
+sparc_rtx_costs (x, code, outer_code)
+     rtx x;
+     enum rtx_code code, outer_code;
+{
+  switch (code)
+    {
+    case PLUS: case MINUS: case ABS: case NEG:
+    case FLOAT: case UNSIGNED_FLOAT:
+    case FIX: case UNSIGNED_FIX:
+    case FLOAT_EXTEND: case FLOAT_TRUNCATE:
+      if (FLOAT_MODE_P (GET_MODE (x)))
+        {
+          switch (sparc_cpu)
+            {
+            case PROCESSOR_ULTRASPARC:
+            case PROCESSOR_ULTRASPARC3:
+              return COSTS_N_INSNS (4);
+
+            case PROCESSOR_SUPERSPARC:
+              return COSTS_N_INSNS (3);
+
+            case PROCESSOR_CYPRESS:
+              return COSTS_N_INSNS (5);
+
+            case PROCESSOR_HYPERSPARC:
+            case PROCESSOR_SPARCLITE86X:
+            default:
+              return COSTS_N_INSNS (1);
+            }
+        }
+
+      return COSTS_N_INSNS (1);
+
+    case SQRT:
+      switch (sparc_cpu)
+        {
+        case PROCESSOR_ULTRASPARC:
+          if (GET_MODE (x) == SFmode)
+            return COSTS_N_INSNS (13);
+          else
+            return COSTS_N_INSNS (23);
+
+        case PROCESSOR_ULTRASPARC3:
+          if (GET_MODE (x) == SFmode)
+            return COSTS_N_INSNS (20);
+          else
+            return COSTS_N_INSNS (29);
+
+        case PROCESSOR_SUPERSPARC:
+          return COSTS_N_INSNS (12);
+
+        case PROCESSOR_CYPRESS:
+          return COSTS_N_INSNS (63);
+
+        case PROCESSOR_HYPERSPARC:
+        case PROCESSOR_SPARCLITE86X:
+          return COSTS_N_INSNS (17);
+
+        default:
+          return COSTS_N_INSNS (30);
+        }
+
+    case COMPARE:
+      if (FLOAT_MODE_P (GET_MODE (x)))
+        {
+          switch (sparc_cpu)
+            {
+            case PROCESSOR_ULTRASPARC:
+            case PROCESSOR_ULTRASPARC3:
+              return COSTS_N_INSNS (1);
+
+            case PROCESSOR_SUPERSPARC:
+              return COSTS_N_INSNS (3);
+
+            case PROCESSOR_CYPRESS:
+              return COSTS_N_INSNS (5);
+
+            case PROCESSOR_HYPERSPARC:
+            case PROCESSOR_SPARCLITE86X:
+            default:
+              return COSTS_N_INSNS (1);
+            }
+        }
+
+      /* ??? Maybe mark integer compares as zero cost on
+         ??? all UltraSPARC processors because the result
+         ??? can be bypassed to a branch in the same group.  */
+
+      return COSTS_N_INSNS (1);
+
+    case MULT:
+      if (FLOAT_MODE_P (GET_MODE (x)))
+        {
+          switch (sparc_cpu)
+            {
+            case PROCESSOR_ULTRASPARC:
+            case PROCESSOR_ULTRASPARC3:
+              return COSTS_N_INSNS (4);
+
+            case PROCESSOR_SUPERSPARC:
+              return COSTS_N_INSNS (3);
+
+            case PROCESSOR_CYPRESS:
+              return COSTS_N_INSNS (7);
+
+            case PROCESSOR_HYPERSPARC:
+            case PROCESSOR_SPARCLITE86X:
+              return COSTS_N_INSNS (1);
+
+            default:
+              return COSTS_N_INSNS (5);
+            }
+        }
+
+      /* The latency is actually variable for Ultra-I/II
+         And if one of the inputs have a known constant
+         value, we could calculate this precisely.
+
+         However, for that to be useful we would need to
+         add some machine description changes which would
+         make sure small constants ended up in rs1 of the
+         multiply instruction.  This is because the multiply
+         latency is determined by the number of clear (or
+         set if the value is negative) bits starting from
+         the most significant bit of the first input.
+
+         The algorithm for computing num_cycles of a multiply
+         on Ultra-I/II is:
+
+             if (rs1 < 0)
+                 highest_bit = highest_clear_bit(rs1);
+             else
+                 highest_bit = highest_set_bit(rs1);
+             if (num_bits < 3)
+                 highest_bit = 3;
+             num_cycles = 4 + ((highest_bit - 3) / 2);
+
+         If we did that we would have to also consider register
+         allocation issues that would result from forcing such
+         a value into a register.
+
+         There are other similar tricks we could play if we
+         knew, for example, that one input was an array index.
+
+         Since we do not play any such tricks currently the
+         safest thing to do is report the worst case latency.  */
+      if (sparc_cpu == PROCESSOR_ULTRASPARC)
+        return (GET_MODE (x) == DImode ?
+                COSTS_N_INSNS (34) : COSTS_N_INSNS (19));
+
+      /* Multiply latency on Ultra-III, fortunately, is constant.  */
+      if (sparc_cpu == PROCESSOR_ULTRASPARC3)
+        return COSTS_N_INSNS (6);
+
+      if (sparc_cpu == PROCESSOR_HYPERSPARC
+          || sparc_cpu == PROCESSOR_SPARCLITE86X)
+        return COSTS_N_INSNS (17);
+
+      return (TARGET_HARD_MUL
+              ? COSTS_N_INSNS (5)
+              : COSTS_N_INSNS (25));
+
+    case DIV:
+    case UDIV:
+    case MOD:
+    case UMOD:
+      if (FLOAT_MODE_P (GET_MODE (x)))
+        {
+          switch (sparc_cpu)
+            {
+            case PROCESSOR_ULTRASPARC:
+              if (GET_MODE (x) == SFmode)
+                return COSTS_N_INSNS (13);
+              else
+                return COSTS_N_INSNS (23);
+
+            case PROCESSOR_ULTRASPARC3:
+              if (GET_MODE (x) == SFmode)
+                return COSTS_N_INSNS (17);
+              else
+                return COSTS_N_INSNS (20);
+
+            case PROCESSOR_SUPERSPARC:
+              if (GET_MODE (x) == SFmode)
+                return COSTS_N_INSNS (6);
+              else
+                return COSTS_N_INSNS (9);
+
+            case PROCESSOR_HYPERSPARC:
+            case PROCESSOR_SPARCLITE86X:
+              if (GET_MODE (x) == SFmode)
+                return COSTS_N_INSNS (8);
+              else
+                return COSTS_N_INSNS (12);
+
+            default:
+              return COSTS_N_INSNS (7);
+            }
+        }
+
+      if (sparc_cpu == PROCESSOR_ULTRASPARC)
+        return (GET_MODE (x) == DImode ?
+                COSTS_N_INSNS (68) : COSTS_N_INSNS (37));
+      if (sparc_cpu == PROCESSOR_ULTRASPARC3)
+        return (GET_MODE (x) == DImode ?
+                COSTS_N_INSNS (71) : COSTS_N_INSNS (40));
+      return COSTS_N_INSNS (25);
+
+    case IF_THEN_ELSE:
+      /* Conditional moves.  */
+      switch (sparc_cpu)
+        {
+        case PROCESSOR_ULTRASPARC:
+          return COSTS_N_INSNS (2);
+
+        case PROCESSOR_ULTRASPARC3:
+          if (FLOAT_MODE_P (GET_MODE (x)))
+            return COSTS_N_INSNS (3);
+          else
+            return COSTS_N_INSNS (2);
+
+        default:
+          return COSTS_N_INSNS (1);
+        }
+
+    case MEM:
+      /* If outer-code is SIGN/ZERO extension we have to subtract
+         out COSTS_N_INSNS (1) from whatever we return in determining
+         the cost.  */
+      switch (sparc_cpu)
+        {
+        case PROCESSOR_ULTRASPARC:
+          if (outer_code == ZERO_EXTEND)
+            return COSTS_N_INSNS (1);
+          else
+            return COSTS_N_INSNS (2);
+
+        case PROCESSOR_ULTRASPARC3:
+          if (outer_code == ZERO_EXTEND)
+            {
+              if (GET_MODE (x) == QImode
+                  || GET_MODE (x) == HImode
+                  || outer_code == SIGN_EXTEND)
+                return COSTS_N_INSNS (2);
+              else
+                return COSTS_N_INSNS (1);
+            }
+          else
+            {
+              /* This handles sign extension (3 cycles)
+                 and everything else (2 cycles).  */
+              return COSTS_N_INSNS (2);
+            }
+
+        case PROCESSOR_SUPERSPARC:
+          if (FLOAT_MODE_P (GET_MODE (x))
+              || outer_code == ZERO_EXTEND
+              || outer_code == SIGN_EXTEND)
+            return COSTS_N_INSNS (0);
+          else
+            return COSTS_N_INSNS (1);
+
+        case PROCESSOR_TSC701:
+          if (outer_code == ZERO_EXTEND
+              || outer_code == SIGN_EXTEND)
+            return COSTS_N_INSNS (2);
+          else
+            return COSTS_N_INSNS (3);
+
+        case PROCESSOR_CYPRESS:
+          if (outer_code == ZERO_EXTEND
+              || outer_code == SIGN_EXTEND)
+            return COSTS_N_INSNS (1);
+          else
+            return COSTS_N_INSNS (2);
+
+        case PROCESSOR_HYPERSPARC:
+        case PROCESSOR_SPARCLITE86X:
+        default:
+          if (outer_code == ZERO_EXTEND
+              || outer_code == SIGN_EXTEND)
+            return COSTS_N_INSNS (0);
+          else
+            return COSTS_N_INSNS (1);
+        }
+
+    case CONST_INT:
+      if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
+        return 0;
+
+      /* fallthru */
+    case HIGH:
+      return 2;
+
+    case CONST:
+    case LABEL_REF:
+    case SYMBOL_REF:
+      return 4;
+
+    case CONST_DOUBLE:
+      if (GET_MODE (x) == DImode)
+        if ((XINT (x, 3) == 0
+             && (unsigned) XINT (x, 2) < 0x1000)
+            || (XINT (x, 3) == -1
+                && XINT (x, 2) < 0
+                && XINT (x, 2) >= -0x1000))
+          return 0;
+      return 8;
+
+    default:
+      abort();
+    };
+}
+
+/* If we are referencing a function make the SYMBOL_REF special.  In
+   the Embedded Medium/Anywhere code model, %g4 points to the data
+   segment so we must not add it to function addresses.  */
+
+static void
+sparc_encode_section_info (decl, first)
+     tree decl;
+     int first ATTRIBUTE_UNUSED;
+{
+  if (TARGET_CM_EMBMEDANY && TREE_CODE (decl) == FUNCTION_DECL)
+    SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1;
+}
+
 /* Output code to add DELTA to the first argument, and then jump to
    FUNCTION.  Used for C++ multiple inheritance.  */
 
-void
-sparc_output_mi_thunk (file, thunk_fndecl, delta, function)
+static void
+sparc_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
      FILE *file;
     tree thunk_fndecl ATTRIBUTE_UNUSED;
     HOST_WIDE_INT delta;
+     HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED;
     tree function;
 {
   rtx this, insn, funexp, delta_rtx, tmp;
@@ -9067,3 +8577,5 @@
   reload_completed = 0;
   no_new_pseudos = 0;
 }
+
+#include "gt-sparc.h"