Diffstat (limited to 'meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99474.patch')
-rw-r--r-- | meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99474.patch | 3346
1 files changed, 3346 insertions, 0 deletions
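
Note (illustrative, not part of the patch): the backport below adds the -fshrink-wrap option and a simple_return pattern, so the prologue is emitted only before the parts of a function that need it. A minimal C sketch of the kind of function that benefits, assuming a hypothetical helper do_work():

    /* Illustrative only.  With -fshrink-wrap, the early-exit path can
       return via the new simple_return pattern without executing the
       prologue; only the path that calls do_work() needs a stack frame.  */
    extern int do_work (int *p);

    int
    f (int *p)
    {
      if (p == 0)
        return -1;          /* no frame needed on this path */
      return do_work (p);   /* prologue/epilogue required here */
    }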
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99474.patch b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99474.patch new file mode 100644 index 0000000..9b0fb0b --- /dev/null +++ b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99474.patch @@ -0,0 +1,3346 @@ +2011-01-14 Bernd Schmidt <bernds@codesourcery.com> + + gcc/ + * function.c (thread_prologue_and_epilogue_insns): Avoid uninitialized + variable. + +2011-01-12 Bernd Schmidt <bernds@codesourcery.com> + + gcc/ + * config/s390/s390.c (s390_emit_epilogue): Don't use gen_rtx_RETURN. + * config/rx/rx.c (gen_rx_rtsd_vector): Likewise. + * config/m68hc11/m68hc11.md (return): Likewise. + * config/cris/cris.c (cris_expand_return): Likewise. + * config/m68k/m68k.c (m68k_expand_epilogue): Likewise. + * config/picochip/picochip.c (picochip_expand_epilogue): Likewise. + * config/h8300/h8300.c (h8300_push_pop, h8300_expand_epilogue): + Likewise. + * config/v850/v850.c (expand_epilogue): Likewise. + * config/bfin/bfin.c (bfin_expand_call): Likewise. + +2011-01-04 Catherine Moore <clm@codesourcery.com> + + gcc/ + * config/rs6000/rs6000.c (rs6000_make_savres_rtx): Change + gen_rtx_RETURN to ret_rtx. + (rs6000_emit_epilogue): Likewise. + (rs6000_output_mi_thunk): Likewise. + +2011-01-03 Bernd Schmidt <bernds@codesourcery.com> + + gcc/ + * doc/tm.texi (RETURN_ADDR_REGNUM): Document. + * doc/md.texi (simple_return): Document pattern. + (return): Add a sentence to clarify. + * doc/rtl.texi (simple_return): Document. + * doc/invoke.texi (Optimize Options): Document -fshrink-wrap. + * common.opt (fshrink-wrap): New. + * opts.c (decode_options): Set it for -O2 and above. + * gengenrtl.c (special_rtx): PC, CC0, RETURN and SIMPLE_RETURN + are special. + * rtl.h (ANY_RETURN_P): New macro. + (global_rtl_index): Add GR_RETURN and GR_SIMPLE_RETURN. + (ret_rtx, simple_return_rtx): New macros. + * genemit.c (gen_exp): RETURN and SIMPLE_RETURN have unique rtxs. + (gen_expand, gen_split): Use ANY_RETURN_P. + * rtl.c (copy_rtx): RETURN and SIMPLE_RETURN are shared. + * emit-rtl.c (verify_rtx_sharing): Likewise. + (skip_consecutive_labels): Return the argument if it is a return rtx. + (classify_insn): Handle both kinds of return. + (init_emit_regs): Create global rtl for ret_rtx and simple_return_rtx. + * df-scan.c (df_uses_record): Handle SIMPLE_RETURN. + * rtl.def (SIMPLE_RETURN): New. + * rtlanal.c (tablejump_p): Check JUMP_LABEL for returns. + * final.c (final_scan_insn): Recognize both kinds of return. + * reorg.c (function_return_label, function_simple_return_label): New + static variables. + (end_of_function_label): Remove. + (simplejump_or_return_p): New static function. + (find_end_label): Add a new arg, KIND. All callers changed. + Depending on KIND, look for a label suitable for return or + simple_return. + (make_return_insns): Make corresponding changes. + (get_jump_flags): Check JUMP_LABELs for returns. + (follow_jumps): Likewise. + (get_branch_condition): Check target for return patterns rather + than NULL. + (own_thread_p): Likewise for thread. + (steal_delay_list_from_target): Check JUMP_LABELs for returns. + Use simplejump_or_return_p. + (fill_simple_delay_slots): Likewise. + (optimize_skip): Likewise. + (fill_slots_from_thread): Likewise. + (relax_delay_slots): Likewise. + (dbr_schedule): Adjust handling of end_of_function_label for the + two new variables. + * ifcvt.c (find_if_case_1): Take care when redirecting jumps to the + exit block. + (dead_or_predicable): Change NEW_DEST arg to DEST_EDGE. 
All callers + changed. Ensure that the right label is passed to redirect_jump. + * jump.c (condjump_p, condjump_in_parallel_p, any_condjump_p, + returnjump_p): Handle SIMPLE_RETURNs. + (delete_related_insns): Check JUMP_LABEL for returns. + (redirect_target): New static function. + (redirect_exp_1): Use it. Handle any kind of return rtx as a label + rather than interpreting NULL as a return. + (redirect_jump_1): Assert that nlabel is not NULL. + (redirect_jump): Likewise. + (redirect_jump_2): Handle any kind of return rtx as a label rather + than interpreting NULL as a return. + * dwarf2out.c (compute_barrier_args_size_1): Check JUMP_LABEL for + returns. + * function.c (emit_return_into_block): Remove useless declaration. + (record_hard_reg_sets, frame_required_for_rtx, gen_return_pattern, + requires_stack_frame_p): New static functions. + (emit_return_into_block): New arg SIMPLE_P. All callers changed. + Generate either kind of return pattern and update the JUMP_LABEL. + (thread_prologue_and_epilogue_insns): Implement a form of + shrink-wrapping. Ensure JUMP_LABELs for return insns are set. + * print-rtl.c (print_rtx): Handle returns in JUMP_LABELs. + * cfglayout.c (fixup_reorder_chain): Ensure JUMP_LABELs for returns + remain correct. + * resource.c (find_dead_or_set_registers): Check JUMP_LABELs for + returns. + (mark_target_live_regs): Don't pass a return rtx to next_active_insn. + * basic-block.h (force_nonfallthru_and_redirect): Declare. + * sched-vis.c (print_pattern): Add case for SIMPLE_RETURN. + * cfgrtl.c (force_nonfallthru_and_redirect): No longer static. New arg + JUMP_LABEL. All callers changed. Use the label when generating + return insns. + + * config/i386/i386.md (returns, return_str, return_cond): New + code_iterator and corresponding code_attrs. + (<return_str>return): Renamed from return and adapted. + (<return_str>return_internal): Likewise for return_internal. + (<return_str>return_internal_long): Likewise for return_internal_long. + (<return_str>return_pop_internal): Likewise for return_pop_internal. + (<return_str>return_indirect_internal): Likewise for + return_indirect_internal. + * config/i386/i386.c (ix86_expand_epilogue): Expand a simple_return as + the last insn. + (ix86_pad_returns): Handle both kinds of return rtx. + * config/arm/arm.c (use_simple_return_p): new function. + (is_jump_table): Handle returns in JUMP_LABELs. + (output_return_instruction): New arg SIMPLE. All callers changed. + Use it to determine which kind of return to generate. + (arm_final_prescan_insn): Handle both kinds of return. + * config/arm/arm.md (returns, return_str, return_simple_p, + return_cond): New code_iterator and corresponding code_attrs. + (<return_str>return): Renamed from return and adapted. + (arm_<return_str>return): Renamed from arm_return and adapted. + (cond_<return_str>return): Renamed from cond_return and adapted. + (cond_<return_str>return_inverted): Renamed from cond_return_inverted + and adapted. + (epilogue): Use ret_rtx instead of gen_rtx_RETURN. + * config/arm/thumb2.md (thumb2_<return_str>return): Renamed from + thumb2_return and adapted. + * config/arm/arm.h (RETURN_ADDR_REGNUM): Define. + * config/arm/arm-protos.h (use_simple_return_p): Declare. + (output_return_instruction): Adjust declaration. + * config/mips/mips.c (mips_expand_epilogue): Generate a simple_return + as final insn. + * config/mips/mips.md (simple_return): New expander. + (*simple_return, simple_return_internal): New patterns. 
+ * config/sh/sh.c (barrier_align): Handle return in a JUMP_LABEL. + (split_branches): Don't pass a null label to redirect_jump. + + From mainline: + * vec.h (FOR_EACH_VEC_ELT, FOR_EACH_VEC_ELT_REVERSE): New macros. + * haifa-sched.c (find_fallthru_edge_from): Rename from + find_fallthru_edge. All callers changed. + * sched-int.h (find_fallthru_edge_from): Rename declaration as well. + * basic-block.h (find_fallthru_edge): New inline function. + +=== modified file 'gcc/basic-block.h' +Index: gcc-4_5-branch/gcc/basic-block.h +=================================================================== +--- gcc-4_5-branch.orig/gcc/basic-block.h ++++ gcc-4_5-branch/gcc/basic-block.h +@@ -884,6 +884,7 @@ extern void flow_edge_list_print (const + + /* In cfgrtl.c */ + extern basic_block force_nonfallthru (edge); ++extern basic_block force_nonfallthru_and_redirect (edge, basic_block, rtx); + extern rtx block_label (basic_block); + extern bool purge_all_dead_edges (void); + extern bool purge_dead_edges (basic_block); +@@ -1004,6 +1005,20 @@ bb_has_abnormal_pred (basic_block bb) + return false; + } + ++/* Return the fallthru edge in EDGES if it exists, NULL otherwise. */ ++static inline edge ++find_fallthru_edge (VEC(edge,gc) *edges) ++{ ++ edge e; ++ edge_iterator ei; ++ ++ FOR_EACH_EDGE (e, ei, edges) ++ if (e->flags & EDGE_FALLTHRU) ++ break; ++ ++ return e; ++} ++ + /* In cfgloopmanip.c. */ + extern edge mfb_kj_edge; + extern bool mfb_keep_just (edge); +Index: gcc-4_5-branch/gcc/cfganal.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/cfganal.c ++++ gcc-4_5-branch/gcc/cfganal.c +@@ -271,6 +271,37 @@ set_edge_can_fallthru_flag (void) + EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU; + EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU; + } ++ /* dwarf2out expects that a NOTE_INSN_EPILOGUE_BEGIN is always paired ++ with a return or a sibcall. Ensure that this remains the case if ++ they are in different basic blocks. */ ++ FOR_EACH_BB (bb) ++ { ++ edge e; ++ edge_iterator ei; ++ rtx insn, end; ++ ++ end = BB_END (bb); ++ FOR_BB_INSNS (bb, insn) ++ if (GET_CODE (insn) == NOTE ++ && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG ++ && !(CALL_P (end) && SIBLING_CALL_P (end)) ++ && !returnjump_p (end)) ++ { ++ basic_block other_bb = NULL; ++ FOR_EACH_EDGE (e, ei, bb->succs) ++ { ++ if (e->flags & EDGE_FALLTHRU) ++ other_bb = e->dest; ++ else ++ e->flags &= ~EDGE_CAN_FALLTHRU; ++ } ++ FOR_EACH_EDGE (e, ei, other_bb->preds) ++ { ++ if (!(e->flags & EDGE_FALLTHRU)) ++ e->flags &= ~EDGE_CAN_FALLTHRU; ++ } ++ } ++ } + } + + /* Find unreachable blocks. An unreachable block will have 0 in +Index: gcc-4_5-branch/gcc/cfglayout.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/cfglayout.c ++++ gcc-4_5-branch/gcc/cfglayout.c +@@ -766,6 +766,7 @@ fixup_reorder_chain (void) + { + edge e_fall, e_taken, e; + rtx bb_end_insn; ++ rtx ret_label = NULL_RTX; + basic_block nb; + edge_iterator ei; + +@@ -785,6 +786,7 @@ fixup_reorder_chain (void) + bb_end_insn = BB_END (bb); + if (JUMP_P (bb_end_insn)) + { ++ ret_label = JUMP_LABEL (bb_end_insn); + if (any_condjump_p (bb_end_insn)) + { + /* This might happen if the conditional jump has side +@@ -899,7 +901,7 @@ fixup_reorder_chain (void) + } + + /* We got here if we need to add a new jump insn. 
*/ +- nb = force_nonfallthru (e_fall); ++ nb = force_nonfallthru_and_redirect (e_fall, e_fall->dest, ret_label); + if (nb) + { + nb->il.rtl->visited = 1; +@@ -1118,24 +1120,30 @@ extern bool cfg_layout_can_duplicate_bb_ + bool + cfg_layout_can_duplicate_bb_p (const_basic_block bb) + { ++ rtx insn; ++ + /* Do not attempt to duplicate tablejumps, as we need to unshare + the dispatch table. This is difficult to do, as the instructions + computing jump destination may be hoisted outside the basic block. */ + if (tablejump_p (BB_END (bb), NULL, NULL)) + return false; + +- /* Do not duplicate blocks containing insns that can't be copied. */ +- if (targetm.cannot_copy_insn_p) ++ insn = BB_HEAD (bb); ++ while (1) + { +- rtx insn = BB_HEAD (bb); +- while (1) +- { +- if (INSN_P (insn) && targetm.cannot_copy_insn_p (insn)) +- return false; +- if (insn == BB_END (bb)) +- break; +- insn = NEXT_INSN (insn); +- } ++ /* Do not duplicate blocks containing insns that can't be copied. */ ++ if (INSN_P (insn) && targetm.cannot_copy_insn_p ++ && targetm.cannot_copy_insn_p (insn)) ++ return false; ++ /* dwarf2out expects that these notes are always paired with a ++ returnjump or sibling call. */ ++ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG ++ && !returnjump_p (BB_END (bb)) ++ && (!CALL_P (BB_END (bb)) || !SIBLING_CALL_P (BB_END (bb)))) ++ return false; ++ if (insn == BB_END (bb)) ++ break; ++ insn = NEXT_INSN (insn); + } + + return true; +@@ -1180,6 +1188,9 @@ duplicate_insn_chain (rtx from, rtx to) + break; + } + copy = emit_copy_of_insn_after (insn, get_last_insn ()); ++ if (JUMP_P (insn) && JUMP_LABEL (insn) != NULL_RTX ++ && ANY_RETURN_P (JUMP_LABEL (insn))) ++ JUMP_LABEL (copy) = JUMP_LABEL (insn); + maybe_copy_epilogue_insn (insn, copy); + break; + +Index: gcc-4_5-branch/gcc/cfgrtl.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/cfgrtl.c ++++ gcc-4_5-branch/gcc/cfgrtl.c +@@ -1107,10 +1107,13 @@ rtl_redirect_edge_and_branch (edge e, ba + } + + /* Like force_nonfallthru below, but additionally performs redirection +- Used by redirect_edge_and_branch_force. */ ++ Used by redirect_edge_and_branch_force. JUMP_LABEL is used only ++ when redirecting to the EXIT_BLOCK, it is either a return or a ++ simple_return rtx indicating which kind of returnjump to create. ++ It should be NULL otherwise. 
*/ + +-static basic_block +-force_nonfallthru_and_redirect (edge e, basic_block target) ++basic_block ++force_nonfallthru_and_redirect (edge e, basic_block target, rtx jump_label) + { + basic_block jump_block, new_bb = NULL, src = e->src; + rtx note; +@@ -1242,11 +1245,25 @@ force_nonfallthru_and_redirect (edge e, + e->flags &= ~EDGE_FALLTHRU; + if (target == EXIT_BLOCK_PTR) + { ++ if (jump_label == ret_rtx) ++ { + #ifdef HAVE_return +- emit_jump_insn_after_setloc (gen_return (), BB_END (jump_block), loc); ++ emit_jump_insn_after_setloc (gen_return (), BB_END (jump_block), ++ loc); + #else +- gcc_unreachable (); ++ gcc_unreachable (); + #endif ++ } ++ else ++ { ++ gcc_assert (jump_label == simple_return_rtx); ++#ifdef HAVE_simple_return ++ emit_jump_insn_after_setloc (gen_simple_return (), ++ BB_END (jump_block), loc); ++#else ++ gcc_unreachable (); ++#endif ++ } + } + else + { +@@ -1273,7 +1290,7 @@ force_nonfallthru_and_redirect (edge e, + basic_block + force_nonfallthru (edge e) + { +- return force_nonfallthru_and_redirect (e, e->dest); ++ return force_nonfallthru_and_redirect (e, e->dest, NULL_RTX); + } + + /* Redirect edge even at the expense of creating new jump insn or +@@ -1290,7 +1307,7 @@ rtl_redirect_edge_and_branch_force (edge + /* In case the edge redirection failed, try to force it to be non-fallthru + and redirect newly created simplejump. */ + df_set_bb_dirty (e->src); +- return force_nonfallthru_and_redirect (e, target); ++ return force_nonfallthru_and_redirect (e, target, NULL_RTX); + } + + /* The given edge should potentially be a fallthru edge. If that is in +Index: gcc-4_5-branch/gcc/common.opt +=================================================================== +--- gcc-4_5-branch.orig/gcc/common.opt ++++ gcc-4_5-branch/gcc/common.opt +@@ -1147,6 +1147,11 @@ fshow-column + Common C ObjC C++ ObjC++ Report Var(flag_show_column) Init(1) + Show column numbers in diagnostics, when available. Default on + ++fshrink-wrap ++Common Report Var(flag_shrink_wrap) Optimization ++Emit function prologues only before parts of the function that need it, ++rather than at the top of the function. 
++ + fsignaling-nans + Common Report Var(flag_signaling_nans) Optimization + Disable optimizations observable by IEEE signaling NaNs +Index: gcc-4_5-branch/gcc/config/arm/arm-protos.h +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/arm/arm-protos.h ++++ gcc-4_5-branch/gcc/config/arm/arm-protos.h +@@ -26,6 +26,7 @@ + extern void arm_override_options (void); + extern void arm_optimization_options (int, int); + extern int use_return_insn (int, rtx); ++extern bool use_simple_return_p (void); + extern enum reg_class arm_regno_class (int); + extern void arm_load_pic_register (unsigned long); + extern int arm_volatile_func (void); +@@ -137,7 +138,7 @@ extern int arm_address_offset_is_imm (rt + extern const char *output_add_immediate (rtx *); + extern const char *arithmetic_instr (rtx, int); + extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int); +-extern const char *output_return_instruction (rtx, int, int); ++extern const char *output_return_instruction (rtx, bool, bool, bool); + extern void arm_poke_function_name (FILE *, const char *); + extern void arm_print_operand (FILE *, rtx, int); + extern void arm_print_operand_address (FILE *, rtx); +Index: gcc-4_5-branch/gcc/config/arm/arm.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/arm/arm.c ++++ gcc-4_5-branch/gcc/config/arm/arm.c +@@ -2163,6 +2163,18 @@ arm_trampoline_adjust_address (rtx addr) + return addr; + } + ++/* Return true if we should try to use a simple_return insn, i.e. perform ++ shrink-wrapping if possible. This is the case if we need to emit a ++ prologue, which we can test by looking at the offsets. */ ++bool ++use_simple_return_p (void) ++{ ++ arm_stack_offsets *offsets; ++ ++ offsets = arm_get_frame_offsets (); ++ return offsets->outgoing_args != 0; ++} ++ + /* Return 1 if it is possible to return using a single instruction. + If SIBLING is non-null, this is a test for a return before a sibling + call. SIBLING is the call insn, so we can examine its register usage. */ +@@ -11284,6 +11296,7 @@ is_jump_table (rtx insn) + + if (GET_CODE (insn) == JUMP_INSN + && JUMP_LABEL (insn) != NULL ++ && !ANY_RETURN_P (JUMP_LABEL (insn)) + && ((table = next_real_insn (JUMP_LABEL (insn))) + == next_real_insn (insn)) + && table != NULL +@@ -14168,7 +14181,7 @@ arm_get_vfp_saved_size (void) + /* Generate a function exit sequence. If REALLY_RETURN is false, then do + everything bar the final return instruction. */ + const char * +-output_return_instruction (rtx operand, int really_return, int reverse) ++output_return_instruction (rtx operand, bool really_return, bool reverse, bool simple) + { + char conditional[10]; + char instr[100]; +@@ -14206,10 +14219,15 @@ output_return_instruction (rtx operand, + + sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd'); + +- cfun->machine->return_used_this_function = 1; ++ if (simple) ++ live_regs_mask = 0; ++ else ++ { ++ cfun->machine->return_used_this_function = 1; + +- offsets = arm_get_frame_offsets (); +- live_regs_mask = offsets->saved_regs_mask; ++ offsets = arm_get_frame_offsets (); ++ live_regs_mask = offsets->saved_regs_mask; ++ } + + if (live_regs_mask) + { +@@ -17108,6 +17126,7 @@ arm_final_prescan_insn (rtx insn) + + /* If we start with a return insn, we only succeed if we find another one. */ + int seeking_return = 0; ++ enum rtx_code return_code = UNKNOWN; + + /* START_INSN will hold the insn from where we start looking. 
This is the + first insn after the following code_label if REVERSE is true. */ +@@ -17146,7 +17165,7 @@ arm_final_prescan_insn (rtx insn) + else + return; + } +- else if (GET_CODE (body) == RETURN) ++ else if (ANY_RETURN_P (body)) + { + start_insn = next_nonnote_insn (start_insn); + if (GET_CODE (start_insn) == BARRIER) +@@ -17157,6 +17176,7 @@ arm_final_prescan_insn (rtx insn) + { + reverse = TRUE; + seeking_return = 1; ++ return_code = GET_CODE (body); + } + else + return; +@@ -17197,11 +17217,15 @@ arm_final_prescan_insn (rtx insn) + label = XEXP (XEXP (SET_SRC (body), 2), 0); + then_not_else = FALSE; + } +- else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN) +- seeking_return = 1; +- else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN) ++ else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1))) ++ { ++ seeking_return = 1; ++ return_code = GET_CODE (XEXP (SET_SRC (body), 1)); ++ } ++ else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2))) + { + seeking_return = 1; ++ return_code = GET_CODE (XEXP (SET_SRC (body), 2)); + then_not_else = FALSE; + } + else +@@ -17302,8 +17326,7 @@ arm_final_prescan_insn (rtx insn) + && !use_return_insn (TRUE, NULL) + && !optimize_size) + fail = TRUE; +- else if (GET_CODE (scanbody) == RETURN +- && seeking_return) ++ else if (GET_CODE (scanbody) == return_code) + { + arm_ccfsm_state = 2; + succeed = TRUE; +Index: gcc-4_5-branch/gcc/config/arm/arm.h +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/arm/arm.h ++++ gcc-4_5-branch/gcc/config/arm/arm.h +@@ -2622,6 +2622,8 @@ extern int making_const_table; + #define RETURN_ADDR_RTX(COUNT, FRAME) \ + arm_return_addr (COUNT, FRAME) + ++#define RETURN_ADDR_REGNUM LR_REGNUM ++ + /* Mask of the bits in the PC that contain the real return address + when running in 26-bit mode. */ + #define RETURN_ADDR_MASK26 (0x03fffffc) +Index: gcc-4_5-branch/gcc/config/arm/arm.md +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/arm/arm.md ++++ gcc-4_5-branch/gcc/config/arm/arm.md +@@ -8882,66 +8882,72 @@ + [(set_attr "type" "call")] + ) + +-(define_expand "return" +- [(return)] +- "TARGET_32BIT && USE_RETURN_INSN (FALSE)" ++;; Both kinds of return insn. 
++(define_code_iterator returns [return simple_return]) ++(define_code_attr return_str [(return "") (simple_return "simple_")]) ++(define_code_attr return_simple_p [(return "false") (simple_return "true")]) ++(define_code_attr return_cond [(return " && USE_RETURN_INSN (FALSE)") ++ (simple_return " && use_simple_return_p ()")]) ++ ++(define_expand "<return_str>return" ++ [(returns)] ++ "TARGET_32BIT<return_cond>" + "") + +-;; Often the return insn will be the same as loading from memory, so set attr +-(define_insn "*arm_return" +- [(return)] +- "TARGET_ARM && USE_RETURN_INSN (FALSE)" +- "* +- { +- if (arm_ccfsm_state == 2) +- { +- arm_ccfsm_state += 2; +- return \"\"; +- } +- return output_return_instruction (const_true_rtx, TRUE, FALSE); +- }" ++(define_insn "*arm_<return_str>return" ++ [(returns)] ++ "TARGET_ARM<return_cond>" ++{ ++ if (arm_ccfsm_state == 2) ++ { ++ arm_ccfsm_state += 2; ++ return ""; ++ } ++ return output_return_instruction (const_true_rtx, true, false, ++ <return_simple_p>); ++} + [(set_attr "type" "load1") + (set_attr "length" "12") + (set_attr "predicable" "yes")] + ) + +-(define_insn "*cond_return" ++(define_insn "*cond_<return_str>return" + [(set (pc) + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) +- (return) ++ (returns) + (pc)))] +- "TARGET_ARM && USE_RETURN_INSN (TRUE)" +- "* +- { +- if (arm_ccfsm_state == 2) +- { +- arm_ccfsm_state += 2; +- return \"\"; +- } +- return output_return_instruction (operands[0], TRUE, FALSE); +- }" ++ "TARGET_ARM<return_cond>" ++{ ++ if (arm_ccfsm_state == 2) ++ { ++ arm_ccfsm_state += 2; ++ return ""; ++ } ++ return output_return_instruction (operands[0], true, false, ++ <return_simple_p>); ++} + [(set_attr "conds" "use") + (set_attr "length" "12") + (set_attr "type" "load1")] + ) + +-(define_insn "*cond_return_inverted" ++(define_insn "*cond_<return_str>return_inverted" + [(set (pc) + (if_then_else (match_operator 0 "arm_comparison_operator" + [(match_operand 1 "cc_register" "") (const_int 0)]) + (pc) +- (return)))] +- "TARGET_ARM && USE_RETURN_INSN (TRUE)" +- "* +- { +- if (arm_ccfsm_state == 2) +- { +- arm_ccfsm_state += 2; +- return \"\"; +- } +- return output_return_instruction (operands[0], TRUE, TRUE); +- }" ++ (returns)))] ++ "TARGET_ARM<return_cond>" ++{ ++ if (arm_ccfsm_state == 2) ++ { ++ arm_ccfsm_state += 2; ++ return ""; ++ } ++ return output_return_instruction (operands[0], true, true, ++ <return_simple_p>); ++} + [(set_attr "conds" "use") + (set_attr "length" "12") + (set_attr "type" "load1")] +@@ -10809,8 +10815,7 @@ + DONE; + } + emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, +- gen_rtvec (1, +- gen_rtx_RETURN (VOIDmode)), ++ gen_rtvec (1, ret_rtx), + VUNSPEC_EPILOGUE)); + DONE; + " +@@ -10827,7 +10832,7 @@ + "TARGET_32BIT" + "* + if (use_return_insn (FALSE, next_nonnote_insn (insn))) +- return output_return_instruction (const_true_rtx, FALSE, FALSE); ++ return output_return_instruction (const_true_rtx, false, false, false); + return arm_output_epilogue (next_nonnote_insn (insn)); + " + ;; Length is absolute worst case +Index: gcc-4_5-branch/gcc/config/arm/thumb2.md +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/arm/thumb2.md ++++ gcc-4_5-branch/gcc/config/arm/thumb2.md +@@ -1020,16 +1020,15 @@ + + ;; Note: this is not predicable, to avoid issues with linker-generated + ;; interworking stubs. 
+-(define_insn "*thumb2_return" +- [(return)] +- "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)" +- "* +- { +- return output_return_instruction (const_true_rtx, TRUE, FALSE); +- }" ++(define_insn "*thumb2_<return_str>return" ++ [(returns)] ++ "TARGET_THUMB2<return_cond>" ++{ ++ return output_return_instruction (const_true_rtx, true, false, ++ <return_simple_p>); ++} + [(set_attr "type" "load1") +- (set_attr "length" "12")] +-) ++ (set_attr "length" "12")]) + + (define_insn_and_split "thumb2_eh_return" + [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")] +Index: gcc-4_5-branch/gcc/config/bfin/bfin.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/bfin/bfin.c ++++ gcc-4_5-branch/gcc/config/bfin/bfin.c +@@ -2359,7 +2359,7 @@ bfin_expand_call (rtx retval, rtx fnaddr + XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, picreg); + XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie); + if (sibcall) +- XVECEXP (pat, 0, n++) = gen_rtx_RETURN (VOIDmode); ++ XVECEXP (pat, 0, n++) = ret_rtx; + else + XVECEXP (pat, 0, n++) = gen_rtx_CLOBBER (VOIDmode, retsreg); + call = emit_call_insn (pat); +Index: gcc-4_5-branch/gcc/config/cris/cris.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/cris/cris.c ++++ gcc-4_5-branch/gcc/config/cris/cris.c +@@ -1771,7 +1771,7 @@ cris_expand_return (bool on_stack) + we do that until they're fixed. Currently, all return insns in a + function must be the same (not really a limiting factor) so we need + to check that it doesn't change half-way through. */ +- emit_jump_insn (gen_rtx_RETURN (VOIDmode)); ++ emit_jump_insn (ret_rtx); + + CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_RET || !on_stack); + CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_JUMP || on_stack); +Index: gcc-4_5-branch/gcc/config/h8300/h8300.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/h8300/h8300.c ++++ gcc-4_5-branch/gcc/config/h8300/h8300.c +@@ -691,7 +691,7 @@ h8300_push_pop (int regno, int nregs, bo + /* Add the return instruction. */ + if (return_p) + { +- RTVEC_ELT (vec, i) = gen_rtx_RETURN (VOIDmode); ++ RTVEC_ELT (vec, i) = ret_rtx; + i++; + } + +@@ -975,7 +975,7 @@ h8300_expand_epilogue (void) + } + + if (!returned_p) +- emit_jump_insn (gen_rtx_RETURN (VOIDmode)); ++ emit_jump_insn (ret_rtx); + } + + /* Return nonzero if the current function is an interrupt +Index: gcc-4_5-branch/gcc/config/i386/i386.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/i386/i386.c ++++ gcc-4_5-branch/gcc/config/i386/i386.c +@@ -9308,13 +9308,13 @@ ix86_expand_epilogue (int style) + + pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, + popc, -1, true); +- emit_jump_insn (gen_return_indirect_internal (ecx)); ++ emit_jump_insn (gen_simple_return_indirect_internal (ecx)); + } + else +- emit_jump_insn (gen_return_pop_internal (popc)); ++ emit_jump_insn (gen_simple_return_pop_internal (popc)); + } + else +- emit_jump_insn (gen_return_internal ()); ++ emit_jump_insn (gen_simple_return_internal ()); + + /* Restore the state back to the state from the prologue, + so that it's correct for the next epilogue. 
*/ +@@ -26615,7 +26615,7 @@ ix86_pad_returns (void) + rtx prev; + bool replace = false; + +- if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN ++ if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret)) + || optimize_bb_for_size_p (bb)) + continue; + for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) +@@ -26645,7 +26645,10 @@ ix86_pad_returns (void) + } + if (replace) + { +- emit_jump_insn_before (gen_return_internal_long (), ret); ++ if (PATTERN (ret) == ret_rtx) ++ emit_jump_insn_before (gen_return_internal_long (), ret); ++ else ++ emit_jump_insn_before (gen_simple_return_internal_long (), ret); + delete_insn (ret); + } + } +Index: gcc-4_5-branch/gcc/config/i386/i386.md +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/i386/i386.md ++++ gcc-4_5-branch/gcc/config/i386/i386.md +@@ -13798,24 +13798,29 @@ + "" + [(set_attr "length" "0")]) + ++(define_code_iterator returns [return simple_return]) ++(define_code_attr return_str [(return "") (simple_return "simple_")]) ++(define_code_attr return_cond [(return "ix86_can_use_return_insn_p ()") ++ (simple_return "")]) ++ + ;; Insn emitted into the body of a function to return from a function. + ;; This is only done if the function's epilogue is known to be simple. + ;; See comments for ix86_can_use_return_insn_p in i386.c. + +-(define_expand "return" +- [(return)] +- "ix86_can_use_return_insn_p ()" ++(define_expand "<return_str>return" ++ [(returns)] ++ "<return_cond>" + { + if (crtl->args.pops_args) + { + rtx popc = GEN_INT (crtl->args.pops_args); +- emit_jump_insn (gen_return_pop_internal (popc)); ++ emit_jump_insn (gen_<return_str>return_pop_internal (popc)); + DONE; + } + }) + +-(define_insn "return_internal" +- [(return)] ++(define_insn "<return_str>return_internal" ++ [(returns)] + "reload_completed" + "ret" + [(set_attr "length" "1") +@@ -13826,8 +13831,8 @@ + ;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET + ;; instruction Athlon and K8 have. 
+ +-(define_insn "return_internal_long" +- [(return) ++(define_insn "<return_str>return_internal_long" ++ [(returns) + (unspec [(const_int 0)] UNSPEC_REP)] + "reload_completed" + "rep\;ret" +@@ -13837,8 +13842,8 @@ + (set_attr "prefix_rep" "1") + (set_attr "modrm" "0")]) + +-(define_insn "return_pop_internal" +- [(return) ++(define_insn "<return_str>return_pop_internal" ++ [(returns) + (use (match_operand:SI 0 "const_int_operand" ""))] + "reload_completed" + "ret\t%0" +@@ -13847,8 +13852,8 @@ + (set_attr "length_immediate" "2") + (set_attr "modrm" "0")]) + +-(define_insn "return_indirect_internal" +- [(return) ++(define_insn "<return_str>return_indirect_internal" ++ [(returns) + (use (match_operand:SI 0 "register_operand" "r"))] + "reload_completed" + "jmp\t%A0" +Index: gcc-4_5-branch/gcc/config/m68hc11/m68hc11.md +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/m68hc11/m68hc11.md ++++ gcc-4_5-branch/gcc/config/m68hc11/m68hc11.md +@@ -6576,7 +6576,7 @@ + if (ret_size && ret_size <= 2) + { + emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, +- gen_rtvec (2, gen_rtx_RETURN (VOIDmode), ++ gen_rtvec (2, ret_rtx, + gen_rtx_USE (VOIDmode, + gen_rtx_REG (HImode, 1))))); + DONE; +@@ -6584,7 +6584,7 @@ + if (ret_size) + { + emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, +- gen_rtvec (2, gen_rtx_RETURN (VOIDmode), ++ gen_rtvec (2, ret_rtx, + gen_rtx_USE (VOIDmode, + gen_rtx_REG (SImode, 0))))); + DONE; +Index: gcc-4_5-branch/gcc/config/m68k/m68k.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/m68k/m68k.c ++++ gcc-4_5-branch/gcc/config/m68k/m68k.c +@@ -1366,7 +1366,7 @@ m68k_expand_epilogue (bool sibcall_p) + EH_RETURN_STACKADJ_RTX)); + + if (!sibcall_p) +- emit_jump_insn (gen_rtx_RETURN (VOIDmode)); ++ emit_jump_insn (ret_rtx); + } + + /* Return true if X is a valid comparison operator for the dbcc +Index: gcc-4_5-branch/gcc/config/mips/mips.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/mips/mips.c ++++ gcc-4_5-branch/gcc/config/mips/mips.c +@@ -10497,7 +10497,8 @@ mips_expand_epilogue (bool sibcall_p) + regno = GP_REG_FIRST + 7; + else + regno = RETURN_ADDR_REGNUM; +- emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, regno))); ++ emit_jump_insn (gen_simple_return_internal (gen_rtx_REG (Pmode, ++ regno))); + } + } + +Index: gcc-4_5-branch/gcc/config/mips/mips.md +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/mips/mips.md ++++ gcc-4_5-branch/gcc/config/mips/mips.md +@@ -5815,6 +5815,18 @@ + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + ++(define_expand "simple_return" ++ [(simple_return)] ++ "!mips_can_use_return_insn ()" ++ { mips_expand_before_return (); }) ++ ++(define_insn "*simple_return" ++ [(simple_return)] ++ "!mips_can_use_return_insn ()" ++ "%*j\t$31%/" ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none")]) ++ + ;; Normal return. + + (define_insn "return_internal" +@@ -5825,6 +5837,14 @@ + [(set_attr "type" "jump") + (set_attr "mode" "none")]) + ++(define_insn "simple_return_internal" ++ [(simple_return) ++ (use (match_operand 0 "pmode_register_operand" ""))] ++ "" ++ "%*j\t%0%/" ++ [(set_attr "type" "jump") ++ (set_attr "mode" "none")]) ++ + ;; Exception return. 
+ (define_insn "mips_eret" + [(return) +Index: gcc-4_5-branch/gcc/config/picochip/picochip.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/picochip/picochip.c ++++ gcc-4_5-branch/gcc/config/picochip/picochip.c +@@ -1996,7 +1996,7 @@ picochip_expand_epilogue (int is_sibling + rtvec p; + p = rtvec_alloc (2); + +- RTVEC_ELT (p, 0) = gen_rtx_RETURN (VOIDmode); ++ RTVEC_ELT (p, 0) = ret_rtx; + RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, + gen_rtx_REG (Pmode, LINK_REGNUM)); + emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p)); +Index: gcc-4_5-branch/gcc/config/rs6000/rs6000.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/rs6000/rs6000.c ++++ gcc-4_5-branch/gcc/config/rs6000/rs6000.c +@@ -18563,7 +18563,7 @@ rs6000_make_savres_rtx (rs6000_stack_t * + p = rtvec_alloc ((lr ? 4 : 3) + n_regs); + + if (!savep && lr) +- RTVEC_ELT (p, offset++) = gen_rtx_RETURN (VOIDmode); ++ RTVEC_ELT (p, offset++) = ret_rtx; + + RTVEC_ELT (p, offset++) + = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 65)); +@@ -19638,7 +19638,7 @@ rs6000_emit_epilogue (int sibcall) + alloc_rname = ggc_strdup (rname); + + j = 0; +- RTVEC_ELT (p, j++) = gen_rtx_RETURN (VOIDmode); ++ RTVEC_ELT (p, j++) = ret_rtx; + RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, + gen_rtx_REG (Pmode, + LR_REGNO)); +@@ -20254,7 +20254,7 @@ rs6000_emit_epilogue (int sibcall) + else + p = rtvec_alloc (2); + +- RTVEC_ELT (p, 0) = gen_rtx_RETURN (VOIDmode); ++ RTVEC_ELT (p, 0) = ret_rtx; + RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr) + ? gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 65)) + : gen_rtx_CLOBBER (VOIDmode, +@@ -20695,7 +20695,7 @@ rs6000_output_mi_thunk (FILE *file, tree + gen_rtx_USE (VOIDmode, + gen_rtx_REG (SImode, + LR_REGNO)), +- gen_rtx_RETURN (VOIDmode)))); ++ ret_rtx))); + SIBLING_CALL_P (insn) = 1; + emit_barrier (); + +Index: gcc-4_5-branch/gcc/config/rx/rx.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/rx/rx.c ++++ gcc-4_5-branch/gcc/config/rx/rx.c +@@ -1562,7 +1562,7 @@ gen_rx_rtsd_vector (unsigned int adjust, + : plus_constant (stack_pointer_rtx, + i * UNITS_PER_WORD))); + +- XVECEXP (vector, 0, count - 1) = gen_rtx_RETURN (VOIDmode); ++ XVECEXP (vector, 0, count - 1) = ret_rtx; + + return vector; + } +Index: gcc-4_5-branch/gcc/config/s390/s390.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/s390/s390.c ++++ gcc-4_5-branch/gcc/config/s390/s390.c +@@ -8170,7 +8170,7 @@ s390_emit_epilogue (bool sibcall) + + p = rtvec_alloc (2); + +- RTVEC_ELT (p, 0) = gen_rtx_RETURN (VOIDmode); ++ RTVEC_ELT (p, 0) = ret_rtx; + RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg); + emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p)); + } +Index: gcc-4_5-branch/gcc/config/sh/sh.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/config/sh/sh.c ++++ gcc-4_5-branch/gcc/config/sh/sh.c +@@ -5252,7 +5252,8 @@ barrier_align (rtx barrier_or_label) + } + if (prev + && JUMP_P (prev) +- && JUMP_LABEL (prev)) ++ && JUMP_LABEL (prev) ++ && !ANY_RETURN_P (JUMP_LABEL (prev))) + { + rtx x; + if (jump_to_next +@@ -5951,7 +5952,7 @@ split_branches (rtx first) + JUMP_LABEL (insn) = far_label; + LABEL_NUSES (far_label)++; + } +- redirect_jump (insn, NULL_RTX, 1); ++ redirect_jump (insn, ret_rtx, 1); + far_label = 0; + } + } +Index: gcc-4_5-branch/gcc/config/v850/v850.c 
+=================================================================== +--- gcc-4_5-branch.orig/gcc/config/v850/v850.c ++++ gcc-4_5-branch/gcc/config/v850/v850.c +@@ -1832,7 +1832,7 @@ expand_epilogue (void) + { + restore_all = gen_rtx_PARALLEL (VOIDmode, + rtvec_alloc (num_restore + 2)); +- XVECEXP (restore_all, 0, 0) = gen_rtx_RETURN (VOIDmode); ++ XVECEXP (restore_all, 0, 0) = ret_rtx; + XVECEXP (restore_all, 0, 1) + = gen_rtx_SET (VOIDmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, +Index: gcc-4_5-branch/gcc/df-scan.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/df-scan.c ++++ gcc-4_5-branch/gcc/df-scan.c +@@ -3296,6 +3296,7 @@ df_uses_record (enum df_ref_class cl, st + } + + case RETURN: ++ case SIMPLE_RETURN: + break; + + case ASM_OPERANDS: +Index: gcc-4_5-branch/gcc/doc/invoke.texi +=================================================================== +--- gcc-4_5-branch.orig/gcc/doc/invoke.texi ++++ gcc-4_5-branch/gcc/doc/invoke.texi +@@ -5751,6 +5751,7 @@ compilation time. + -fipa-pure-const @gol + -fipa-reference @gol + -fmerge-constants ++-fshrink-wrap @gol + -fsplit-wide-types @gol + -ftree-builtin-call-dce @gol + -ftree-ccp @gol +@@ -6506,6 +6507,12 @@ This option has no effect until one of @ + When pipelining loops during selective scheduling, also pipeline outer loops. + This option has no effect until @option{-fsel-sched-pipelining} is turned on. + ++@item -fshrink-wrap ++@opindex fshrink-wrap ++Emit function prologues only before parts of the function that need it, ++rather than at the top of the function. This flag is enabled by default at ++@option{-O} and higher. ++ + @item -fcaller-saves + @opindex fcaller-saves + Enable values to be allocated in registers that will be clobbered by +Index: gcc-4_5-branch/gcc/doc/md.texi +=================================================================== +--- gcc-4_5-branch.orig/gcc/doc/md.texi ++++ gcc-4_5-branch/gcc/doc/md.texi +@@ -4801,7 +4801,19 @@ RTL generation phase. In this case it i + multiple instructions are usually needed to return from a function, but + some class of functions only requires one instruction to implement a + return. Normally, the applicable functions are those which do not need +-to save any registers or allocate stack space. ++to save any registers or allocate stack space, although some targets ++have instructions that can perform both the epilogue and function return ++in one instruction. ++ ++@cindex @code{simple_return} instruction pattern ++@item @samp{simple_return} ++Subroutine return instruction. This instruction pattern name should be ++defined only if a single instruction can do all the work of returning ++from a function on a path where no epilogue is required. This pattern ++is very similar to the @code{return} instruction pattern, but it is emitted ++only by the shrink-wrapping optimization on paths where the function ++prologue has not been executed, and a function return should occur without ++any of the effects of the epilogue. + + @findex reload_completed + @findex leaf_function_p +Index: gcc-4_5-branch/gcc/doc/rtl.texi +=================================================================== +--- gcc-4_5-branch.orig/gcc/doc/rtl.texi ++++ gcc-4_5-branch/gcc/doc/rtl.texi +@@ -2888,6 +2888,13 @@ placed in @code{pc} to return to the cal + Note that an insn pattern of @code{(return)} is logically equivalent to + @code{(set (pc) (return))}, but the latter form is never used. 
+ ++@findex simple_return ++@item (simple_return) ++Like @code{(return)}, but truly represents only a function return, while ++@code{(return)} may represent an insn that also performs other functions ++of the function epilogue. Like @code{(return)}, this may also occur in ++conditional jumps. ++ + @findex call + @item (call @var{function} @var{nargs}) + Represents a function call. @var{function} is a @code{mem} expression +@@ -3017,7 +3024,7 @@ Represents several side effects performe + brackets stand for a vector; the operand of @code{parallel} is a + vector of expressions. @var{x0}, @var{x1} and so on are individual + side effect expressions---expressions of code @code{set}, @code{call}, +-@code{return}, @code{clobber} or @code{use}. ++@code{return}, @code{simple_return}, @code{clobber} or @code{use}. + + ``In parallel'' means that first all the values used in the individual + side-effects are computed, and second all the actual side-effects are +@@ -3656,14 +3663,16 @@ and @code{call_insn} insns: + @table @code + @findex PATTERN + @item PATTERN (@var{i}) +-An expression for the side effect performed by this insn. This must be +-one of the following codes: @code{set}, @code{call}, @code{use}, +-@code{clobber}, @code{return}, @code{asm_input}, @code{asm_output}, +-@code{addr_vec}, @code{addr_diff_vec}, @code{trap_if}, @code{unspec}, +-@code{unspec_volatile}, @code{parallel}, @code{cond_exec}, or @code{sequence}. If it is a @code{parallel}, +-each element of the @code{parallel} must be one these codes, except that +-@code{parallel} expressions cannot be nested and @code{addr_vec} and +-@code{addr_diff_vec} are not permitted inside a @code{parallel} expression. ++An expression for the side effect performed by this insn. This must ++be one of the following codes: @code{set}, @code{call}, @code{use}, ++@code{clobber}, @code{return}, @code{simple_return}, @code{asm_input}, ++@code{asm_output}, @code{addr_vec}, @code{addr_diff_vec}, ++@code{trap_if}, @code{unspec}, @code{unspec_volatile}, ++@code{parallel}, @code{cond_exec}, or @code{sequence}. If it is a ++@code{parallel}, each element of the @code{parallel} must be one these ++codes, except that @code{parallel} expressions cannot be nested and ++@code{addr_vec} and @code{addr_diff_vec} are not permitted inside a ++@code{parallel} expression. + + @findex INSN_CODE + @item INSN_CODE (@var{i}) +Index: gcc-4_5-branch/gcc/doc/tm.texi +=================================================================== +--- gcc-4_5-branch.orig/gcc/doc/tm.texi ++++ gcc-4_5-branch/gcc/doc/tm.texi +@@ -3287,6 +3287,12 @@ Define this if the return address of a p + from the frame pointer of the previous stack frame. + @end defmac + ++@defmac RETURN_ADDR_REGNUM ++If defined, a C expression whose value is the register number of the return ++address for the current function. Targets that pass the return address on ++the stack should not define this macro. 
++@end defmac ++ + @defmac INCOMING_RETURN_ADDR_RTX + A C expression whose value is RTL representing the location of the + incoming return address at the beginning of any function, before the +Index: gcc-4_5-branch/gcc/dwarf2out.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/dwarf2out.c ++++ gcc-4_5-branch/gcc/dwarf2out.c +@@ -1396,7 +1396,7 @@ compute_barrier_args_size_1 (rtx insn, H + { + rtx dest = JUMP_LABEL (insn); + +- if (dest) ++ if (dest && !ANY_RETURN_P (dest)) + { + if (barrier_args_size [INSN_UID (dest)] < 0) + { +Index: gcc-4_5-branch/gcc/emit-rtl.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/emit-rtl.c ++++ gcc-4_5-branch/gcc/emit-rtl.c +@@ -2432,6 +2432,8 @@ verify_rtx_sharing (rtx orig, rtx insn) + case CODE_LABEL: + case PC: + case CC0: ++ case RETURN: ++ case SIMPLE_RETURN: + case SCRATCH: + return; + /* SCRATCH must be shared because they represent distinct values. */ +@@ -3323,14 +3325,17 @@ prev_label (rtx insn) + return insn; + } + +-/* Return the last label to mark the same position as LABEL. Return null +- if LABEL itself is null. */ ++/* Return the last label to mark the same position as LABEL. Return LABEL ++ itself if it is null or any return rtx. */ + + rtx + skip_consecutive_labels (rtx label) + { + rtx insn; + ++ if (label && ANY_RETURN_P (label)) ++ return label; ++ + for (insn = label; insn != 0 && !INSN_P (insn); insn = NEXT_INSN (insn)) + if (LABEL_P (insn)) + label = insn; +@@ -5209,7 +5214,7 @@ classify_insn (rtx x) + return CODE_LABEL; + if (GET_CODE (x) == CALL) + return CALL_INSN; +- if (GET_CODE (x) == RETURN) ++ if (GET_CODE (x) == RETURN || GET_CODE (x) == SIMPLE_RETURN) + return JUMP_INSN; + if (GET_CODE (x) == SET) + { +@@ -5715,8 +5720,10 @@ init_emit_regs (void) + init_reg_modes_target (); + + /* Assign register numbers to the globally defined register rtx. */ +- pc_rtx = gen_rtx_PC (VOIDmode); +- cc0_rtx = gen_rtx_CC0 (VOIDmode); ++ pc_rtx = gen_rtx_fmt_ (PC, VOIDmode); ++ ret_rtx = gen_rtx_fmt_ (RETURN, VOIDmode); ++ simple_return_rtx = gen_rtx_fmt_ (SIMPLE_RETURN, VOIDmode); ++ cc0_rtx = gen_rtx_fmt_ (CC0, VOIDmode); + stack_pointer_rtx = gen_raw_REG (Pmode, STACK_POINTER_REGNUM); + frame_pointer_rtx = gen_raw_REG (Pmode, FRAME_POINTER_REGNUM); + hard_frame_pointer_rtx = gen_raw_REG (Pmode, HARD_FRAME_POINTER_REGNUM); +Index: gcc-4_5-branch/gcc/final.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/final.c ++++ gcc-4_5-branch/gcc/final.c +@@ -2428,7 +2428,7 @@ final_scan_insn (rtx insn, FILE *file, i + delete_insn (insn); + break; + } +- else if (GET_CODE (SET_SRC (body)) == RETURN) ++ else if (ANY_RETURN_P (SET_SRC (body))) + /* Replace (set (pc) (return)) with (return). */ + PATTERN (insn) = body = SET_SRC (body); + +Index: gcc-4_5-branch/gcc/function.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/function.c ++++ gcc-4_5-branch/gcc/function.c +@@ -147,9 +147,6 @@ extern tree debug_find_var_in_block_tree + can always export `prologue_epilogue_contains'. 
*/ + static void record_insns (rtx, rtx, htab_t *) ATTRIBUTE_UNUSED; + static bool contains (const_rtx, htab_t); +-#ifdef HAVE_return +-static void emit_return_into_block (basic_block); +-#endif + static void prepare_function_start (void); + static void do_clobber_return_reg (rtx, void *); + static void do_use_return_reg (rtx, void *); +@@ -4987,35 +4984,190 @@ prologue_epilogue_contains (const_rtx in + return 0; + } + ++#ifdef HAVE_simple_return ++/* This collects sets and clobbers of hard registers in a HARD_REG_SET, ++ which is pointed to by DATA. */ ++static void ++record_hard_reg_sets (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data) ++{ ++ HARD_REG_SET *pset = (HARD_REG_SET *)data; ++ if (REG_P (x) && REGNO (x) < FIRST_PSEUDO_REGISTER) ++ { ++ int nregs = hard_regno_nregs[REGNO (x)][GET_MODE (x)]; ++ while (nregs-- > 0) ++ SET_HARD_REG_BIT (*pset, REGNO (x) + nregs); ++ } ++} ++ ++/* A subroutine of requires_stack_frame_p, called via for_each_rtx. ++ If any change is made, set CHANGED ++ to true. */ ++ ++static int ++frame_required_for_rtx (rtx *loc, void *data ATTRIBUTE_UNUSED) ++{ ++ rtx x = *loc; ++ if (x == stack_pointer_rtx || x == hard_frame_pointer_rtx ++ || x == arg_pointer_rtx || x == pic_offset_table_rtx ++#ifdef RETURN_ADDR_REGNUM ++ || (REG_P (x) && REGNO (x) == RETURN_ADDR_REGNUM) ++#endif ++ ) ++ return 1; ++ return 0; ++} ++ ++static bool ++requires_stack_frame_p (rtx insn) ++{ ++ HARD_REG_SET hardregs; ++ unsigned regno; ++ ++ if (!INSN_P (insn) || DEBUG_INSN_P (insn)) ++ return false; ++ if (CALL_P (insn)) ++ return !SIBLING_CALL_P (insn); ++ if (for_each_rtx (&PATTERN (insn), frame_required_for_rtx, NULL)) ++ return true; ++ CLEAR_HARD_REG_SET (hardregs); ++ note_stores (PATTERN (insn), record_hard_reg_sets, &hardregs); ++ AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set); ++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) ++ if (TEST_HARD_REG_BIT (hardregs, regno) ++ && df_regs_ever_live_p (regno)) ++ return true; ++ return false; ++} ++#endif ++ + #ifdef HAVE_return +-/* Insert gen_return at the end of block BB. This also means updating +- block_for_insn appropriately. */ ++ ++static rtx ++gen_return_pattern (bool simple_p) ++{ ++#ifdef HAVE_simple_return ++ return simple_p ? gen_simple_return () : gen_return (); ++#else ++ gcc_assert (!simple_p); ++ return gen_return (); ++#endif ++} ++ ++/* Insert an appropriate return pattern at the end of block BB. This ++ also means updating block_for_insn appropriately. */ + + static void +-emit_return_into_block (basic_block bb) ++emit_return_into_block (bool simple_p, basic_block bb) + { +- emit_jump_insn_after (gen_return (), BB_END (bb)); ++ rtx jump; ++ jump = emit_jump_insn_after (gen_return_pattern (simple_p), BB_END (bb)); ++ JUMP_LABEL (jump) = simple_p ? simple_return_rtx : ret_rtx; + } +-#endif /* HAVE_return */ ++#endif + + /* Generate the prologue and epilogue RTL if the machine supports it. Thread + this into place with notes indicating where the prologue ends and where +- the epilogue begins. Update the basic block information when possible. */ ++ the epilogue begins. Update the basic block information when possible. ++ ++ Notes on epilogue placement: ++ There are several kinds of edges to the exit block: ++ * a single fallthru edge from LAST_BB ++ * possibly, edges from blocks containing sibcalls ++ * possibly, fake edges from infinite loops ++ ++ The epilogue is always emitted on the fallthru edge from the last basic ++ block in the function, LAST_BB, into the exit block. 
++ ++ If LAST_BB is empty except for a label, it is the target of every ++ other basic block in the function that ends in a return. If a ++ target has a return or simple_return pattern (possibly with ++ conditional variants), these basic blocks can be changed so that a ++ return insn is emitted into them, and their target is adjusted to ++ the real exit block. ++ ++ Notes on shrink wrapping: We implement a fairly conservative ++ version of shrink-wrapping rather than the textbook one. We only ++ generate a single prologue and a single epilogue. This is ++ sufficient to catch a number of interesting cases involving early ++ exits. ++ ++ First, we identify the blocks that require the prologue to occur before ++ them. These are the ones that modify a call-saved register, or reference ++ any of the stack or frame pointer registers. To simplify things, we then ++ mark everything reachable from these blocks as also requiring a prologue. ++ This takes care of loops automatically, and avoids the need to examine ++ whether MEMs reference the frame, since it is sufficient to check for ++ occurrences of the stack or frame pointer. ++ ++ We then compute the set of blocks for which the need for a prologue ++ is anticipatable (borrowing terminology from the shrink-wrapping ++ description in Muchnick's book). These are the blocks which either ++ require a prologue themselves, or those that have only successors ++ where the prologue is anticipatable. The prologue needs to be ++ inserted on all edges from BB1->BB2 where BB2 is in ANTIC and BB1 ++ is not. For the moment, we ensure that only one such edge exists. ++ ++ The epilogue is placed as described above, but we make a ++ distinction between inserting return and simple_return patterns ++ when modifying other blocks that end in a return. Blocks that end ++ in a sibcall omit the sibcall_epilogue if the block is not in ++ ANTIC. */ + + static void + thread_prologue_and_epilogue_insns (void) + { + int inserted = 0; ++ basic_block last_bb; ++ bool last_bb_active; ++#ifdef HAVE_simple_return ++ bool unconverted_simple_returns = false; ++ basic_block simple_return_block = NULL; ++#endif ++ rtx returnjump ATTRIBUTE_UNUSED; ++ rtx seq ATTRIBUTE_UNUSED, epilogue_end ATTRIBUTE_UNUSED; ++ rtx prologue_seq ATTRIBUTE_UNUSED, split_prologue_seq ATTRIBUTE_UNUSED; ++ edge entry_edge, orig_entry_edge, exit_fallthru_edge; + edge e; +-#if defined (HAVE_sibcall_epilogue) || defined (HAVE_epilogue) || defined (HAVE_return) || defined (HAVE_prologue) +- rtx seq; +-#endif +-#if defined (HAVE_epilogue) || defined(HAVE_return) +- rtx epilogue_end = NULL_RTX; +-#endif + edge_iterator ei; ++ bitmap_head bb_flags; ++ ++ df_analyze (); + + rtl_profile_for_bb (ENTRY_BLOCK_PTR); ++ ++ epilogue_end = NULL_RTX; ++ returnjump = NULL_RTX; ++ ++ /* Can't deal with multiple successors of the entry block at the ++ moment. Function should always have at least one entry ++ point. */ ++ gcc_assert (single_succ_p (ENTRY_BLOCK_PTR)); ++ entry_edge = single_succ_edge (ENTRY_BLOCK_PTR); ++ orig_entry_edge = entry_edge; ++ ++ exit_fallthru_edge = find_fallthru_edge (EXIT_BLOCK_PTR->preds); ++ if (exit_fallthru_edge != NULL) ++ { ++ rtx label; ++ ++ last_bb = exit_fallthru_edge->src; ++ /* Test whether there are active instructions in the last block. 
*/ ++ label = BB_END (last_bb); ++ while (label && !LABEL_P (label)) ++ { ++ if (active_insn_p (label)) ++ break; ++ label = PREV_INSN (label); ++ } ++ ++ last_bb_active = BB_HEAD (last_bb) != label || !LABEL_P (label); ++ } ++ else ++ { ++ last_bb = NULL; ++ last_bb_active = false; ++ } ++ + #ifdef HAVE_prologue + if (HAVE_prologue) + { +@@ -5040,20 +5192,169 @@ thread_prologue_and_epilogue_insns (void + emit_insn (gen_blockage ()); + #endif + +- seq = get_insns (); ++ prologue_seq = get_insns (); + end_sequence (); + set_insn_locators (seq, prologue_locator); ++ } ++#endif + +- /* Can't deal with multiple successors of the entry block +- at the moment. Function should always have at least one +- entry point. */ +- gcc_assert (single_succ_p (ENTRY_BLOCK_PTR)); ++ bitmap_initialize (&bb_flags, &bitmap_default_obstack); + +- insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); +- inserted = 1; ++#ifdef HAVE_simple_return ++ /* Try to perform a kind of shrink-wrapping, making sure the ++ prologue/epilogue is emitted only around those parts of the ++ function that require it. */ ++ ++ if (flag_shrink_wrap && HAVE_simple_return && !flag_non_call_exceptions ++ && HAVE_prologue && !crtl->calls_eh_return) ++ { ++ HARD_REG_SET prologue_clobbered, live_on_edge; ++ rtx p_insn; ++ VEC(basic_block, heap) *vec; ++ basic_block bb; ++ bitmap_head bb_antic_flags; ++ bitmap_head bb_on_list; ++ ++ bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack); ++ bitmap_initialize (&bb_on_list, &bitmap_default_obstack); ++ ++ vec = VEC_alloc (basic_block, heap, n_basic_blocks); ++ ++ FOR_EACH_BB (bb) ++ { ++ rtx insn; ++ FOR_BB_INSNS (bb, insn) ++ { ++ if (requires_stack_frame_p (insn)) ++ { ++ bitmap_set_bit (&bb_flags, bb->index); ++ VEC_quick_push (basic_block, vec, bb); ++ break; ++ } ++ } ++ } ++ ++ /* For every basic block that needs a prologue, mark all blocks ++ reachable from it, so as to ensure they are also seen as ++ requiring a prologue. */ ++ while (!VEC_empty (basic_block, vec)) ++ { ++ basic_block tmp_bb = VEC_pop (basic_block, vec); ++ edge e; ++ edge_iterator ei; ++ FOR_EACH_EDGE (e, ei, tmp_bb->succs) ++ { ++ if (e->dest == EXIT_BLOCK_PTR ++ || bitmap_bit_p (&bb_flags, e->dest->index)) ++ continue; ++ bitmap_set_bit (&bb_flags, e->dest->index); ++ VEC_quick_push (basic_block, vec, e->dest); ++ } ++ } ++ /* If the last basic block contains only a label, we'll be able ++ to convert jumps to it to (potentially conditional) return ++ insns later. This means we don't necessarily need a prologue ++ for paths reaching it. */ ++ if (last_bb) ++ { ++ if (!last_bb_active) ++ bitmap_clear_bit (&bb_flags, last_bb->index); ++ else if (!bitmap_bit_p (&bb_flags, last_bb->index)) ++ goto fail_shrinkwrap; ++ } ++ ++ /* Now walk backwards from every block that is marked as needing ++ a prologue to compute the bb_antic_flags bitmap. 
*/ ++ bitmap_copy (&bb_antic_flags, &bb_flags); ++ FOR_EACH_BB (bb) ++ { ++ edge e; ++ edge_iterator ei; ++ if (!bitmap_bit_p (&bb_flags, bb->index)) ++ continue; ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ if (!bitmap_bit_p (&bb_antic_flags, e->src->index)) ++ { ++ VEC_quick_push (basic_block, vec, e->src); ++ bitmap_set_bit (&bb_on_list, e->src->index); ++ } ++ } ++ while (!VEC_empty (basic_block, vec)) ++ { ++ basic_block tmp_bb = VEC_pop (basic_block, vec); ++ edge e; ++ edge_iterator ei; ++ bool all_set = true; ++ ++ bitmap_clear_bit (&bb_on_list, tmp_bb->index); ++ FOR_EACH_EDGE (e, ei, tmp_bb->succs) ++ { ++ if (!bitmap_bit_p (&bb_antic_flags, e->dest->index)) ++ { ++ all_set = false; ++ break; ++ } ++ } ++ if (all_set) ++ { ++ bitmap_set_bit (&bb_antic_flags, tmp_bb->index); ++ FOR_EACH_EDGE (e, ei, tmp_bb->preds) ++ if (!bitmap_bit_p (&bb_antic_flags, e->src->index)) ++ { ++ VEC_quick_push (basic_block, vec, e->src); ++ bitmap_set_bit (&bb_on_list, e->src->index); ++ } ++ } ++ } ++ /* Find exactly one edge that leads to a block in ANTIC from ++ a block that isn't. */ ++ if (!bitmap_bit_p (&bb_antic_flags, entry_edge->dest->index)) ++ FOR_EACH_BB (bb) ++ { ++ if (!bitmap_bit_p (&bb_antic_flags, bb->index)) ++ continue; ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ if (!bitmap_bit_p (&bb_antic_flags, e->src->index)) ++ { ++ if (entry_edge != orig_entry_edge) ++ { ++ entry_edge = orig_entry_edge; ++ goto fail_shrinkwrap; ++ } ++ entry_edge = e; ++ } ++ } ++ ++ /* Test whether the prologue is known to clobber any register ++ (other than FP or SP) which are live on the edge. */ ++ CLEAR_HARD_REG_SET (prologue_clobbered); ++ for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn)) ++ if (NONDEBUG_INSN_P (p_insn)) ++ note_stores (PATTERN (p_insn), record_hard_reg_sets, ++ &prologue_clobbered); ++ CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM); ++ if (frame_pointer_needed) ++ CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM); ++ ++ CLEAR_HARD_REG_SET (live_on_edge); ++ reg_set_to_hard_reg_set (&live_on_edge, ++ df_get_live_in (entry_edge->dest)); ++ if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered)) ++ entry_edge = orig_entry_edge; ++ ++ fail_shrinkwrap: ++ bitmap_clear (&bb_antic_flags); ++ bitmap_clear (&bb_on_list); ++ VEC_free (basic_block, heap, vec); + } + #endif + ++ if (prologue_seq != NULL_RTX) ++ { ++ insert_insn_on_edge (prologue_seq, entry_edge); ++ inserted = true; ++ } ++ + /* If the exit block has no non-fake predecessors, we don't need + an epilogue. */ + FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) +@@ -5063,100 +5364,130 @@ thread_prologue_and_epilogue_insns (void + goto epilogue_done; + + rtl_profile_for_bb (EXIT_BLOCK_PTR); ++ + #ifdef HAVE_return +- if (optimize && HAVE_return) ++ /* If we're allowed to generate a simple return instruction, then by ++ definition we don't need a full epilogue. If the last basic ++ block before the exit block does not contain active instructions, ++ examine its predecessors and try to emit (conditional) return ++ instructions. */ ++ if (optimize && !last_bb_active ++ && (HAVE_return || entry_edge != orig_entry_edge)) + { +- /* If we're allowed to generate a simple return instruction, +- then by definition we don't need a full epilogue. Examine +- the block that falls through to EXIT. If it does not +- contain any code, examine its predecessors and try to +- emit (conditional) return instructions. 
*/ +- +- basic_block last; ++ edge_iterator ei2; ++ int i; ++ basic_block bb; + rtx label; ++ VEC(basic_block,heap) *src_bbs; + +- FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) +- if (e->flags & EDGE_FALLTHRU) +- break; +- if (e == NULL) ++ if (exit_fallthru_edge == NULL) + goto epilogue_done; +- last = e->src; ++ label = BB_HEAD (last_bb); + +- /* Verify that there are no active instructions in the last block. */ +- label = BB_END (last); +- while (label && !LABEL_P (label)) +- { +- if (active_insn_p (label)) +- break; +- label = PREV_INSN (label); +- } ++ src_bbs = VEC_alloc (basic_block, heap, EDGE_COUNT (last_bb->preds)); ++ FOR_EACH_EDGE (e, ei2, last_bb->preds) ++ if (e->src != ENTRY_BLOCK_PTR) ++ VEC_quick_push (basic_block, src_bbs, e->src); + +- if (BB_HEAD (last) == label && LABEL_P (label)) ++ FOR_EACH_VEC_ELT (basic_block, src_bbs, i, bb) + { +- edge_iterator ei2; ++ bool simple_p; ++ rtx jump; ++ e = find_edge (bb, last_bb); + +- for (ei2 = ei_start (last->preds); (e = ei_safe_edge (ei2)); ) +- { +- basic_block bb = e->src; +- rtx jump; ++ jump = BB_END (bb); + +- if (bb == ENTRY_BLOCK_PTR) +- { +- ei_next (&ei2); +- continue; +- } ++#ifdef HAVE_simple_return ++ simple_p = (entry_edge != orig_entry_edge ++ ? !bitmap_bit_p (&bb_flags, bb->index) : false); ++#else ++ simple_p = false; ++#endif + +- jump = BB_END (bb); +- if (!JUMP_P (jump) || JUMP_LABEL (jump) != label) +- { +- ei_next (&ei2); +- continue; +- } ++ if (!simple_p ++ && (!HAVE_return || !JUMP_P (jump) ++ || JUMP_LABEL (jump) != label)) ++ continue; + +- /* If we have an unconditional jump, we can replace that +- with a simple return instruction. */ +- if (simplejump_p (jump)) +- { +- emit_return_into_block (bb); +- delete_insn (jump); +- } ++ /* If we have an unconditional jump, we can replace that ++ with a simple return instruction. */ ++ if (!JUMP_P (jump)) ++ { ++ emit_barrier_after (BB_END (bb)); ++ emit_return_into_block (simple_p, bb); ++ } ++ else if (simplejump_p (jump)) ++ { ++ emit_return_into_block (simple_p, bb); ++ delete_insn (jump); ++ } ++ else if (condjump_p (jump) && JUMP_LABEL (jump) != label) ++ { ++ basic_block new_bb; ++ edge new_e; + +- /* If we have a conditional jump, we can try to replace +- that with a conditional return instruction. */ +- else if (condjump_p (jump)) +- { +- if (! redirect_jump (jump, 0, 0)) +- { +- ei_next (&ei2); +- continue; +- } ++ gcc_assert (simple_p); ++ new_bb = split_edge (e); ++ emit_barrier_after (BB_END (new_bb)); ++ emit_return_into_block (simple_p, new_bb); ++#ifdef HAVE_simple_return ++ simple_return_block = new_bb; ++#endif ++ new_e = single_succ_edge (new_bb); ++ redirect_edge_succ (new_e, EXIT_BLOCK_PTR); + +- /* If this block has only one successor, it both jumps +- and falls through to the fallthru block, so we can't +- delete the edge. */ +- if (single_succ_p (bb)) +- { +- ei_next (&ei2); +- continue; +- } +- } ++ continue; ++ } ++ /* If we have a conditional jump branching to the last ++ block, we can try to replace that with a conditional ++ return instruction. */ ++ else if (condjump_p (jump)) ++ { ++ rtx dest; ++ if (simple_p) ++ dest = simple_return_rtx; + else ++ dest = ret_rtx; ++ if (! redirect_jump (jump, dest, 0)) + { +- ei_next (&ei2); ++#ifdef HAVE_simple_return ++ if (simple_p) ++ unconverted_simple_returns = true; ++#endif + continue; + } + +- /* Fix up the CFG for the successful change we just made. 
*/ +- redirect_edge_succ (e, EXIT_BLOCK_PTR); ++ /* If this block has only one successor, it both jumps ++ and falls through to the fallthru block, so we can't ++ delete the edge. */ ++ if (single_succ_p (bb)) ++ continue; ++ } ++ else ++ { ++#ifdef HAVE_simple_return ++ if (simple_p) ++ unconverted_simple_returns = true; ++#endif ++ continue; + } + ++ /* Fix up the CFG for the successful change we just made. */ ++ redirect_edge_succ (e, EXIT_BLOCK_PTR); ++ } ++ VEC_free (basic_block, heap, src_bbs); ++ ++ if (HAVE_return) ++ { + /* Emit a return insn for the exit fallthru block. Whether + this is still reachable will be determined later. */ + +- emit_barrier_after (BB_END (last)); +- emit_return_into_block (last); +- epilogue_end = BB_END (last); +- single_succ_edge (last)->flags &= ~EDGE_FALLTHRU; ++ emit_barrier_after (BB_END (last_bb)); ++ emit_return_into_block (false, last_bb); ++ epilogue_end = BB_END (last_bb); ++ if (JUMP_P (epilogue_end)) ++ JUMP_LABEL (epilogue_end) = ret_rtx; ++ single_succ_edge (last_bb)->flags &= ~EDGE_FALLTHRU; + goto epilogue_done; + } + } +@@ -5193,15 +5524,10 @@ thread_prologue_and_epilogue_insns (void + } + #endif + +- /* Find the edge that falls through to EXIT. Other edges may exist +- due to RETURN instructions, but those don't need epilogues. +- There really shouldn't be a mixture -- either all should have +- been converted or none, however... */ ++ /* If nothing falls through into the exit block, we don't need an ++ epilogue. */ + +- FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) +- if (e->flags & EDGE_FALLTHRU) +- break; +- if (e == NULL) ++ if (exit_fallthru_edge == NULL) + goto epilogue_done; + + #ifdef HAVE_epilogue +@@ -5217,25 +5543,36 @@ thread_prologue_and_epilogue_insns (void + set_insn_locators (seq, epilogue_locator); + + seq = get_insns (); ++ returnjump = get_last_insn (); + end_sequence (); + +- insert_insn_on_edge (seq, e); ++ insert_insn_on_edge (seq, exit_fallthru_edge); + inserted = 1; ++ if (JUMP_P (returnjump)) ++ { ++ rtx pat = PATTERN (returnjump); ++ if (GET_CODE (pat) == PARALLEL) ++ pat = XVECEXP (pat, 0, 0); ++ if (ANY_RETURN_P (pat)) ++ JUMP_LABEL (returnjump) = pat; ++ else ++ JUMP_LABEL (returnjump) = ret_rtx; ++ } + } + else + #endif + { + basic_block cur_bb; + +- if (! next_active_insn (BB_END (e->src))) ++ if (! next_active_insn (BB_END (exit_fallthru_edge->src))) + goto epilogue_done; + /* We have a fall-through edge to the exit block, the source is not +- at the end of the function, and there will be an assembler epilogue +- at the end of the function. +- We can't use force_nonfallthru here, because that would try to +- use return. Inserting a jump 'by hand' is extremely messy, so ++ at the end of the function, and there will be an assembler epilogue ++ at the end of the function. ++ We can't use force_nonfallthru here, because that would try to ++ use return. Inserting a jump 'by hand' is extremely messy, so + we take advantage of cfg_layout_finalize using +- fixup_fallthru_exit_predecessor. */ ++ fixup_fallthru_exit_predecessor. 
*/ + cfg_layout_initialize (0); + FOR_EACH_BB (cur_bb) + if (cur_bb->index >= NUM_FIXED_BLOCKS +@@ -5244,6 +5581,7 @@ thread_prologue_and_epilogue_insns (void + cfg_layout_finalize (); + } + epilogue_done: ++ + default_rtl_profile (); + + if (inserted) +@@ -5260,33 +5598,93 @@ epilogue_done: + } + } + ++#ifdef HAVE_simple_return ++ /* If there were branches to an empty LAST_BB which we tried to ++ convert to conditional simple_returns, but couldn't for some ++ reason, create a block to hold a simple_return insn and redirect ++ those remaining edges. */ ++ if (unconverted_simple_returns) ++ { ++ edge_iterator ei2; ++ basic_block exit_pred = EXIT_BLOCK_PTR->prev_bb; ++ ++ gcc_assert (entry_edge != orig_entry_edge); ++ ++#ifdef HAVE_epilogue ++ if (simple_return_block == NULL && returnjump != NULL_RTX ++ && JUMP_LABEL (returnjump) == simple_return_rtx) ++ { ++ edge e = split_block (exit_fallthru_edge->src, ++ PREV_INSN (returnjump)); ++ simple_return_block = e->dest; ++ } ++#endif ++ if (simple_return_block == NULL) ++ { ++ basic_block bb; ++ rtx start; ++ ++ bb = create_basic_block (NULL, NULL, exit_pred); ++ start = emit_jump_insn_after (gen_simple_return (), ++ BB_END (bb)); ++ JUMP_LABEL (start) = simple_return_rtx; ++ emit_barrier_after (start); ++ ++ simple_return_block = bb; ++ make_edge (bb, EXIT_BLOCK_PTR, 0); ++ } ++ ++ restart_scan: ++ for (ei2 = ei_start (last_bb->preds); (e = ei_safe_edge (ei2)); ) ++ { ++ basic_block bb = e->src; ++ ++ if (bb != ENTRY_BLOCK_PTR ++ && !bitmap_bit_p (&bb_flags, bb->index)) ++ { ++ redirect_edge_and_branch_force (e, simple_return_block); ++ goto restart_scan; ++ } ++ ei_next (&ei2); ++ ++ } ++ } ++#endif ++ + #ifdef HAVE_sibcall_epilogue + /* Emit sibling epilogues before any sibling call sites. */ + for (ei = ei_start (EXIT_BLOCK_PTR->preds); (e = ei_safe_edge (ei)); ) + { + basic_block bb = e->src; + rtx insn = BB_END (bb); ++ rtx ep_seq; + + if (!CALL_P (insn) +- || ! SIBLING_CALL_P (insn)) ++ || ! SIBLING_CALL_P (insn) ++ || (entry_edge != orig_entry_edge ++ && !bitmap_bit_p (&bb_flags, bb->index))) + { + ei_next (&ei); + continue; + } + +- start_sequence (); +- emit_note (NOTE_INSN_EPILOGUE_BEG); +- emit_insn (gen_sibcall_epilogue ()); +- seq = get_insns (); +- end_sequence (); ++ ep_seq = gen_sibcall_epilogue (); ++ if (ep_seq) ++ { ++ start_sequence (); ++ emit_note (NOTE_INSN_EPILOGUE_BEG); ++ emit_insn (ep_seq); ++ seq = get_insns (); ++ end_sequence (); + +- /* Retain a map of the epilogue insns. Used in life analysis to +- avoid getting rid of sibcall epilogue insns. Do this before we +- actually emit the sequence. */ +- record_insns (seq, NULL, &epilogue_insn_hash); +- set_insn_locators (seq, epilogue_locator); ++ /* Retain a map of the epilogue insns. Used in life analysis to ++ avoid getting rid of sibcall epilogue insns. Do this before we ++ actually emit the sequence. */ ++ record_insns (seq, NULL, &epilogue_insn_hash); ++ set_insn_locators (seq, epilogue_locator); + +- emit_insn_before (seq, insn); ++ emit_insn_before (seq, insn); ++ } + ei_next (&ei); + } + #endif +@@ -5311,6 +5709,8 @@ epilogue_done: + } + #endif + ++ bitmap_clear (&bb_flags); ++ + /* Threading the prologue and epilogue changes the artificial refs + in the entry and exit blocks. 
*/ + epilogue_completed = 1; +Index: gcc-4_5-branch/gcc/genemit.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/genemit.c ++++ gcc-4_5-branch/gcc/genemit.c +@@ -222,6 +222,12 @@ gen_exp (rtx x, enum rtx_code subroutine + case PC: + printf ("pc_rtx"); + return; ++ case RETURN: ++ printf ("ret_rtx"); ++ return; ++ case SIMPLE_RETURN: ++ printf ("simple_return_rtx"); ++ return; + case CLOBBER: + if (REG_P (XEXP (x, 0))) + { +@@ -544,8 +550,8 @@ gen_expand (rtx expand) + || (GET_CODE (next) == PARALLEL + && ((GET_CODE (XVECEXP (next, 0, 0)) == SET + && GET_CODE (SET_DEST (XVECEXP (next, 0, 0))) == PC) +- || GET_CODE (XVECEXP (next, 0, 0)) == RETURN)) +- || GET_CODE (next) == RETURN) ++ || ANY_RETURN_P (XVECEXP (next, 0, 0)))) ++ || ANY_RETURN_P (next)) + printf (" emit_jump_insn ("); + else if ((GET_CODE (next) == SET && GET_CODE (SET_SRC (next)) == CALL) + || GET_CODE (next) == CALL +@@ -660,7 +666,7 @@ gen_split (rtx split) + || (GET_CODE (next) == PARALLEL + && GET_CODE (XVECEXP (next, 0, 0)) == SET + && GET_CODE (SET_DEST (XVECEXP (next, 0, 0))) == PC) +- || GET_CODE (next) == RETURN) ++ || ANY_RETURN_P (next)) + printf (" emit_jump_insn ("); + else if ((GET_CODE (next) == SET && GET_CODE (SET_SRC (next)) == CALL) + || GET_CODE (next) == CALL +Index: gcc-4_5-branch/gcc/gengenrtl.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/gengenrtl.c ++++ gcc-4_5-branch/gcc/gengenrtl.c +@@ -146,6 +146,10 @@ special_rtx (int idx) + || strcmp (defs[idx].enumname, "REG") == 0 + || strcmp (defs[idx].enumname, "SUBREG") == 0 + || strcmp (defs[idx].enumname, "MEM") == 0 ++ || strcmp (defs[idx].enumname, "PC") == 0 ++ || strcmp (defs[idx].enumname, "CC0") == 0 ++ || strcmp (defs[idx].enumname, "RETURN") == 0 ++ || strcmp (defs[idx].enumname, "SIMPLE_RETURN") == 0 + || strcmp (defs[idx].enumname, "CONST_VECTOR") == 0); + } + +Index: gcc-4_5-branch/gcc/haifa-sched.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/haifa-sched.c ++++ gcc-4_5-branch/gcc/haifa-sched.c +@@ -4231,7 +4231,7 @@ xrecalloc (void *p, size_t new_nmemb, si + /* Helper function. + Find fallthru edge from PRED. */ + edge +-find_fallthru_edge (basic_block pred) ++find_fallthru_edge_from (basic_block pred) + { + edge e; + edge_iterator ei; +@@ -4298,7 +4298,7 @@ init_before_recovery (basic_block *befor + edge e; + + last = EXIT_BLOCK_PTR->prev_bb; +- e = find_fallthru_edge (last); ++ e = find_fallthru_edge_from (last); + + if (e) + { +@@ -5234,6 +5234,11 @@ check_cfg (rtx head, rtx tail) + gcc_assert (/* Usual case. */ + (EDGE_COUNT (bb->succs) > 1 + && !BARRIER_P (NEXT_INSN (head))) ++ /* Special cases, see cfglayout.c: ++ fixup_reorder_chain. */ ++ || (EDGE_COUNT (bb->succs) == 1 ++ && (!onlyjump_p (head) ++ || returnjump_p (head))) + /* Or jump to the next instruction. 
*/ + || (EDGE_COUNT (bb->succs) == 1 + && (BB_HEAD (EDGE_I (bb->succs, 0)->dest) +Index: gcc-4_5-branch/gcc/ifcvt.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/ifcvt.c ++++ gcc-4_5-branch/gcc/ifcvt.c +@@ -105,7 +105,7 @@ static int find_if_case_1 (basic_block, + static int find_if_case_2 (basic_block, edge, edge); + static int find_memory (rtx *, void *); + static int dead_or_predicable (basic_block, basic_block, basic_block, +- basic_block, int); ++ edge, int); + static void noce_emit_move_insn (rtx, rtx); + static rtx block_has_only_trap (basic_block); + +@@ -3791,6 +3791,7 @@ find_if_case_1 (basic_block test_bb, edg + basic_block then_bb = then_edge->dest; + basic_block else_bb = else_edge->dest; + basic_block new_bb; ++ rtx else_target = NULL_RTX; + int then_bb_index; + + /* If we are partitioning hot/cold basic blocks, we don't want to +@@ -3840,9 +3841,16 @@ find_if_case_1 (basic_block test_bb, edg + predictable_edge_p (then_edge))))) + return FALSE; + ++ if (else_bb == EXIT_BLOCK_PTR) ++ { ++ rtx jump = BB_END (else_edge->src); ++ gcc_assert (JUMP_P (jump)); ++ else_target = JUMP_LABEL (jump); ++ } ++ + /* Registers set are dead, or are predicable. */ + if (! dead_or_predicable (test_bb, then_bb, else_bb, +- single_succ (then_bb), 1)) ++ single_succ_edge (then_bb), 1)) + return FALSE; + + /* Conversion went ok, including moving the insns and fixing up the +@@ -3859,6 +3867,9 @@ find_if_case_1 (basic_block test_bb, edg + redirect_edge_succ (FALLTHRU_EDGE (test_bb), else_bb); + new_bb = 0; + } ++ else if (else_bb == EXIT_BLOCK_PTR) ++ new_bb = force_nonfallthru_and_redirect (FALLTHRU_EDGE (test_bb), ++ else_bb, else_target); + else + new_bb = redirect_edge_and_branch_force (FALLTHRU_EDGE (test_bb), + else_bb); +@@ -3957,7 +3968,7 @@ find_if_case_2 (basic_block test_bb, edg + return FALSE; + + /* Registers set are dead, or are predicable. */ +- if (! dead_or_predicable (test_bb, else_bb, then_bb, else_succ->dest, 0)) ++ if (! dead_or_predicable (test_bb, else_bb, then_bb, else_succ, 0)) + return FALSE; + + /* Conversion went ok, including moving the insns and fixing up the +@@ -3995,12 +4006,34 @@ find_memory (rtx *px, void *data ATTRIBU + + static int + dead_or_predicable (basic_block test_bb, basic_block merge_bb, +- basic_block other_bb, basic_block new_dest, int reversep) ++ basic_block other_bb, edge dest_edge, int reversep) + { +- rtx head, end, jump, earliest = NULL_RTX, old_dest, new_label = NULL_RTX; ++ basic_block new_dest = dest_edge->dest; ++ rtx head, end, jump, earliest = NULL_RTX, old_dest; + bitmap merge_set = NULL; + /* Number of pending changes. */ + int n_validated_changes = 0; ++ rtx new_dest_label; ++ ++ jump = BB_END (dest_edge->src); ++ if (JUMP_P (jump)) ++ { ++ new_dest_label = JUMP_LABEL (jump); ++ if (new_dest_label == NULL_RTX) ++ { ++ new_dest_label = PATTERN (jump); ++ gcc_assert (ANY_RETURN_P (new_dest_label)); ++ } ++ } ++ else if (other_bb != new_dest) ++ { ++ if (new_dest == EXIT_BLOCK_PTR) ++ new_dest_label = ret_rtx; ++ else ++ new_dest_label = block_label (new_dest); ++ } ++ else ++ new_dest_label = NULL_RTX; + + jump = BB_END (test_bb); + +@@ -4220,10 +4253,9 @@ dead_or_predicable (basic_block test_bb, + old_dest = JUMP_LABEL (jump); + if (other_bb != new_dest) + { +- new_label = block_label (new_dest); + if (reversep +- ? ! invert_jump_1 (jump, new_label) +- : ! redirect_jump_1 (jump, new_label)) ++ ? ! invert_jump_1 (jump, new_dest_label) ++ : ! 
redirect_jump_1 (jump, new_dest_label))
+ goto cancel;
+ }
+
+@@ -4234,7 +4266,7 @@ dead_or_predicable (basic_block test_bb,
+
+ if (other_bb != new_dest)
+ {
+- redirect_jump_2 (jump, old_dest, new_label, 0, reversep);
++ redirect_jump_2 (jump, old_dest, new_dest_label, 0, reversep);
+
+ redirect_edge_succ (BRANCH_EDGE (test_bb), new_dest);
+ if (reversep)
+Index: gcc-4_5-branch/gcc/jump.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/jump.c
++++ gcc-4_5-branch/gcc/jump.c
+@@ -29,7 +29,8 @@ along with GCC; see the file COPYING3.
+ JUMP_LABEL internal field. With this we can detect labels that
+ become unused because of the deletion of all the jumps that
+ formerly used them. The JUMP_LABEL info is sometimes looked
+- at by later passes.
++ at by later passes. For return insns, it contains either a
++ RETURN or a SIMPLE_RETURN rtx.
+
+ The subroutines redirect_jump and invert_jump are used
+ from other passes as well. */
+@@ -742,10 +743,10 @@ condjump_p (const_rtx insn)
+ return (GET_CODE (x) == IF_THEN_ELSE
+ && ((GET_CODE (XEXP (x, 2)) == PC
+ && (GET_CODE (XEXP (x, 1)) == LABEL_REF
+- || GET_CODE (XEXP (x, 1)) == RETURN))
++ || ANY_RETURN_P (XEXP (x, 1))))
+ || (GET_CODE (XEXP (x, 1)) == PC
+ && (GET_CODE (XEXP (x, 2)) == LABEL_REF
+- || GET_CODE (XEXP (x, 2)) == RETURN))));
++ || ANY_RETURN_P (XEXP (x, 2))))));
+ }
+
+ /* Return nonzero if INSN is a (possibly) conditional jump inside a
+@@ -774,11 +775,11 @@ condjump_in_parallel_p (const_rtx insn)
+ return 0;
+ if (XEXP (SET_SRC (x), 2) == pc_rtx
+ && (GET_CODE (XEXP (SET_SRC (x), 1)) == LABEL_REF
+- || GET_CODE (XEXP (SET_SRC (x), 1)) == RETURN))
++ || ANY_RETURN_P (XEXP (SET_SRC (x), 1))))
+ return 1;
+ if (XEXP (SET_SRC (x), 1) == pc_rtx
+ && (GET_CODE (XEXP (SET_SRC (x), 2)) == LABEL_REF
+- || GET_CODE (XEXP (SET_SRC (x), 2)) == RETURN))
++ || ANY_RETURN_P (XEXP (SET_SRC (x), 2))))
+ return 1;
+ return 0;
+ }
+@@ -840,8 +841,9 @@ any_condjump_p (const_rtx insn)
+ a = GET_CODE (XEXP (SET_SRC (x), 1));
+ b = GET_CODE (XEXP (SET_SRC (x), 2));
+
+- return ((b == PC && (a == LABEL_REF || a == RETURN))
+- || (a == PC && (b == LABEL_REF || b == RETURN)));
++ return ((b == PC && (a == LABEL_REF || a == RETURN || a == SIMPLE_RETURN))
++ || (a == PC
++ && (b == LABEL_REF || b == RETURN || b == SIMPLE_RETURN)));
+ }
+
+ /* Return the label of a conditional jump. */
+@@ -878,6 +880,7 @@ returnjump_p_1 (rtx *loc, void *data ATT
+ switch (GET_CODE (x))
+ {
+ case RETURN:
++ case SIMPLE_RETURN:
+ case EH_RETURN:
+ return true;
+
+@@ -1200,7 +1203,7 @@ delete_related_insns (rtx insn)
+ /* If deleting a jump, decrement the count of the label,
+ and delete the label if it is now unused. */
+
+- if (JUMP_P (insn) && JUMP_LABEL (insn))
++ if (JUMP_P (insn) && JUMP_LABEL (insn) && !ANY_RETURN_P (JUMP_LABEL (insn)))
+ {
+ rtx lab = JUMP_LABEL (insn), lab_next;
+
+@@ -1331,6 +1334,18 @@ delete_for_peephole (rtx from, rtx to)
+ is also an unconditional jump in that case. */
+ }
+
++/* A helper function for redirect_exp_1; examines its input X and returns
++ either a LABEL_REF around a label, or a RETURN if X was NULL. */
++static rtx
++redirect_target (rtx x)
++{
++ if (x == NULL_RTX)
++ return ret_rtx;
++ if (!ANY_RETURN_P (x))
++ return gen_rtx_LABEL_REF (Pmode, x);
++ return x;
++}
++
+ /* Throughout LOC, redirect OLABEL to NLABEL. Treat null OLABEL or
+ NLABEL as a return. Accrue modifications into the change group.
*/ + +@@ -1342,37 +1357,19 @@ redirect_exp_1 (rtx *loc, rtx olabel, rt + int i; + const char *fmt; + +- if (code == LABEL_REF) +- { +- if (XEXP (x, 0) == olabel) +- { +- rtx n; +- if (nlabel) +- n = gen_rtx_LABEL_REF (Pmode, nlabel); +- else +- n = gen_rtx_RETURN (VOIDmode); +- +- validate_change (insn, loc, n, 1); +- return; +- } +- } +- else if (code == RETURN && olabel == 0) ++ if ((code == LABEL_REF && XEXP (x, 0) == olabel) ++ || x == olabel) + { +- if (nlabel) +- x = gen_rtx_LABEL_REF (Pmode, nlabel); +- else +- x = gen_rtx_RETURN (VOIDmode); +- if (loc == &PATTERN (insn)) +- x = gen_rtx_SET (VOIDmode, pc_rtx, x); +- validate_change (insn, loc, x, 1); ++ validate_change (insn, loc, redirect_target (nlabel), 1); + return; + } + +- if (code == SET && nlabel == 0 && SET_DEST (x) == pc_rtx ++ if (code == SET && SET_DEST (x) == pc_rtx ++ && ANY_RETURN_P (nlabel) + && GET_CODE (SET_SRC (x)) == LABEL_REF + && XEXP (SET_SRC (x), 0) == olabel) + { +- validate_change (insn, loc, gen_rtx_RETURN (VOIDmode), 1); ++ validate_change (insn, loc, nlabel, 1); + return; + } + +@@ -1409,6 +1406,7 @@ redirect_jump_1 (rtx jump, rtx nlabel) + int ochanges = num_validated_changes (); + rtx *loc, asmop; + ++ gcc_assert (nlabel); + asmop = extract_asm_operands (PATTERN (jump)); + if (asmop) + { +@@ -1430,17 +1428,20 @@ redirect_jump_1 (rtx jump, rtx nlabel) + jump target label is unused as a result, it and the code following + it may be deleted. + +- If NLABEL is zero, we are to turn the jump into a (possibly conditional) +- RETURN insn. ++ Normally, NLABEL will be a label, but it may also be a RETURN or ++ SIMPLE_RETURN rtx; in that case we are to turn the jump into a ++ (possibly conditional) return insn. + + The return value will be 1 if the change was made, 0 if it wasn't +- (this can only occur for NLABEL == 0). */ ++ (this can only occur when trying to produce return insns). */ + + int + redirect_jump (rtx jump, rtx nlabel, int delete_unused) + { + rtx olabel = JUMP_LABEL (jump); + ++ gcc_assert (nlabel != NULL_RTX); ++ + if (nlabel == olabel) + return 1; + +@@ -1452,7 +1453,7 @@ redirect_jump (rtx jump, rtx nlabel, int + } + + /* Fix up JUMP_LABEL and label ref counts after OLABEL has been replaced with +- NLABEL in JUMP. ++ NEW_DEST in JUMP. + If DELETE_UNUSED is positive, delete related insn to OLABEL if its ref + count has dropped to zero. */ + void +@@ -1468,13 +1469,14 @@ redirect_jump_2 (rtx jump, rtx olabel, r + about this. */ + gcc_assert (delete_unused >= 0); + JUMP_LABEL (jump) = nlabel; +- if (nlabel) ++ if (nlabel && !ANY_RETURN_P (nlabel)) + ++LABEL_NUSES (nlabel); + + /* Update labels in any REG_EQUAL note. */ + if ((note = find_reg_note (jump, REG_EQUAL, NULL_RTX)) != NULL_RTX) + { +- if (!nlabel || (invert && !invert_exp_1 (XEXP (note, 0), jump))) ++ if (ANY_RETURN_P (nlabel) ++ || (invert && !invert_exp_1 (XEXP (note, 0), jump))) + remove_note (jump, note); + else + { +@@ -1483,7 +1485,8 @@ redirect_jump_2 (rtx jump, rtx olabel, r + } + } + +- if (olabel && --LABEL_NUSES (olabel) == 0 && delete_unused > 0 ++ if (olabel && !ANY_RETURN_P (olabel) ++ && --LABEL_NUSES (olabel) == 0 && delete_unused > 0 + /* Undefined labels will remain outside the insn stream. 
*/ + && INSN_UID (olabel)) + delete_related_insns (olabel); +Index: gcc-4_5-branch/gcc/opts.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/opts.c ++++ gcc-4_5-branch/gcc/opts.c +@@ -909,6 +909,7 @@ decode_options (unsigned int argc, const + flag_ipa_cp = opt2; + flag_ipa_sra = opt2; + flag_ee = opt2; ++ flag_shrink_wrap = opt2; + + /* Track fields in field-sensitive alias analysis. */ + set_param_value ("max-fields-for-field-sensitive", +Index: gcc-4_5-branch/gcc/print-rtl.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/print-rtl.c ++++ gcc-4_5-branch/gcc/print-rtl.c +@@ -308,9 +308,16 @@ print_rtx (const_rtx in_rtx) + } + } + else if (i == 8 && JUMP_P (in_rtx) && JUMP_LABEL (in_rtx) != NULL) +- /* Output the JUMP_LABEL reference. */ +- fprintf (outfile, "\n%s%*s -> %d", print_rtx_head, indent * 2, "", +- INSN_UID (JUMP_LABEL (in_rtx))); ++ { ++ /* Output the JUMP_LABEL reference. */ ++ fprintf (outfile, "\n%s%*s -> ", print_rtx_head, indent * 2, ""); ++ if (GET_CODE (JUMP_LABEL (in_rtx)) == RETURN) ++ fprintf (outfile, "return"); ++ else if (GET_CODE (JUMP_LABEL (in_rtx)) == SIMPLE_RETURN) ++ fprintf (outfile, "simple_return"); ++ else ++ fprintf (outfile, "%d", INSN_UID (JUMP_LABEL (in_rtx))); ++ } + else if (i == 0 && GET_CODE (in_rtx) == VALUE) + { + #ifndef GENERATOR_FILE +Index: gcc-4_5-branch/gcc/reorg.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/reorg.c ++++ gcc-4_5-branch/gcc/reorg.c +@@ -161,8 +161,11 @@ static rtx *unfilled_firstobj; + #define unfilled_slots_next \ + ((rtx *) obstack_next_free (&unfilled_slots_obstack)) + +-/* Points to the label before the end of the function. */ +-static rtx end_of_function_label; ++/* Points to the label before the end of the function, or before a ++ return insn. */ ++static rtx function_return_label; ++/* Likewise for a simple_return. */ ++static rtx function_simple_return_label; + + /* Mapping between INSN_UID's and position in the code since INSN_UID's do + not always monotonically increase. */ +@@ -175,7 +178,7 @@ static int stop_search_p (rtx, int); + static int resource_conflicts_p (struct resources *, struct resources *); + static int insn_references_resource_p (rtx, struct resources *, bool); + static int insn_sets_resource_p (rtx, struct resources *, bool); +-static rtx find_end_label (void); ++static rtx find_end_label (rtx); + static rtx emit_delay_sequence (rtx, rtx, int); + static rtx add_to_delay_list (rtx, rtx); + static rtx delete_from_delay_slot (rtx); +@@ -220,6 +223,15 @@ static void relax_delay_slots (rtx); + static void make_return_insns (rtx); + #endif + ++/* Return true iff INSN is a simplejump, or any kind of return insn. */ ++ ++static bool ++simplejump_or_return_p (rtx insn) ++{ ++ return (JUMP_P (insn) ++ && (simplejump_p (insn) || ANY_RETURN_P (PATTERN (insn)))); ++} ++ + /* Return TRUE if this insn should stop the search for insn to fill delay + slots. LABELS_P indicates that labels should terminate the search. + In all cases, jumps terminate the search. */ +@@ -335,23 +347,29 @@ insn_sets_resource_p (rtx insn, struct r + + ??? There may be a problem with the current implementation. Suppose + we start with a bare RETURN insn and call find_end_label. It may set +- end_of_function_label just before the RETURN. Suppose the machinery ++ function_return_label just before the RETURN. 
Suppose the machinery + is able to fill the delay slot of the RETURN insn afterwards. Then +- end_of_function_label is no longer valid according to the property ++ function_return_label is no longer valid according to the property + described above and find_end_label will still return it unmodified. + Note that this is probably mitigated by the following observation: +- once end_of_function_label is made, it is very likely the target of ++ once function_return_label is made, it is very likely the target of + a jump, so filling the delay slot of the RETURN will be much more + difficult. */ + + static rtx +-find_end_label (void) ++find_end_label (rtx kind) + { + rtx insn; ++ rtx *plabel; ++ ++ if (kind == ret_rtx) ++ plabel = &function_return_label; ++ else ++ plabel = &function_simple_return_label; + + /* If we found one previously, return it. */ +- if (end_of_function_label) +- return end_of_function_label; ++ if (*plabel) ++ return *plabel; + + /* Otherwise, see if there is a label at the end of the function. If there + is, it must be that RETURN insns aren't needed, so that is our return +@@ -366,44 +384,44 @@ find_end_label (void) + + /* When a target threads its epilogue we might already have a + suitable return insn. If so put a label before it for the +- end_of_function_label. */ ++ function_return_label. */ + if (BARRIER_P (insn) + && JUMP_P (PREV_INSN (insn)) +- && GET_CODE (PATTERN (PREV_INSN (insn))) == RETURN) ++ && PATTERN (PREV_INSN (insn)) == kind) + { + rtx temp = PREV_INSN (PREV_INSN (insn)); +- end_of_function_label = gen_label_rtx (); +- LABEL_NUSES (end_of_function_label) = 0; ++ rtx label = gen_label_rtx (); ++ LABEL_NUSES (label) = 0; + + /* Put the label before an USE insns that may precede the RETURN insn. */ + while (GET_CODE (temp) == USE) + temp = PREV_INSN (temp); + +- emit_label_after (end_of_function_label, temp); ++ emit_label_after (label, temp); ++ *plabel = label; + } + + else if (LABEL_P (insn)) +- end_of_function_label = insn; ++ *plabel = insn; + else + { +- end_of_function_label = gen_label_rtx (); +- LABEL_NUSES (end_of_function_label) = 0; ++ rtx label = gen_label_rtx (); ++ LABEL_NUSES (label) = 0; + /* If the basic block reorder pass moves the return insn to + some other place try to locate it again and put our +- end_of_function_label there. */ +- while (insn && ! (JUMP_P (insn) +- && (GET_CODE (PATTERN (insn)) == RETURN))) ++ function_return_label there. */ ++ while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind))) + insn = PREV_INSN (insn); + if (insn) + { + insn = PREV_INSN (insn); + +- /* Put the label before an USE insns that may proceed the ++ /* Put the label before an USE insns that may precede the + RETURN insn. */ + while (GET_CODE (insn) == USE) + insn = PREV_INSN (insn); + +- emit_label_after (end_of_function_label, insn); ++ emit_label_after (label, insn); + } + else + { +@@ -413,19 +431,16 @@ find_end_label (void) + && ! HAVE_return + #endif + ) +- { +- /* The RETURN insn has its delay slot filled so we cannot +- emit the label just before it. Since we already have +- an epilogue and cannot emit a new RETURN, we cannot +- emit the label at all. */ +- end_of_function_label = NULL_RTX; +- return end_of_function_label; +- } ++ /* The RETURN insn has its delay slot filled so we cannot ++ emit the label just before it. Since we already have ++ an epilogue and cannot emit a new RETURN, we cannot ++ emit the label at all. 
*/ ++ return NULL_RTX; + #endif /* HAVE_epilogue */ + + /* Otherwise, make a new label and emit a RETURN and BARRIER, + if needed. */ +- emit_label (end_of_function_label); ++ emit_label (label); + #ifdef HAVE_return + /* We don't bother trying to create a return insn if the + epilogue has filled delay-slots; we would have to try and +@@ -437,19 +452,21 @@ find_end_label (void) + /* The return we make may have delay slots too. */ + rtx insn = gen_return (); + insn = emit_jump_insn (insn); ++ JUMP_LABEL (insn) = ret_rtx; + emit_barrier (); + if (num_delay_slots (insn) > 0) + obstack_ptr_grow (&unfilled_slots_obstack, insn); + } + #endif + } ++ *plabel = label; + } + + /* Show one additional use for this label so it won't go away until + we are done. */ +- ++LABEL_NUSES (end_of_function_label); ++ ++LABEL_NUSES (*plabel); + +- return end_of_function_label; ++ return *plabel; + } + + /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace +@@ -797,10 +814,8 @@ optimize_skip (rtx insn) + if ((next_trial == next_active_insn (JUMP_LABEL (insn)) + && ! (next_trial == 0 && crtl->epilogue_delay_list != 0)) + || (next_trial != 0 +- && JUMP_P (next_trial) +- && JUMP_LABEL (insn) == JUMP_LABEL (next_trial) +- && (simplejump_p (next_trial) +- || GET_CODE (PATTERN (next_trial)) == RETURN))) ++ && simplejump_or_return_p (next_trial) ++ && JUMP_LABEL (insn) == JUMP_LABEL (next_trial))) + { + if (eligible_for_annul_false (insn, 0, trial, flags)) + { +@@ -819,13 +834,11 @@ optimize_skip (rtx insn) + branch, thread our jump to the target of that branch. Don't + change this into a RETURN here, because it may not accept what + we have in the delay slot. We'll fix this up later. */ +- if (next_trial && JUMP_P (next_trial) +- && (simplejump_p (next_trial) +- || GET_CODE (PATTERN (next_trial)) == RETURN)) ++ if (next_trial && simplejump_or_return_p (next_trial)) + { + rtx target_label = JUMP_LABEL (next_trial); +- if (target_label == 0) +- target_label = find_end_label (); ++ if (ANY_RETURN_P (target_label)) ++ target_label = find_end_label (target_label); + + if (target_label) + { +@@ -866,7 +879,7 @@ get_jump_flags (rtx insn, rtx label) + if (JUMP_P (insn) + && (condjump_p (insn) || condjump_in_parallel_p (insn)) + && INSN_UID (insn) <= max_uid +- && label != 0 ++ && label != 0 && !ANY_RETURN_P (label) + && INSN_UID (label) <= max_uid) + flags + = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)]) +@@ -1038,7 +1051,7 @@ get_branch_condition (rtx insn, rtx targ + pat = XVECEXP (pat, 0, 0); + + if (GET_CODE (pat) == RETURN) +- return target == 0 ? const_true_rtx : 0; ++ return ANY_RETURN_P (target) ? const_true_rtx : 0; + + else if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx) + return 0; +@@ -1318,7 +1331,11 @@ steal_delay_list_from_target (rtx insn, + } + + /* Show the place to which we will be branching. */ +- *pnew_thread = next_active_insn (JUMP_LABEL (XVECEXP (seq, 0, 0))); ++ temp = JUMP_LABEL (XVECEXP (seq, 0, 0)); ++ if (ANY_RETURN_P (temp)) ++ *pnew_thread = temp; ++ else ++ *pnew_thread = next_active_insn (temp); + + /* Add any new insns to the delay list and update the count of the + number of slots filled. */ +@@ -1358,8 +1375,7 @@ steal_delay_list_from_fallthrough (rtx i + /* We can't do anything if SEQ's delay insn isn't an + unconditional branch. */ + +- if (! simplejump_p (XVECEXP (seq, 0, 0)) +- && GET_CODE (PATTERN (XVECEXP (seq, 0, 0))) != RETURN) ++ if (! 
simplejump_or_return_p (XVECEXP (seq, 0, 0))) + return delay_list; + + for (i = 1; i < XVECLEN (seq, 0); i++) +@@ -1827,7 +1843,7 @@ own_thread_p (rtx thread, rtx label, int + rtx insn; + + /* We don't own the function end. */ +- if (thread == 0) ++ if (ANY_RETURN_P (thread)) + return 0; + + /* Get the first active insn, or THREAD, if it is an active insn. */ +@@ -2245,7 +2261,8 @@ fill_simple_delay_slots (int non_jumps_p + && (!JUMP_P (insn) + || ((condjump_p (insn) || condjump_in_parallel_p (insn)) + && ! simplejump_p (insn) +- && JUMP_LABEL (insn) != 0))) ++ && JUMP_LABEL (insn) != 0 ++ && !ANY_RETURN_P (JUMP_LABEL (insn))))) + { + /* Invariant: If insn is a JUMP_INSN, the insn's jump + label. Otherwise, zero. */ +@@ -2270,7 +2287,7 @@ fill_simple_delay_slots (int non_jumps_p + target = JUMP_LABEL (insn); + } + +- if (target == 0) ++ if (target == 0 || ANY_RETURN_P (target)) + for (trial = next_nonnote_insn (insn); trial; trial = next_trial) + { + next_trial = next_nonnote_insn (trial); +@@ -2349,6 +2366,7 @@ fill_simple_delay_slots (int non_jumps_p + && JUMP_P (trial) + && simplejump_p (trial) + && (target == 0 || JUMP_LABEL (trial) == target) ++ && !ANY_RETURN_P (JUMP_LABEL (trial)) + && (next_trial = next_active_insn (JUMP_LABEL (trial))) != 0 + && ! (NONJUMP_INSN_P (next_trial) + && GET_CODE (PATTERN (next_trial)) == SEQUENCE) +@@ -2371,7 +2389,7 @@ fill_simple_delay_slots (int non_jumps_p + if (new_label != 0) + new_label = get_label_before (new_label); + else +- new_label = find_end_label (); ++ new_label = find_end_label (simple_return_rtx); + + if (new_label) + { +@@ -2503,7 +2521,8 @@ fill_simple_delay_slots (int non_jumps_p + + /* Follow any unconditional jump at LABEL; + return the ultimate label reached by any such chain of jumps. +- Return null if the chain ultimately leads to a return instruction. ++ Return a suitable return rtx if the chain ultimately leads to a ++ return instruction. + If LABEL is not followed by a jump, return LABEL. + If the chain loops or we can't find end, return LABEL, + since that tells caller to avoid changing the insn. */ +@@ -2518,6 +2537,7 @@ follow_jumps (rtx label) + + for (depth = 0; + (depth < 10 ++ && !ANY_RETURN_P (value) + && (insn = next_active_insn (value)) != 0 + && JUMP_P (insn) + && ((JUMP_LABEL (insn) != 0 && any_uncondjump_p (insn) +@@ -2527,18 +2547,22 @@ follow_jumps (rtx label) + && BARRIER_P (next)); + depth++) + { +- rtx tem; ++ rtx this_label = JUMP_LABEL (insn); + + /* If we have found a cycle, make the insn jump to itself. */ +- if (JUMP_LABEL (insn) == label) ++ if (this_label == label) + return label; + +- tem = next_active_insn (JUMP_LABEL (insn)); +- if (tem && (GET_CODE (PATTERN (tem)) == ADDR_VEC ++ if (!ANY_RETURN_P (this_label)) ++ { ++ rtx tem = next_active_insn (this_label); ++ if (tem ++ && (GET_CODE (PATTERN (tem)) == ADDR_VEC + || GET_CODE (PATTERN (tem)) == ADDR_DIFF_VEC)) +- break; ++ break; ++ } + +- value = JUMP_LABEL (insn); ++ value = this_label; + } + if (depth == 10) + return label; +@@ -2901,6 +2925,7 @@ fill_slots_from_thread (rtx insn, rtx co + arithmetic insn after the jump insn and put the arithmetic insn in the + delay slot. If we can't do this, return. 
*/ + if (delay_list == 0 && likely && new_thread ++ && !ANY_RETURN_P (new_thread) + && NONJUMP_INSN_P (new_thread) + && GET_CODE (PATTERN (new_thread)) != ASM_INPUT + && asm_noperands (PATTERN (new_thread)) < 0) +@@ -2985,16 +3010,14 @@ fill_slots_from_thread (rtx insn, rtx co + + gcc_assert (thread_if_true); + +- if (new_thread && JUMP_P (new_thread) +- && (simplejump_p (new_thread) +- || GET_CODE (PATTERN (new_thread)) == RETURN) ++ if (new_thread && simplejump_or_return_p (new_thread) + && redirect_with_delay_list_safe_p (insn, + JUMP_LABEL (new_thread), + delay_list)) + new_thread = follow_jumps (JUMP_LABEL (new_thread)); + +- if (new_thread == 0) +- label = find_end_label (); ++ if (ANY_RETURN_P (new_thread)) ++ label = find_end_label (new_thread); + else if (LABEL_P (new_thread)) + label = new_thread; + else +@@ -3340,11 +3363,12 @@ relax_delay_slots (rtx first) + group of consecutive labels. */ + if (JUMP_P (insn) + && (condjump_p (insn) || condjump_in_parallel_p (insn)) +- && (target_label = JUMP_LABEL (insn)) != 0) ++ && (target_label = JUMP_LABEL (insn)) != 0 ++ && !ANY_RETURN_P (target_label)) + { + target_label = skip_consecutive_labels (follow_jumps (target_label)); +- if (target_label == 0) +- target_label = find_end_label (); ++ if (ANY_RETURN_P (target_label)) ++ target_label = find_end_label (target_label); + + if (target_label && next_active_insn (target_label) == next + && ! condjump_in_parallel_p (insn)) +@@ -3359,9 +3383,8 @@ relax_delay_slots (rtx first) + /* See if this jump conditionally branches around an unconditional + jump. If so, invert this jump and point it to the target of the + second jump. */ +- if (next && JUMP_P (next) ++ if (next && simplejump_or_return_p (next) + && any_condjump_p (insn) +- && (simplejump_p (next) || GET_CODE (PATTERN (next)) == RETURN) + && target_label + && next_active_insn (target_label) == next_active_insn (next) + && no_labels_between_p (insn, next)) +@@ -3403,8 +3426,7 @@ relax_delay_slots (rtx first) + Don't do this if we expect the conditional branch to be true, because + we would then be making the more common case longer. */ + +- if (JUMP_P (insn) +- && (simplejump_p (insn) || GET_CODE (PATTERN (insn)) == RETURN) ++ if (simplejump_or_return_p (insn) + && (other = prev_active_insn (insn)) != 0 + && any_condjump_p (other) + && no_labels_between_p (other, insn) +@@ -3445,10 +3467,10 @@ relax_delay_slots (rtx first) + Only do so if optimizing for size since this results in slower, but + smaller code. */ + if (optimize_function_for_size_p (cfun) +- && GET_CODE (PATTERN (delay_insn)) == RETURN ++ && ANY_RETURN_P (PATTERN (delay_insn)) + && next + && JUMP_P (next) +- && GET_CODE (PATTERN (next)) == RETURN) ++ && PATTERN (next) == PATTERN (delay_insn)) + { + rtx after; + int i; +@@ -3487,14 +3509,16 @@ relax_delay_slots (rtx first) + continue; + + target_label = JUMP_LABEL (delay_insn); ++ if (target_label && ANY_RETURN_P (target_label)) ++ continue; + + if (target_label) + { + /* If this jump goes to another unconditional jump, thread it, but + don't convert a jump into a RETURN here. */ + trial = skip_consecutive_labels (follow_jumps (target_label)); +- if (trial == 0) +- trial = find_end_label (); ++ if (ANY_RETURN_P (trial)) ++ trial = find_end_label (trial); + + if (trial && trial != target_label + && redirect_with_delay_slots_safe_p (delay_insn, trial, insn)) +@@ -3517,7 +3541,7 @@ relax_delay_slots (rtx first) + later incorrectly compute register live/death info. 
*/ + rtx tmp = next_active_insn (trial); + if (tmp == 0) +- tmp = find_end_label (); ++ tmp = find_end_label (simple_return_rtx); + + if (tmp) + { +@@ -3537,14 +3561,12 @@ relax_delay_slots (rtx first) + delay list and that insn is redundant, thread the jump. */ + if (trial && GET_CODE (PATTERN (trial)) == SEQUENCE + && XVECLEN (PATTERN (trial), 0) == 2 +- && JUMP_P (XVECEXP (PATTERN (trial), 0, 0)) +- && (simplejump_p (XVECEXP (PATTERN (trial), 0, 0)) +- || GET_CODE (PATTERN (XVECEXP (PATTERN (trial), 0, 0))) == RETURN) ++ && simplejump_or_return_p (XVECEXP (PATTERN (trial), 0, 0)) + && redundant_insn (XVECEXP (PATTERN (trial), 0, 1), insn, 0)) + { + target_label = JUMP_LABEL (XVECEXP (PATTERN (trial), 0, 0)); +- if (target_label == 0) +- target_label = find_end_label (); ++ if (ANY_RETURN_P (target_label)) ++ target_label = find_end_label (target_label); + + if (target_label + && redirect_with_delay_slots_safe_p (delay_insn, target_label, +@@ -3622,16 +3644,15 @@ relax_delay_slots (rtx first) + a RETURN here. */ + if (! INSN_ANNULLED_BRANCH_P (delay_insn) + && any_condjump_p (delay_insn) +- && next && JUMP_P (next) +- && (simplejump_p (next) || GET_CODE (PATTERN (next)) == RETURN) ++ && next && simplejump_or_return_p (next) + && next_active_insn (target_label) == next_active_insn (next) + && no_labels_between_p (insn, next)) + { + rtx label = JUMP_LABEL (next); + rtx old_label = JUMP_LABEL (delay_insn); + +- if (label == 0) +- label = find_end_label (); ++ if (ANY_RETURN_P (label)) ++ label = find_end_label (label); + + /* find_end_label can generate a new label. Check this first. */ + if (label +@@ -3692,7 +3713,8 @@ static void + make_return_insns (rtx first) + { + rtx insn, jump_insn, pat; +- rtx real_return_label = end_of_function_label; ++ rtx real_return_label = function_return_label; ++ rtx real_simple_return_label = function_simple_return_label; + int slots, i; + + #ifdef DELAY_SLOTS_FOR_EPILOGUE +@@ -3707,18 +3729,25 @@ make_return_insns (rtx first) + #endif + + /* See if there is a RETURN insn in the function other than the one we +- made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change ++ made for FUNCTION_RETURN_LABEL. If so, set up anything we can't change + into a RETURN to jump to it. */ + for (insn = first; insn; insn = NEXT_INSN (insn)) +- if (JUMP_P (insn) && GET_CODE (PATTERN (insn)) == RETURN) ++ if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn))) + { +- real_return_label = get_label_before (insn); ++ rtx t = get_label_before (insn); ++ if (PATTERN (insn) == ret_rtx) ++ real_return_label = t; ++ else ++ real_simple_return_label = t; + break; + } + + /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it +- was equal to END_OF_FUNCTION_LABEL. */ +- LABEL_NUSES (real_return_label)++; ++ was equal to FUNCTION_RETURN_LABEL. */ ++ if (real_return_label) ++ LABEL_NUSES (real_return_label)++; ++ if (real_simple_return_label) ++ LABEL_NUSES (real_simple_return_label)++; + + /* Clear the list of insns to fill so we can use it. */ + obstack_free (&unfilled_slots_obstack, unfilled_firstobj); +@@ -3726,13 +3755,27 @@ make_return_insns (rtx first) + for (insn = first; insn; insn = NEXT_INSN (insn)) + { + int flags; ++ rtx kind, real_label; + + /* Only look at filled JUMP_INSNs that go to the end of function + label. 
*/ + if (!NONJUMP_INSN_P (insn) + || GET_CODE (PATTERN (insn)) != SEQUENCE +- || !JUMP_P (XVECEXP (PATTERN (insn), 0, 0)) +- || JUMP_LABEL (XVECEXP (PATTERN (insn), 0, 0)) != end_of_function_label) ++ || !JUMP_P (XVECEXP (PATTERN (insn), 0, 0))) ++ continue; ++ ++ if (JUMP_LABEL (XVECEXP (PATTERN (insn), 0, 0)) == function_return_label) ++ { ++ kind = ret_rtx; ++ real_label = real_return_label; ++ } ++ else if (JUMP_LABEL (XVECEXP (PATTERN (insn), 0, 0)) ++ == function_simple_return_label) ++ { ++ kind = simple_return_rtx; ++ real_label = real_simple_return_label; ++ } ++ else + continue; + + pat = PATTERN (insn); +@@ -3740,14 +3783,12 @@ make_return_insns (rtx first) + + /* If we can't make the jump into a RETURN, try to redirect it to the best + RETURN and go on to the next insn. */ +- if (! reorg_redirect_jump (jump_insn, NULL_RTX)) ++ if (! reorg_redirect_jump (jump_insn, kind)) + { + /* Make sure redirecting the jump will not invalidate the delay + slot insns. */ +- if (redirect_with_delay_slots_safe_p (jump_insn, +- real_return_label, +- insn)) +- reorg_redirect_jump (jump_insn, real_return_label); ++ if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn)) ++ reorg_redirect_jump (jump_insn, real_label); + continue; + } + +@@ -3787,7 +3828,7 @@ make_return_insns (rtx first) + RETURN, delete the SEQUENCE and output the individual insns, + followed by the RETURN. Then set things up so we try to find + insns for its delay slots, if it needs some. */ +- if (GET_CODE (PATTERN (jump_insn)) == RETURN) ++ if (ANY_RETURN_P (PATTERN (jump_insn))) + { + rtx prev = PREV_INSN (insn); + +@@ -3804,13 +3845,16 @@ make_return_insns (rtx first) + else + /* It is probably more efficient to keep this with its current + delay slot as a branch to a RETURN. */ +- reorg_redirect_jump (jump_insn, real_return_label); ++ reorg_redirect_jump (jump_insn, real_label); + } + + /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any + new delay slots we have created. */ +- if (--LABEL_NUSES (real_return_label) == 0) ++ if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0) + delete_related_insns (real_return_label); ++ if (real_simple_return_label != NULL_RTX ++ && --LABEL_NUSES (real_simple_return_label) == 0) ++ delete_related_insns (real_simple_return_label); + + fill_simple_delay_slots (1); + fill_simple_delay_slots (0); +@@ -3878,7 +3922,7 @@ dbr_schedule (rtx first) + init_resource_info (epilogue_insn); + + /* Show we haven't computed an end-of-function label yet. */ +- end_of_function_label = 0; ++ function_return_label = function_simple_return_label = NULL_RTX; + + /* Initialize the statistics for this function. */ + memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays); +@@ -3900,11 +3944,23 @@ dbr_schedule (rtx first) + /* If we made an end of function label, indicate that it is now + safe to delete it by undoing our prior adjustment to LABEL_NUSES. + If it is now unused, delete it. 
*/
+- if (end_of_function_label && --LABEL_NUSES (end_of_function_label) == 0)
+- delete_related_insns (end_of_function_label);
++ if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
++ delete_related_insns (function_return_label);
++ if (function_simple_return_label
++ && --LABEL_NUSES (function_simple_return_label) == 0)
++ delete_related_insns (function_simple_return_label);
+
++#if defined HAVE_return || defined HAVE_simple_return
++ if (
+ #ifdef HAVE_return
+- if (HAVE_return && end_of_function_label != 0)
++ (HAVE_return && function_return_label != 0)
++#else
++ 0
++#endif
++#ifdef HAVE_simple_return
++ || (HAVE_simple_return && function_simple_return_label != 0)
++#endif
++ )
+ make_return_insns (first);
+ #endif
+
+Index: gcc-4_5-branch/gcc/resource.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/resource.c
++++ gcc-4_5-branch/gcc/resource.c
+@@ -495,6 +495,8 @@ find_dead_or_set_registers (rtx target,
+ || GET_CODE (PATTERN (this_jump_insn)) == RETURN)
+ {
+ next = JUMP_LABEL (this_jump_insn);
++ if (next && ANY_RETURN_P (next))
++ next = NULL_RTX;
+ if (jump_insn == 0)
+ {
+ jump_insn = insn;
+@@ -562,9 +564,10 @@ find_dead_or_set_registers (rtx target,
+ AND_COMPL_HARD_REG_SET (scratch, needed.regs);
+ AND_COMPL_HARD_REG_SET (fallthrough_res.regs, scratch);
+
+- find_dead_or_set_registers (JUMP_LABEL (this_jump_insn),
+- &target_res, 0, jump_count,
+- target_set, needed);
++ if (!ANY_RETURN_P (JUMP_LABEL (this_jump_insn)))
++ find_dead_or_set_registers (JUMP_LABEL (this_jump_insn),
++ &target_res, 0, jump_count,
++ target_set, needed);
+ find_dead_or_set_registers (next,
+ &fallthrough_res, 0, jump_count,
+ set, needed);
+@@ -1097,6 +1100,8 @@ mark_target_live_regs (rtx insns, rtx ta
+ struct resources new_resources;
+ rtx stop_insn = next_active_insn (jump_insn);
+
++ if (jump_target && ANY_RETURN_P (jump_target))
++ jump_target = NULL_RTX;
+ mark_target_live_regs (insns, next_active_insn (jump_target),
+ &new_resources);
+ CLEAR_RESOURCE (&set);
+Index: gcc-4_5-branch/gcc/rtl.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/rtl.c
++++ gcc-4_5-branch/gcc/rtl.c
+@@ -256,6 +256,8 @@ copy_rtx (rtx orig)
+ case CODE_LABEL:
+ case PC:
+ case CC0:
++ case RETURN:
++ case SIMPLE_RETURN:
+ case SCRATCH:
+ /* SCRATCH must be shared because they represent distinct values. */
+ return orig;
+Index: gcc-4_5-branch/gcc/rtl.def
+===================================================================
+--- gcc-4_5-branch.orig/gcc/rtl.def
++++ gcc-4_5-branch/gcc/rtl.def
+@@ -296,6 +296,10 @@ DEF_RTL_EXPR(CALL, "call", "ee", RTX_EXT
+
+ DEF_RTL_EXPR(RETURN, "return", "", RTX_EXTRA)
+
++/* A plain return, to be used on paths that are reached without going
++ through the function prologue. */
++DEF_RTL_EXPR(SIMPLE_RETURN, "simple_return", "", RTX_EXTRA)
++
+ /* Special for EH return from subroutine. */
+
+ DEF_RTL_EXPR(EH_RETURN, "eh_return", "", RTX_EXTRA)
+Index: gcc-4_5-branch/gcc/rtl.h
+===================================================================
+--- gcc-4_5-branch.orig/gcc/rtl.h
++++ gcc-4_5-branch/gcc/rtl.h
+@@ -411,6 +411,10 @@ struct GTY(()) rtvec_def {
+ (JUMP_P (INSN) && (GET_CODE (PATTERN (INSN)) == ADDR_VEC || \
+ GET_CODE (PATTERN (INSN)) == ADDR_DIFF_VEC))
+
++/* Predicate yielding nonzero iff X is a return or simple_return. */
++#define ANY_RETURN_P(X) \
++ (GET_CODE (X) == RETURN || GET_CODE (X) == SIMPLE_RETURN)
++
+ /* 1 if X is a unary operator.
*/ + + #define UNARY_P(X) \ +@@ -1998,6 +2002,8 @@ enum global_rtl_index + { + GR_PC, + GR_CC0, ++ GR_RETURN, ++ GR_SIMPLE_RETURN, + GR_STACK_POINTER, + GR_FRAME_POINTER, + /* For register elimination to work properly these hard_frame_pointer_rtx, +@@ -2032,6 +2038,8 @@ extern GTY(()) rtx global_rtl[GR_MAX]; + + /* Standard pieces of rtx, to be substituted directly into things. */ + #define pc_rtx (global_rtl[GR_PC]) ++#define ret_rtx (global_rtl[GR_RETURN]) ++#define simple_return_rtx (global_rtl[GR_SIMPLE_RETURN]) + #define cc0_rtx (global_rtl[GR_CC0]) + + /* All references to certain hard regs, except those created +Index: gcc-4_5-branch/gcc/rtlanal.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/rtlanal.c ++++ gcc-4_5-branch/gcc/rtlanal.c +@@ -2673,6 +2673,7 @@ tablejump_p (const_rtx insn, rtx *labelp + + if (JUMP_P (insn) + && (label = JUMP_LABEL (insn)) != NULL_RTX ++ && !ANY_RETURN_P (label) + && (table = next_active_insn (label)) != NULL_RTX + && JUMP_TABLE_DATA_P (table)) + { +Index: gcc-4_5-branch/gcc/sched-int.h +=================================================================== +--- gcc-4_5-branch.orig/gcc/sched-int.h ++++ gcc-4_5-branch/gcc/sched-int.h +@@ -199,7 +199,7 @@ extern int max_issue (struct ready_list + + extern void ebb_compute_jump_reg_dependencies (rtx, regset, regset, regset); + +-extern edge find_fallthru_edge (basic_block); ++extern edge find_fallthru_edge_from (basic_block); + + extern void (* sched_init_only_bb) (basic_block, basic_block); + extern basic_block (* sched_split_block) (basic_block, rtx); +Index: gcc-4_5-branch/gcc/sched-vis.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/sched-vis.c ++++ gcc-4_5-branch/gcc/sched-vis.c +@@ -549,6 +549,9 @@ print_pattern (char *buf, const_rtx x, i + case RETURN: + sprintf (buf, "return"); + break; ++ case SIMPLE_RETURN: ++ sprintf (buf, "simple_return"); ++ break; + case CALL: + print_exp (buf, x, verbose); + break; +Index: gcc-4_5-branch/gcc/sel-sched-ir.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/sel-sched-ir.c ++++ gcc-4_5-branch/gcc/sel-sched-ir.c +@@ -686,7 +686,7 @@ merge_fences (fence_t f, insn_t insn, + + /* Find fallthrough edge. */ + gcc_assert (BLOCK_FOR_INSN (insn)->prev_bb); +- candidate = find_fallthru_edge (BLOCK_FOR_INSN (insn)->prev_bb); ++ candidate = find_fallthru_edge_from (BLOCK_FOR_INSN (insn)->prev_bb); + + if (!candidate + || (candidate->src != BLOCK_FOR_INSN (last_scheduled_insn) +Index: gcc-4_5-branch/gcc/sel-sched.c +=================================================================== +--- gcc-4_5-branch.orig/gcc/sel-sched.c ++++ gcc-4_5-branch/gcc/sel-sched.c +@@ -617,8 +617,8 @@ in_fallthru_bb_p (rtx insn, rtx succ) + if (bb == BLOCK_FOR_INSN (succ)) + return true; + +- if (find_fallthru_edge (bb)) +- bb = find_fallthru_edge (bb)->dest; ++ if (find_fallthru_edge_from (bb)) ++ bb = find_fallthru_edge_from (bb)->dest; + else + return false; + +@@ -4911,7 +4911,7 @@ move_cond_jump (rtx insn, bnd_t bnd) + next = PREV_INSN (insn); + BND_TO (bnd) = insn; + +- ft_edge = find_fallthru_edge (block_from); ++ ft_edge = find_fallthru_edge_from (block_from); + block_next = ft_edge->dest; + /* There must be a fallthrough block (or where should go + control flow in case of false jump predicate otherwise?). 
*/
+Index: gcc-4_5-branch/gcc/vec.h
+===================================================================
+--- gcc-4_5-branch.orig/gcc/vec.h
++++ gcc-4_5-branch/gcc/vec.h
+@@ -188,6 +188,18 @@ along with GCC; see the file COPYING3.
+
+ #define VEC_iterate(T,V,I,P) (VEC_OP(T,base,iterate)(VEC_BASE(V),I,&(P)))
+
++/* Convenience macro for forward iteration. */
++
++#define FOR_EACH_VEC_ELT(T, V, I, P) \
++ for (I = 0; VEC_iterate (T, (V), (I), (P)); ++(I))
++
++/* Convenience macro for reverse iteration. */
++
++#define FOR_EACH_VEC_ELT_REVERSE(T,V,I,P) \
++ for (I = VEC_length (T, (V)) - 1; \
++ VEC_iterate (T, (V), (I), (P)); \
++ (I)--)
++
+ /* Allocate new vector.
+ VEC(T,A) *VEC_T_A_alloc(int reserve);
+ |