Diffstat (limited to 'meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99474.patch')
-rw-r--r--  meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99474.patch | 3346
1 file changed, 3346 insertions(+), 0 deletions(-)
diff --git a/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99474.patch b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99474.patch
new file mode 100644
index 0000000..9b0fb0b
--- /dev/null
+++ b/meta-oe/recipes-devtools/gcc/gcc-4.5/linaro/gcc-4.5-linaro-r99474.patch
@@ -0,0 +1,3346 @@
+2011-01-14 Bernd Schmidt <bernds@codesourcery.com>
+
+ gcc/
+ * function.c (thread_prologue_and_epilogue_insns): Avoid uninitialized
+ variable.
+
+2011-01-12 Bernd Schmidt <bernds@codesourcery.com>
+
+ gcc/
+ * config/s390/s390.c (s390_emit_epilogue): Don't use gen_rtx_RETURN.
+ * config/rx/rx.c (gen_rx_rtsd_vector): Likewise.
+ * config/m68hc11/m68hc11.md (return): Likewise.
+ * config/cris/cris.c (cris_expand_return): Likewise.
+ * config/m68k/m68k.c (m68k_expand_epilogue): Likewise.
+ * config/picochip/picochip.c (picochip_expand_epilogue): Likewise.
+ * config/h8300/h8300.c (h8300_push_pop, h8300_expand_epilogue):
+ Likewise.
+ * config/v850/v850.c (expand_epilogue): Likewise.
+ * config/bfin/bfin.c (bfin_expand_call): Likewise.
+
+2011-01-04 Catherine Moore <clm@codesourcery.com>
+
+ gcc/
+ * config/rs6000/rs6000.c (rs6000_make_savres_rtx): Change
+ gen_rtx_RETURN to ret_rtx.
+ (rs6000_emit_epilogue): Likewise.
+ (rs6000_output_mi_thunk): Likewise.
+
+2011-01-03 Bernd Schmidt <bernds@codesourcery.com>
+
+ gcc/
+ * doc/tm.texi (RETURN_ADDR_REGNUM): Document.
+ * doc/md.texi (simple_return): Document pattern.
+ (return): Add a sentence to clarify.
+ * doc/rtl.texi (simple_return): Document.
+ * doc/invoke.texi (Optimize Options): Document -fshrink-wrap.
+ * common.opt (fshrink-wrap): New.
+ * opts.c (decode_options): Set it for -O2 and above.
+ * gengenrtl.c (special_rtx): PC, CC0, RETURN and SIMPLE_RETURN
+ are special.
+ * rtl.h (ANY_RETURN_P): New macro.
+ (global_rtl_index): Add GR_RETURN and GR_SIMPLE_RETURN.
+ (ret_rtx, simple_return_rtx): New macros.
+ * genemit.c (gen_exp): RETURN and SIMPLE_RETURN have unique rtxs.
+ (gen_expand, gen_split): Use ANY_RETURN_P.
+ * rtl.c (copy_rtx): RETURN and SIMPLE_RETURN are shared.
+ * emit-rtl.c (verify_rtx_sharing): Likewise.
+ (skip_consecutive_labels): Return the argument if it is a return rtx.
+ (classify_insn): Handle both kinds of return.
+ (init_emit_regs): Create global rtl for ret_rtx and simple_return_rtx.
+ * df-scan.c (df_uses_record): Handle SIMPLE_RETURN.
+ * rtl.def (SIMPLE_RETURN): New.
+ * rtlanal.c (tablejump_p): Check JUMP_LABEL for returns.
+ * final.c (final_scan_insn): Recognize both kinds of return.
+ * reorg.c (function_return_label, function_simple_return_label): New
+ static variables.
+ (end_of_function_label): Remove.
+ (simplejump_or_return_p): New static function.
+ (find_end_label): Add a new arg, KIND. All callers changed.
+ Depending on KIND, look for a label suitable for return or
+ simple_return.
+ (make_return_insns): Make corresponding changes.
+ (get_jump_flags): Check JUMP_LABELs for returns.
+ (follow_jumps): Likewise.
+ (get_branch_condition): Check target for return patterns rather
+ than NULL.
+ (own_thread_p): Likewise for thread.
+ (steal_delay_list_from_target): Check JUMP_LABELs for returns.
+ Use simplejump_or_return_p.
+ (fill_simple_delay_slots): Likewise.
+ (optimize_skip): Likewise.
+ (fill_slots_from_thread): Likewise.
+ (relax_delay_slots): Likewise.
+ (dbr_schedule): Adjust handling of end_of_function_label for the
+ two new variables.
+ * ifcvt.c (find_if_case_1): Take care when redirecting jumps to the
+ exit block.
+ (dead_or_predicable): Change NEW_DEST arg to DEST_EDGE. All callers
+ changed. Ensure that the right label is passed to redirect_jump.
+ * jump.c (condjump_p, condjump_in_parallel_p, any_condjump_p,
+ returnjump_p): Handle SIMPLE_RETURNs.
+ (delete_related_insns): Check JUMP_LABEL for returns.
+ (redirect_target): New static function.
+ (redirect_exp_1): Use it. Handle any kind of return rtx as a label
+ rather than interpreting NULL as a return.
+ (redirect_jump_1): Assert that nlabel is not NULL.
+ (redirect_jump): Likewise.
+ (redirect_jump_2): Handle any kind of return rtx as a label rather
+ than interpreting NULL as a return.
+ * dwarf2out.c (compute_barrier_args_size_1): Check JUMP_LABEL for
+ returns.
+ * function.c (emit_return_into_block): Remove useless declaration.
+ (record_hard_reg_sets, frame_required_for_rtx, gen_return_pattern,
+ requires_stack_frame_p): New static functions.
+ (emit_return_into_block): New arg SIMPLE_P. All callers changed.
+ Generate either kind of return pattern and update the JUMP_LABEL.
+ (thread_prologue_and_epilogue_insns): Implement a form of
+ shrink-wrapping. Ensure JUMP_LABELs for return insns are set.
+ * print-rtl.c (print_rtx): Handle returns in JUMP_LABELs.
+ * cfglayout.c (fixup_reorder_chain): Ensure JUMP_LABELs for returns
+ remain correct.
+ * resource.c (find_dead_or_set_registers): Check JUMP_LABELs for
+ returns.
+ (mark_target_live_regs): Don't pass a return rtx to next_active_insn.
+ * basic-block.h (force_nonfallthru_and_redirect): Declare.
+ * sched-vis.c (print_pattern): Add case for SIMPLE_RETURN.
+ * cfgrtl.c (force_nonfallthru_and_redirect): No longer static. New arg
+ JUMP_LABEL. All callers changed. Use the label when generating
+ return insns.
+
+ * config/i386/i386.md (returns, return_str, return_cond): New
+ code_iterator and corresponding code_attrs.
+ (<return_str>return): Renamed from return and adapted.
+ (<return_str>return_internal): Likewise for return_internal.
+ (<return_str>return_internal_long): Likewise for return_internal_long.
+ (<return_str>return_pop_internal): Likewise for return_pop_internal.
+ (<return_str>return_indirect_internal): Likewise for
+ return_indirect_internal.
+ * config/i386/i386.c (ix86_expand_epilogue): Expand a simple_return as
+ the last insn.
+ (ix86_pad_returns): Handle both kinds of return rtx.
+ * config/arm/arm.c (use_simple_return_p): New function.
+ (is_jump_table): Handle returns in JUMP_LABELs.
+ (output_return_instruction): New arg SIMPLE. All callers changed.
+ Use it to determine which kind of return to generate.
+ (arm_final_prescan_insn): Handle both kinds of return.
+ * config/arm/arm.md (returns, return_str, return_simple_p,
+ return_cond): New code_iterator and corresponding code_attrs.
+ (<return_str>return): Renamed from return and adapted.
+ (arm_<return_str>return): Renamed from arm_return and adapted.
+ (cond_<return_str>return): Renamed from cond_return and adapted.
+ (cond_<return_str>return_inverted): Renamed from cond_return_inverted
+ and adapted.
+ (epilogue): Use ret_rtx instead of gen_rtx_RETURN.
+ * config/arm/thumb2.md (thumb2_<return_str>return): Renamed from
+ thumb2_return and adapted.
+ * config/arm/arm.h (RETURN_ADDR_REGNUM): Define.
+ * config/arm/arm-protos.h (use_simple_return_p): Declare.
+ (output_return_instruction): Adjust declaration.
+ * config/mips/mips.c (mips_expand_epilogue): Generate a simple_return
+ as final insn.
+ * config/mips/mips.md (simple_return): New expander.
+ (*simple_return, simple_return_internal): New patterns.
+ * config/sh/sh.c (barrier_align): Handle return in a JUMP_LABEL.
+ (split_branches): Don't pass a null label to redirect_jump.
+
+ From mainline:
+ * vec.h (FOR_EACH_VEC_ELT, FOR_EACH_VEC_ELT_REVERSE): New macros.
+ * haifa-sched.c (find_fallthru_edge_from): Rename from
+ find_fallthru_edge. All callers changed.
+ * sched-int.h (find_fallthru_edge_from): Rename declaration as well.
+ * basic-block.h (find_fallthru_edge): New inline function.
+
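+Background for reviewers: shrink-wrapping moves the prologue so that it is
+executed only on paths through the function that actually need a stack
+frame; paths that do not need one end in the new simple_return pattern
+instead.  A minimal C sketch of the kind of function that benefits (the
+function names are hypothetical, chosen only for illustration; they do not
+appear in this patch):
+
+extern int g (int), h (int);
+
+int
+f (int x)
+{
+  if (x == 0)
+    return 0;           /* Fast path: needs no frame.  With -fshrink-wrap
+                           this exit becomes a simple_return that bypasses
+                           the prologue entirely.  */
+  return g (x) + h (x); /* Slow path: the calls need the return address
+                           saved, so the prologue is inserted on the edge
+                           leading to this block.  */
+}
+
+The analysis that decides which blocks require the prologue is in the
+function.c changes below (requires_stack_frame_p and the ANTIC computation
+in thread_prologue_and_epilogue_insns).
+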
+=== modified file 'gcc/basic-block.h'
+Index: gcc-4_5-branch/gcc/basic-block.h
+===================================================================
+--- gcc-4_5-branch.orig/gcc/basic-block.h
++++ gcc-4_5-branch/gcc/basic-block.h
+@@ -884,6 +884,7 @@ extern void flow_edge_list_print (const
+
+ /* In cfgrtl.c */
+ extern basic_block force_nonfallthru (edge);
++extern basic_block force_nonfallthru_and_redirect (edge, basic_block, rtx);
+ extern rtx block_label (basic_block);
+ extern bool purge_all_dead_edges (void);
+ extern bool purge_dead_edges (basic_block);
+@@ -1004,6 +1005,20 @@ bb_has_abnormal_pred (basic_block bb)
+ return false;
+ }
+
++/* Return the fallthru edge in EDGES if it exists, NULL otherwise. */
++static inline edge
++find_fallthru_edge (VEC(edge,gc) *edges)
++{
++ edge e;
++ edge_iterator ei;
++
++ FOR_EACH_EDGE (e, ei, edges)
++ if (e->flags & EDGE_FALLTHRU)
++ break;
++
++ return e;
++}
++
+ /* In cfgloopmanip.c. */
+ extern edge mfb_kj_edge;
+ extern bool mfb_keep_just (edge);
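+
+A note for readers of this excerpt: the ANY_RETURN_P macro used by many of
+the hunks below is added to rtl.h by a part of this patch that is not
+quoted here.  The following sketch of its definition is an assumption,
+matching the definition that later landed in mainline GCC:
+
+/* Assumed definition; the actual rtl.h hunk is outside this excerpt.
+   True for both kinds of return rtx.  */
+#define ANY_RETURN_P(X) \
+  (GET_CODE (X) == RETURN || GET_CODE (X) == SIMPLE_RETURN)
+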
+Index: gcc-4_5-branch/gcc/cfganal.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/cfganal.c
++++ gcc-4_5-branch/gcc/cfganal.c
+@@ -271,6 +271,37 @@ set_edge_can_fallthru_flag (void)
+ EDGE_SUCC (bb, 0)->flags |= EDGE_CAN_FALLTHRU;
+ EDGE_SUCC (bb, 1)->flags |= EDGE_CAN_FALLTHRU;
+ }
++ /* dwarf2out expects that a NOTE_INSN_EPILOGUE_BEG note is always paired
++ with a return or a sibcall. Ensure that this remains the case if
++ they are in different basic blocks. */
++ FOR_EACH_BB (bb)
++ {
++ edge e;
++ edge_iterator ei;
++ rtx insn, end;
++
++ end = BB_END (bb);
++ FOR_BB_INSNS (bb, insn)
++ if (GET_CODE (insn) == NOTE
++ && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG
++ && !(CALL_P (end) && SIBLING_CALL_P (end))
++ && !returnjump_p (end))
++ {
++ basic_block other_bb = NULL;
++ FOR_EACH_EDGE (e, ei, bb->succs)
++ {
++ if (e->flags & EDGE_FALLTHRU)
++ other_bb = e->dest;
++ else
++ e->flags &= ~EDGE_CAN_FALLTHRU;
++ }
++ FOR_EACH_EDGE (e, ei, other_bb->preds)
++ {
++ if (!(e->flags & EDGE_FALLTHRU))
++ e->flags &= ~EDGE_CAN_FALLTHRU;
++ }
++ }
++ }
+ }
+
+ /* Find unreachable blocks. An unreachable block will have 0 in
+Index: gcc-4_5-branch/gcc/cfglayout.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/cfglayout.c
++++ gcc-4_5-branch/gcc/cfglayout.c
+@@ -766,6 +766,7 @@ fixup_reorder_chain (void)
+ {
+ edge e_fall, e_taken, e;
+ rtx bb_end_insn;
++ rtx ret_label = NULL_RTX;
+ basic_block nb;
+ edge_iterator ei;
+
+@@ -785,6 +786,7 @@ fixup_reorder_chain (void)
+ bb_end_insn = BB_END (bb);
+ if (JUMP_P (bb_end_insn))
+ {
++ ret_label = JUMP_LABEL (bb_end_insn);
+ if (any_condjump_p (bb_end_insn))
+ {
+ /* This might happen if the conditional jump has side
+@@ -899,7 +901,7 @@ fixup_reorder_chain (void)
+ }
+
+ /* We got here if we need to add a new jump insn. */
+- nb = force_nonfallthru (e_fall);
++ nb = force_nonfallthru_and_redirect (e_fall, e_fall->dest, ret_label);
+ if (nb)
+ {
+ nb->il.rtl->visited = 1;
+@@ -1118,24 +1120,30 @@ extern bool cfg_layout_can_duplicate_bb_
+ bool
+ cfg_layout_can_duplicate_bb_p (const_basic_block bb)
+ {
++ rtx insn;
++
+ /* Do not attempt to duplicate tablejumps, as we need to unshare
+ the dispatch table. This is difficult to do, as the instructions
+ computing jump destination may be hoisted outside the basic block. */
+ if (tablejump_p (BB_END (bb), NULL, NULL))
+ return false;
+
+- /* Do not duplicate blocks containing insns that can't be copied. */
+- if (targetm.cannot_copy_insn_p)
++ insn = BB_HEAD (bb);
++ while (1)
+ {
+- rtx insn = BB_HEAD (bb);
+- while (1)
+- {
+- if (INSN_P (insn) && targetm.cannot_copy_insn_p (insn))
+- return false;
+- if (insn == BB_END (bb))
+- break;
+- insn = NEXT_INSN (insn);
+- }
++ /* Do not duplicate blocks containing insns that can't be copied. */
++ if (INSN_P (insn) && targetm.cannot_copy_insn_p
++ && targetm.cannot_copy_insn_p (insn))
++ return false;
++ /* dwarf2out expects that these notes are always paired with a
++ returnjump or sibling call. */
++ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG
++ && !returnjump_p (BB_END (bb))
++ && (!CALL_P (BB_END (bb)) || !SIBLING_CALL_P (BB_END (bb))))
++ return false;
++ if (insn == BB_END (bb))
++ break;
++ insn = NEXT_INSN (insn);
+ }
+
+ return true;
+@@ -1180,6 +1188,9 @@ duplicate_insn_chain (rtx from, rtx to)
+ break;
+ }
+ copy = emit_copy_of_insn_after (insn, get_last_insn ());
++ if (JUMP_P (insn) && JUMP_LABEL (insn) != NULL_RTX
++ && ANY_RETURN_P (JUMP_LABEL (insn)))
++ JUMP_LABEL (copy) = JUMP_LABEL (insn);
+ maybe_copy_epilogue_insn (insn, copy);
+ break;
+
+Index: gcc-4_5-branch/gcc/cfgrtl.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/cfgrtl.c
++++ gcc-4_5-branch/gcc/cfgrtl.c
+@@ -1107,10 +1107,13 @@ rtl_redirect_edge_and_branch (edge e, ba
+ }
+
+ /* Like force_nonfallthru below, but additionally performs redirection
+- Used by redirect_edge_and_branch_force. */
++ Used by redirect_edge_and_branch_force. JUMP_LABEL is used only
++ when redirecting to the EXIT_BLOCK, it is either a return or a
++ simple_return rtx indicating which kind of returnjump to create.
++ It should be NULL otherwise. */
+
+-static basic_block
+-force_nonfallthru_and_redirect (edge e, basic_block target)
++basic_block
++force_nonfallthru_and_redirect (edge e, basic_block target, rtx jump_label)
+ {
+ basic_block jump_block, new_bb = NULL, src = e->src;
+ rtx note;
+@@ -1242,11 +1245,25 @@ force_nonfallthru_and_redirect (edge e,
+ e->flags &= ~EDGE_FALLTHRU;
+ if (target == EXIT_BLOCK_PTR)
+ {
++ if (jump_label == ret_rtx)
++ {
+ #ifdef HAVE_return
+- emit_jump_insn_after_setloc (gen_return (), BB_END (jump_block), loc);
++ emit_jump_insn_after_setloc (gen_return (), BB_END (jump_block),
++ loc);
+ #else
+- gcc_unreachable ();
++ gcc_unreachable ();
+ #endif
++ }
++ else
++ {
++ gcc_assert (jump_label == simple_return_rtx);
++#ifdef HAVE_simple_return
++ emit_jump_insn_after_setloc (gen_simple_return (),
++ BB_END (jump_block), loc);
++#else
++ gcc_unreachable ();
++#endif
++ }
+ }
+ else
+ {
+@@ -1273,7 +1290,7 @@ force_nonfallthru_and_redirect (edge e,
+ basic_block
+ force_nonfallthru (edge e)
+ {
+- return force_nonfallthru_and_redirect (e, e->dest);
++ return force_nonfallthru_and_redirect (e, e->dest, NULL_RTX);
+ }
+
+ /* Redirect edge even at the expense of creating new jump insn or
+@@ -1290,7 +1307,7 @@ rtl_redirect_edge_and_branch_force (edge
+ /* In case the edge redirection failed, try to force it to be non-fallthru
+ and redirect newly created simplejump. */
+ df_set_bb_dirty (e->src);
+- return force_nonfallthru_and_redirect (e, target);
++ return force_nonfallthru_and_redirect (e, target, NULL_RTX);
+ }
+
+ /* The given edge should potentially be a fallthru edge. If that is in
+Index: gcc-4_5-branch/gcc/common.opt
+===================================================================
+--- gcc-4_5-branch.orig/gcc/common.opt
++++ gcc-4_5-branch/gcc/common.opt
+@@ -1147,6 +1147,11 @@ fshow-column
+ Common C ObjC C++ ObjC++ Report Var(flag_show_column) Init(1)
+ Show column numbers in diagnostics, when available. Default on
+
++fshrink-wrap
++Common Report Var(flag_shrink_wrap) Optimization
++Emit function prologues only before parts of the function that need it,
++rather than at the top of the function.
++
+ fsignaling-nans
+ Common Report Var(flag_signaling_nans) Optimization
+ Disable optimizations observable by IEEE signaling NaNs
+Index: gcc-4_5-branch/gcc/config/arm/arm-protos.h
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/arm/arm-protos.h
++++ gcc-4_5-branch/gcc/config/arm/arm-protos.h
+@@ -26,6 +26,7 @@
+ extern void arm_override_options (void);
+ extern void arm_optimization_options (int, int);
+ extern int use_return_insn (int, rtx);
++extern bool use_simple_return_p (void);
+ extern enum reg_class arm_regno_class (int);
+ extern void arm_load_pic_register (unsigned long);
+ extern int arm_volatile_func (void);
+@@ -137,7 +138,7 @@ extern int arm_address_offset_is_imm (rt
+ extern const char *output_add_immediate (rtx *);
+ extern const char *arithmetic_instr (rtx, int);
+ extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int);
+-extern const char *output_return_instruction (rtx, int, int);
++extern const char *output_return_instruction (rtx, bool, bool, bool);
+ extern void arm_poke_function_name (FILE *, const char *);
+ extern void arm_print_operand (FILE *, rtx, int);
+ extern void arm_print_operand_address (FILE *, rtx);
+Index: gcc-4_5-branch/gcc/config/arm/arm.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/arm/arm.c
++++ gcc-4_5-branch/gcc/config/arm/arm.c
+@@ -2163,6 +2163,18 @@ arm_trampoline_adjust_address (rtx addr)
+ return addr;
+ }
+
++/* Return true if we should try to use a simple_return insn, i.e. perform
++ shrink-wrapping if possible. This is the case if we need to emit a
++ prologue, which we can test by looking at the offsets. */
++bool
++use_simple_return_p (void)
++{
++ arm_stack_offsets *offsets;
++
++ offsets = arm_get_frame_offsets ();
++ return offsets->outgoing_args != 0;
++}
++
+ /* Return 1 if it is possible to return using a single instruction.
+ If SIBLING is non-null, this is a test for a return before a sibling
+ call. SIBLING is the call insn, so we can examine its register usage. */
+@@ -11284,6 +11296,7 @@ is_jump_table (rtx insn)
+
+ if (GET_CODE (insn) == JUMP_INSN
+ && JUMP_LABEL (insn) != NULL
++ && !ANY_RETURN_P (JUMP_LABEL (insn))
+ && ((table = next_real_insn (JUMP_LABEL (insn)))
+ == next_real_insn (insn))
+ && table != NULL
+@@ -14168,7 +14181,7 @@ arm_get_vfp_saved_size (void)
+ /* Generate a function exit sequence. If REALLY_RETURN is false, then do
+ everything bar the final return instruction. */
+ const char *
+-output_return_instruction (rtx operand, int really_return, int reverse)
++output_return_instruction (rtx operand, bool really_return, bool reverse, bool simple)
+ {
+ char conditional[10];
+ char instr[100];
+@@ -14206,10 +14219,15 @@ output_return_instruction (rtx operand,
+
+ sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
+
+- cfun->machine->return_used_this_function = 1;
++ if (simple)
++ live_regs_mask = 0;
++ else
++ {
++ cfun->machine->return_used_this_function = 1;
+
+- offsets = arm_get_frame_offsets ();
+- live_regs_mask = offsets->saved_regs_mask;
++ offsets = arm_get_frame_offsets ();
++ live_regs_mask = offsets->saved_regs_mask;
++ }
+
+ if (live_regs_mask)
+ {
+@@ -17108,6 +17126,7 @@ arm_final_prescan_insn (rtx insn)
+
+ /* If we start with a return insn, we only succeed if we find another one. */
+ int seeking_return = 0;
++ enum rtx_code return_code = UNKNOWN;
+
+ /* START_INSN will hold the insn from where we start looking. This is the
+ first insn after the following code_label if REVERSE is true. */
+@@ -17146,7 +17165,7 @@ arm_final_prescan_insn (rtx insn)
+ else
+ return;
+ }
+- else if (GET_CODE (body) == RETURN)
++ else if (ANY_RETURN_P (body))
+ {
+ start_insn = next_nonnote_insn (start_insn);
+ if (GET_CODE (start_insn) == BARRIER)
+@@ -17157,6 +17176,7 @@ arm_final_prescan_insn (rtx insn)
+ {
+ reverse = TRUE;
+ seeking_return = 1;
++ return_code = GET_CODE (body);
+ }
+ else
+ return;
+@@ -17197,11 +17217,15 @@ arm_final_prescan_insn (rtx insn)
+ label = XEXP (XEXP (SET_SRC (body), 2), 0);
+ then_not_else = FALSE;
+ }
+- else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
+- seeking_return = 1;
+- else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
++ else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
++ {
++ seeking_return = 1;
++ return_code = GET_CODE (XEXP (SET_SRC (body), 1));
++ }
++ else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
+ {
+ seeking_return = 1;
++ return_code = GET_CODE (XEXP (SET_SRC (body), 2));
+ then_not_else = FALSE;
+ }
+ else
+@@ -17302,8 +17326,7 @@ arm_final_prescan_insn (rtx insn)
+ && !use_return_insn (TRUE, NULL)
+ && !optimize_size)
+ fail = TRUE;
+- else if (GET_CODE (scanbody) == RETURN
+- && seeking_return)
++ else if (GET_CODE (scanbody) == return_code)
+ {
+ arm_ccfsm_state = 2;
+ succeed = TRUE;
+Index: gcc-4_5-branch/gcc/config/arm/arm.h
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/arm/arm.h
++++ gcc-4_5-branch/gcc/config/arm/arm.h
+@@ -2622,6 +2622,8 @@ extern int making_const_table;
+ #define RETURN_ADDR_RTX(COUNT, FRAME) \
+ arm_return_addr (COUNT, FRAME)
+
++#define RETURN_ADDR_REGNUM LR_REGNUM
++
+ /* Mask of the bits in the PC that contain the real return address
+ when running in 26-bit mode. */
+ #define RETURN_ADDR_MASK26 (0x03fffffc)
+Index: gcc-4_5-branch/gcc/config/arm/arm.md
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/arm/arm.md
++++ gcc-4_5-branch/gcc/config/arm/arm.md
+@@ -8882,66 +8882,72 @@
+ [(set_attr "type" "call")]
+ )
+
+-(define_expand "return"
+- [(return)]
+- "TARGET_32BIT && USE_RETURN_INSN (FALSE)"
++;; Both kinds of return insn.
++(define_code_iterator returns [return simple_return])
++(define_code_attr return_str [(return "") (simple_return "simple_")])
++(define_code_attr return_simple_p [(return "false") (simple_return "true")])
++(define_code_attr return_cond [(return " && USE_RETURN_INSN (FALSE)")
++ (simple_return " && use_simple_return_p ()")])
++
++(define_expand "<return_str>return"
++ [(returns)]
++ "TARGET_32BIT<return_cond>"
+ "")
+
+-;; Often the return insn will be the same as loading from memory, so set attr
+-(define_insn "*arm_return"
+- [(return)]
+- "TARGET_ARM && USE_RETURN_INSN (FALSE)"
+- "*
+- {
+- if (arm_ccfsm_state == 2)
+- {
+- arm_ccfsm_state += 2;
+- return \"\";
+- }
+- return output_return_instruction (const_true_rtx, TRUE, FALSE);
+- }"
++(define_insn "*arm_<return_str>return"
++ [(returns)]
++ "TARGET_ARM<return_cond>"
++{
++ if (arm_ccfsm_state == 2)
++ {
++ arm_ccfsm_state += 2;
++ return "";
++ }
++ return output_return_instruction (const_true_rtx, true, false,
++ <return_simple_p>);
++}
+ [(set_attr "type" "load1")
+ (set_attr "length" "12")
+ (set_attr "predicable" "yes")]
+ )
+
+-(define_insn "*cond_return"
++(define_insn "*cond_<return_str>return"
+ [(set (pc)
+ (if_then_else (match_operator 0 "arm_comparison_operator"
+ [(match_operand 1 "cc_register" "") (const_int 0)])
+- (return)
++ (returns)
+ (pc)))]
+- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
+- "*
+- {
+- if (arm_ccfsm_state == 2)
+- {
+- arm_ccfsm_state += 2;
+- return \"\";
+- }
+- return output_return_instruction (operands[0], TRUE, FALSE);
+- }"
++ "TARGET_ARM<return_cond>"
++{
++ if (arm_ccfsm_state == 2)
++ {
++ arm_ccfsm_state += 2;
++ return "";
++ }
++ return output_return_instruction (operands[0], true, false,
++ <return_simple_p>);
++}
+ [(set_attr "conds" "use")
+ (set_attr "length" "12")
+ (set_attr "type" "load1")]
+ )
+
+-(define_insn "*cond_return_inverted"
++(define_insn "*cond_<return_str>return_inverted"
+ [(set (pc)
+ (if_then_else (match_operator 0 "arm_comparison_operator"
+ [(match_operand 1 "cc_register" "") (const_int 0)])
+ (pc)
+- (return)))]
+- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
+- "*
+- {
+- if (arm_ccfsm_state == 2)
+- {
+- arm_ccfsm_state += 2;
+- return \"\";
+- }
+- return output_return_instruction (operands[0], TRUE, TRUE);
+- }"
++ (returns)))]
++ "TARGET_ARM<return_cond>"
++{
++ if (arm_ccfsm_state == 2)
++ {
++ arm_ccfsm_state += 2;
++ return "";
++ }
++ return output_return_instruction (operands[0], true, true,
++ <return_simple_p>);
++}
+ [(set_attr "conds" "use")
+ (set_attr "length" "12")
+ (set_attr "type" "load1")]
+@@ -10809,8 +10815,7 @@
+ DONE;
+ }
+ emit_jump_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode,
+- gen_rtvec (1,
+- gen_rtx_RETURN (VOIDmode)),
++ gen_rtvec (1, ret_rtx),
+ VUNSPEC_EPILOGUE));
+ DONE;
+ "
+@@ -10827,7 +10832,7 @@
+ "TARGET_32BIT"
+ "*
+ if (use_return_insn (FALSE, next_nonnote_insn (insn)))
+- return output_return_instruction (const_true_rtx, FALSE, FALSE);
++ return output_return_instruction (const_true_rtx, false, false, false);
+ return arm_output_epilogue (next_nonnote_insn (insn));
+ "
+ ;; Length is absolute worst case
+Index: gcc-4_5-branch/gcc/config/arm/thumb2.md
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/arm/thumb2.md
++++ gcc-4_5-branch/gcc/config/arm/thumb2.md
+@@ -1020,16 +1020,15 @@
+
+ ;; Note: this is not predicable, to avoid issues with linker-generated
+ ;; interworking stubs.
+-(define_insn "*thumb2_return"
+- [(return)]
+- "TARGET_THUMB2 && USE_RETURN_INSN (FALSE)"
+- "*
+- {
+- return output_return_instruction (const_true_rtx, TRUE, FALSE);
+- }"
++(define_insn "*thumb2_<return_str>return"
++ [(returns)]
++ "TARGET_THUMB2<return_cond>"
++{
++ return output_return_instruction (const_true_rtx, true, false,
++ <return_simple_p>);
++}
+ [(set_attr "type" "load1")
+- (set_attr "length" "12")]
+-)
++ (set_attr "length" "12")])
+
+ (define_insn_and_split "thumb2_eh_return"
+ [(unspec_volatile [(match_operand:SI 0 "s_register_operand" "r")]
+Index: gcc-4_5-branch/gcc/config/bfin/bfin.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/bfin/bfin.c
++++ gcc-4_5-branch/gcc/config/bfin/bfin.c
+@@ -2359,7 +2359,7 @@ bfin_expand_call (rtx retval, rtx fnaddr
+ XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, picreg);
+ XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie);
+ if (sibcall)
+- XVECEXP (pat, 0, n++) = gen_rtx_RETURN (VOIDmode);
++ XVECEXP (pat, 0, n++) = ret_rtx;
+ else
+ XVECEXP (pat, 0, n++) = gen_rtx_CLOBBER (VOIDmode, retsreg);
+ call = emit_call_insn (pat);
+Index: gcc-4_5-branch/gcc/config/cris/cris.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/cris/cris.c
++++ gcc-4_5-branch/gcc/config/cris/cris.c
+@@ -1771,7 +1771,7 @@ cris_expand_return (bool on_stack)
+ we do that until they're fixed. Currently, all return insns in a
+ function must be the same (not really a limiting factor) so we need
+ to check that it doesn't change half-way through. */
+- emit_jump_insn (gen_rtx_RETURN (VOIDmode));
++ emit_jump_insn (ret_rtx);
+
+ CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_RET || !on_stack);
+ CRIS_ASSERT (cfun->machine->return_type != CRIS_RETINSN_JUMP || on_stack);
+Index: gcc-4_5-branch/gcc/config/h8300/h8300.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/h8300/h8300.c
++++ gcc-4_5-branch/gcc/config/h8300/h8300.c
+@@ -691,7 +691,7 @@ h8300_push_pop (int regno, int nregs, bo
+ /* Add the return instruction. */
+ if (return_p)
+ {
+- RTVEC_ELT (vec, i) = gen_rtx_RETURN (VOIDmode);
++ RTVEC_ELT (vec, i) = ret_rtx;
+ i++;
+ }
+
+@@ -975,7 +975,7 @@ h8300_expand_epilogue (void)
+ }
+
+ if (!returned_p)
+- emit_jump_insn (gen_rtx_RETURN (VOIDmode));
++ emit_jump_insn (ret_rtx);
+ }
+
+ /* Return nonzero if the current function is an interrupt
+Index: gcc-4_5-branch/gcc/config/i386/i386.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/i386/i386.c
++++ gcc-4_5-branch/gcc/config/i386/i386.c
+@@ -9308,13 +9308,13 @@ ix86_expand_epilogue (int style)
+
+ pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
+ popc, -1, true);
+- emit_jump_insn (gen_return_indirect_internal (ecx));
++ emit_jump_insn (gen_simple_return_indirect_internal (ecx));
+ }
+ else
+- emit_jump_insn (gen_return_pop_internal (popc));
++ emit_jump_insn (gen_simple_return_pop_internal (popc));
+ }
+ else
+- emit_jump_insn (gen_return_internal ());
++ emit_jump_insn (gen_simple_return_internal ());
+
+ /* Restore the state back to the state from the prologue,
+ so that it's correct for the next epilogue. */
+@@ -26615,7 +26615,7 @@ ix86_pad_returns (void)
+ rtx prev;
+ bool replace = false;
+
+- if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
++ if (!JUMP_P (ret) || !ANY_RETURN_P (PATTERN (ret))
+ || optimize_bb_for_size_p (bb))
+ continue;
+ for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
+@@ -26645,7 +26645,10 @@ ix86_pad_returns (void)
+ }
+ if (replace)
+ {
+- emit_jump_insn_before (gen_return_internal_long (), ret);
++ if (PATTERN (ret) == ret_rtx)
++ emit_jump_insn_before (gen_return_internal_long (), ret);
++ else
++ emit_jump_insn_before (gen_simple_return_internal_long (), ret);
+ delete_insn (ret);
+ }
+ }
+Index: gcc-4_5-branch/gcc/config/i386/i386.md
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/i386/i386.md
++++ gcc-4_5-branch/gcc/config/i386/i386.md
+@@ -13798,24 +13798,29 @@
+ ""
+ [(set_attr "length" "0")])
+
++(define_code_iterator returns [return simple_return])
++(define_code_attr return_str [(return "") (simple_return "simple_")])
++(define_code_attr return_cond [(return "ix86_can_use_return_insn_p ()")
++ (simple_return "")])
++
+ ;; Insn emitted into the body of a function to return from a function.
+ ;; This is only done if the function's epilogue is known to be simple.
+ ;; See comments for ix86_can_use_return_insn_p in i386.c.
+
+-(define_expand "return"
+- [(return)]
+- "ix86_can_use_return_insn_p ()"
++(define_expand "<return_str>return"
++ [(returns)]
++ "<return_cond>"
+ {
+ if (crtl->args.pops_args)
+ {
+ rtx popc = GEN_INT (crtl->args.pops_args);
+- emit_jump_insn (gen_return_pop_internal (popc));
++ emit_jump_insn (gen_<return_str>return_pop_internal (popc));
+ DONE;
+ }
+ })
+
+-(define_insn "return_internal"
+- [(return)]
++(define_insn "<return_str>return_internal"
++ [(returns)]
+ "reload_completed"
+ "ret"
+ [(set_attr "length" "1")
+@@ -13826,8 +13831,8 @@
+ ;; Used by x86_machine_dependent_reorg to avoid penalty on single byte RET
+ ;; instruction Athlon and K8 have.
+
+-(define_insn "return_internal_long"
+- [(return)
++(define_insn "<return_str>return_internal_long"
++ [(returns)
+ (unspec [(const_int 0)] UNSPEC_REP)]
+ "reload_completed"
+ "rep\;ret"
+@@ -13837,8 +13842,8 @@
+ (set_attr "prefix_rep" "1")
+ (set_attr "modrm" "0")])
+
+-(define_insn "return_pop_internal"
+- [(return)
++(define_insn "<return_str>return_pop_internal"
++ [(returns)
+ (use (match_operand:SI 0 "const_int_operand" ""))]
+ "reload_completed"
+ "ret\t%0"
+@@ -13847,8 +13852,8 @@
+ (set_attr "length_immediate" "2")
+ (set_attr "modrm" "0")])
+
+-(define_insn "return_indirect_internal"
+- [(return)
++(define_insn "<return_str>return_indirect_internal"
++ [(returns)
+ (use (match_operand:SI 0 "register_operand" "r"))]
+ "reload_completed"
+ "jmp\t%A0"
+Index: gcc-4_5-branch/gcc/config/m68hc11/m68hc11.md
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/m68hc11/m68hc11.md
++++ gcc-4_5-branch/gcc/config/m68hc11/m68hc11.md
+@@ -6576,7 +6576,7 @@
+ if (ret_size && ret_size <= 2)
+ {
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode,
+- gen_rtvec (2, gen_rtx_RETURN (VOIDmode),
++ gen_rtvec (2, ret_rtx,
+ gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (HImode, 1)))));
+ DONE;
+@@ -6584,7 +6584,7 @@
+ if (ret_size)
+ {
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode,
+- gen_rtvec (2, gen_rtx_RETURN (VOIDmode),
++ gen_rtvec (2, ret_rtx,
+ gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (SImode, 0)))));
+ DONE;
+Index: gcc-4_5-branch/gcc/config/m68k/m68k.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/m68k/m68k.c
++++ gcc-4_5-branch/gcc/config/m68k/m68k.c
+@@ -1366,7 +1366,7 @@ m68k_expand_epilogue (bool sibcall_p)
+ EH_RETURN_STACKADJ_RTX));
+
+ if (!sibcall_p)
+- emit_jump_insn (gen_rtx_RETURN (VOIDmode));
++ emit_jump_insn (ret_rtx);
+ }
+
+ /* Return true if X is a valid comparison operator for the dbcc
+Index: gcc-4_5-branch/gcc/config/mips/mips.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/mips/mips.c
++++ gcc-4_5-branch/gcc/config/mips/mips.c
+@@ -10497,7 +10497,8 @@ mips_expand_epilogue (bool sibcall_p)
+ regno = GP_REG_FIRST + 7;
+ else
+ regno = RETURN_ADDR_REGNUM;
+- emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, regno)));
++ emit_jump_insn (gen_simple_return_internal (gen_rtx_REG (Pmode,
++ regno)));
+ }
+ }
+
+Index: gcc-4_5-branch/gcc/config/mips/mips.md
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/mips/mips.md
++++ gcc-4_5-branch/gcc/config/mips/mips.md
+@@ -5815,6 +5815,18 @@
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
++(define_expand "simple_return"
++ [(simple_return)]
++ "!mips_can_use_return_insn ()"
++ { mips_expand_before_return (); })
++
++(define_insn "*simple_return"
++ [(simple_return)]
++ "!mips_can_use_return_insn ()"
++ "%*j\t$31%/"
++ [(set_attr "type" "jump")
++ (set_attr "mode" "none")])
++
+ ;; Normal return.
+
+ (define_insn "return_internal"
+@@ -5825,6 +5837,14 @@
+ [(set_attr "type" "jump")
+ (set_attr "mode" "none")])
+
++(define_insn "simple_return_internal"
++ [(simple_return)
++ (use (match_operand 0 "pmode_register_operand" ""))]
++ ""
++ "%*j\t%0%/"
++ [(set_attr "type" "jump")
++ (set_attr "mode" "none")])
++
+ ;; Exception return.
+ (define_insn "mips_eret"
+ [(return)
+Index: gcc-4_5-branch/gcc/config/picochip/picochip.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/picochip/picochip.c
++++ gcc-4_5-branch/gcc/config/picochip/picochip.c
+@@ -1996,7 +1996,7 @@ picochip_expand_epilogue (int is_sibling
+ rtvec p;
+ p = rtvec_alloc (2);
+
+- RTVEC_ELT (p, 0) = gen_rtx_RETURN (VOIDmode);
++ RTVEC_ELT (p, 0) = ret_rtx;
+ RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (Pmode, LINK_REGNUM));
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
+Index: gcc-4_5-branch/gcc/config/rs6000/rs6000.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/rs6000/rs6000.c
++++ gcc-4_5-branch/gcc/config/rs6000/rs6000.c
+@@ -18563,7 +18563,7 @@ rs6000_make_savres_rtx (rs6000_stack_t *
+ p = rtvec_alloc ((lr ? 4 : 3) + n_regs);
+
+ if (!savep && lr)
+- RTVEC_ELT (p, offset++) = gen_rtx_RETURN (VOIDmode);
++ RTVEC_ELT (p, offset++) = ret_rtx;
+
+ RTVEC_ELT (p, offset++)
+ = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 65));
+@@ -19638,7 +19638,7 @@ rs6000_emit_epilogue (int sibcall)
+ alloc_rname = ggc_strdup (rname);
+
+ j = 0;
+- RTVEC_ELT (p, j++) = gen_rtx_RETURN (VOIDmode);
++ RTVEC_ELT (p, j++) = ret_rtx;
+ RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (Pmode,
+ LR_REGNO));
+@@ -20254,7 +20254,7 @@ rs6000_emit_epilogue (int sibcall)
+ else
+ p = rtvec_alloc (2);
+
+- RTVEC_ELT (p, 0) = gen_rtx_RETURN (VOIDmode);
++ RTVEC_ELT (p, 0) = ret_rtx;
+ RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
+ ? gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 65))
+ : gen_rtx_CLOBBER (VOIDmode,
+@@ -20695,7 +20695,7 @@ rs6000_output_mi_thunk (FILE *file, tree
+ gen_rtx_USE (VOIDmode,
+ gen_rtx_REG (SImode,
+ LR_REGNO)),
+- gen_rtx_RETURN (VOIDmode))));
++ ret_rtx)));
+ SIBLING_CALL_P (insn) = 1;
+ emit_barrier ();
+
+Index: gcc-4_5-branch/gcc/config/rx/rx.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/rx/rx.c
++++ gcc-4_5-branch/gcc/config/rx/rx.c
+@@ -1562,7 +1562,7 @@ gen_rx_rtsd_vector (unsigned int adjust,
+ : plus_constant (stack_pointer_rtx,
+ i * UNITS_PER_WORD)));
+
+- XVECEXP (vector, 0, count - 1) = gen_rtx_RETURN (VOIDmode);
++ XVECEXP (vector, 0, count - 1) = ret_rtx;
+
+ return vector;
+ }
+Index: gcc-4_5-branch/gcc/config/s390/s390.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/s390/s390.c
++++ gcc-4_5-branch/gcc/config/s390/s390.c
+@@ -8170,7 +8170,7 @@ s390_emit_epilogue (bool sibcall)
+
+ p = rtvec_alloc (2);
+
+- RTVEC_ELT (p, 0) = gen_rtx_RETURN (VOIDmode);
++ RTVEC_ELT (p, 0) = ret_rtx;
+ RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
+ emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
+ }
+Index: gcc-4_5-branch/gcc/config/sh/sh.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/sh/sh.c
++++ gcc-4_5-branch/gcc/config/sh/sh.c
+@@ -5252,7 +5252,8 @@ barrier_align (rtx barrier_or_label)
+ }
+ if (prev
+ && JUMP_P (prev)
+- && JUMP_LABEL (prev))
++ && JUMP_LABEL (prev)
++ && !ANY_RETURN_P (JUMP_LABEL (prev)))
+ {
+ rtx x;
+ if (jump_to_next
+@@ -5951,7 +5952,7 @@ split_branches (rtx first)
+ JUMP_LABEL (insn) = far_label;
+ LABEL_NUSES (far_label)++;
+ }
+- redirect_jump (insn, NULL_RTX, 1);
++ redirect_jump (insn, ret_rtx, 1);
+ far_label = 0;
+ }
+ }
+Index: gcc-4_5-branch/gcc/config/v850/v850.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/config/v850/v850.c
++++ gcc-4_5-branch/gcc/config/v850/v850.c
+@@ -1832,7 +1832,7 @@ expand_epilogue (void)
+ {
+ restore_all = gen_rtx_PARALLEL (VOIDmode,
+ rtvec_alloc (num_restore + 2));
+- XVECEXP (restore_all, 0, 0) = gen_rtx_RETURN (VOIDmode);
++ XVECEXP (restore_all, 0, 0) = ret_rtx;
+ XVECEXP (restore_all, 0, 1)
+ = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode,
+Index: gcc-4_5-branch/gcc/df-scan.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/df-scan.c
++++ gcc-4_5-branch/gcc/df-scan.c
+@@ -3296,6 +3296,7 @@ df_uses_record (enum df_ref_class cl, st
+ }
+
+ case RETURN:
++ case SIMPLE_RETURN:
+ break;
+
+ case ASM_OPERANDS:
+Index: gcc-4_5-branch/gcc/doc/invoke.texi
+===================================================================
+--- gcc-4_5-branch.orig/gcc/doc/invoke.texi
++++ gcc-4_5-branch/gcc/doc/invoke.texi
+@@ -5751,6 +5751,7 @@ compilation time.
+ -fipa-pure-const @gol
+ -fipa-reference @gol
+ -fmerge-constants
++-fshrink-wrap @gol
+ -fsplit-wide-types @gol
+ -ftree-builtin-call-dce @gol
+ -ftree-ccp @gol
+@@ -6506,6 +6507,12 @@ This option has no effect until one of @
+ When pipelining loops during selective scheduling, also pipeline outer loops.
+ This option has no effect until @option{-fsel-sched-pipelining} is turned on.
+
++@item -fshrink-wrap
++@opindex fshrink-wrap
++Emit function prologues only before parts of the function that need it,
++rather than at the top of the function. This flag is enabled by default at
++@option{-O2} and higher.
++
+ @item -fcaller-saves
+ @opindex fcaller-saves
+ Enable values to be allocated in registers that will be clobbered by
+Index: gcc-4_5-branch/gcc/doc/md.texi
+===================================================================
+--- gcc-4_5-branch.orig/gcc/doc/md.texi
++++ gcc-4_5-branch/gcc/doc/md.texi
+@@ -4801,7 +4801,19 @@ RTL generation phase. In this case it i
+ multiple instructions are usually needed to return from a function, but
+ some class of functions only requires one instruction to implement a
+ return. Normally, the applicable functions are those which do not need
+-to save any registers or allocate stack space.
++to save any registers or allocate stack space, although some targets
++have instructions that can perform both the epilogue and function return
++in one instruction.
++
++@cindex @code{simple_return} instruction pattern
++@item @samp{simple_return}
++Subroutine return instruction. This instruction pattern name should be
++defined only if a single instruction can do all the work of returning
++from a function on a path where no epilogue is required. This pattern
++is very similar to the @code{return} instruction pattern, but it is emitted
++only by the shrink-wrapping optimization on paths where the function
++prologue has not been executed, and a function return should occur without
++any of the effects of the epilogue.
+
+ @findex reload_completed
+ @findex leaf_function_p
+Index: gcc-4_5-branch/gcc/doc/rtl.texi
+===================================================================
+--- gcc-4_5-branch.orig/gcc/doc/rtl.texi
++++ gcc-4_5-branch/gcc/doc/rtl.texi
+@@ -2888,6 +2888,13 @@ placed in @code{pc} to return to the cal
+ Note that an insn pattern of @code{(return)} is logically equivalent to
+ @code{(set (pc) (return))}, but the latter form is never used.
+
++@findex simple_return
++@item (simple_return)
++Like @code{(return)}, but truly represents only a function return, while
++@code{(return)} may represent an insn that also performs other functions
++of the function epilogue. Like @code{(return)}, this may also occur in
++conditional jumps.
++
+ @findex call
+ @item (call @var{function} @var{nargs})
+ Represents a function call. @var{function} is a @code{mem} expression
+@@ -3017,7 +3024,7 @@ Represents several side effects performe
+ brackets stand for a vector; the operand of @code{parallel} is a
+ vector of expressions. @var{x0}, @var{x1} and so on are individual
+ side effect expressions---expressions of code @code{set}, @code{call},
+-@code{return}, @code{clobber} or @code{use}.
++@code{return}, @code{simple_return}, @code{clobber} or @code{use}.
+
+ ``In parallel'' means that first all the values used in the individual
+ side-effects are computed, and second all the actual side-effects are
+@@ -3656,14 +3663,16 @@ and @code{call_insn} insns:
+ @table @code
+ @findex PATTERN
+ @item PATTERN (@var{i})
+-An expression for the side effect performed by this insn. This must be
+-one of the following codes: @code{set}, @code{call}, @code{use},
+-@code{clobber}, @code{return}, @code{asm_input}, @code{asm_output},
+-@code{addr_vec}, @code{addr_diff_vec}, @code{trap_if}, @code{unspec},
+-@code{unspec_volatile}, @code{parallel}, @code{cond_exec}, or @code{sequence}. If it is a @code{parallel},
+-each element of the @code{parallel} must be one these codes, except that
+-@code{parallel} expressions cannot be nested and @code{addr_vec} and
+-@code{addr_diff_vec} are not permitted inside a @code{parallel} expression.
++An expression for the side effect performed by this insn. This must
++be one of the following codes: @code{set}, @code{call}, @code{use},
++@code{clobber}, @code{return}, @code{simple_return}, @code{asm_input},
++@code{asm_output}, @code{addr_vec}, @code{addr_diff_vec},
++@code{trap_if}, @code{unspec}, @code{unspec_volatile},
++@code{parallel}, @code{cond_exec}, or @code{sequence}. If it is a
++@code{parallel}, each element of the @code{parallel} must be one of these
++codes, except that @code{parallel} expressions cannot be nested and
++@code{addr_vec} and @code{addr_diff_vec} are not permitted inside a
++@code{parallel} expression.
+
+ @findex INSN_CODE
+ @item INSN_CODE (@var{i})
+Index: gcc-4_5-branch/gcc/doc/tm.texi
+===================================================================
+--- gcc-4_5-branch.orig/gcc/doc/tm.texi
++++ gcc-4_5-branch/gcc/doc/tm.texi
+@@ -3287,6 +3287,12 @@ Define this if the return address of a p
+ from the frame pointer of the previous stack frame.
+ @end defmac
+
++@defmac RETURN_ADDR_REGNUM
++If defined, a C expression whose value is the register number of the return
++address for the current function. Targets that pass the return address on
++the stack should not define this macro.
++@end defmac
++
+ @defmac INCOMING_RETURN_ADDR_RTX
+ A C expression whose value is RTL representing the location of the
+ incoming return address at the beginning of any function, before the
+Index: gcc-4_5-branch/gcc/dwarf2out.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/dwarf2out.c
++++ gcc-4_5-branch/gcc/dwarf2out.c
+@@ -1396,7 +1396,7 @@ compute_barrier_args_size_1 (rtx insn, H
+ {
+ rtx dest = JUMP_LABEL (insn);
+
+- if (dest)
++ if (dest && !ANY_RETURN_P (dest))
+ {
+ if (barrier_args_size [INSN_UID (dest)] < 0)
+ {
+Index: gcc-4_5-branch/gcc/emit-rtl.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/emit-rtl.c
++++ gcc-4_5-branch/gcc/emit-rtl.c
+@@ -2432,6 +2432,8 @@ verify_rtx_sharing (rtx orig, rtx insn)
+ case CODE_LABEL:
+ case PC:
+ case CC0:
++ case RETURN:
++ case SIMPLE_RETURN:
+ case SCRATCH:
+ return;
+ /* SCRATCH must be shared because they represent distinct values. */
+@@ -3323,14 +3325,17 @@ prev_label (rtx insn)
+ return insn;
+ }
+
+-/* Return the last label to mark the same position as LABEL. Return null
+- if LABEL itself is null. */
++/* Return the last label to mark the same position as LABEL. Return LABEL
++ itself if it is null or any return rtx. */
+
+ rtx
+ skip_consecutive_labels (rtx label)
+ {
+ rtx insn;
+
++ if (label && ANY_RETURN_P (label))
++ return label;
++
+ for (insn = label; insn != 0 && !INSN_P (insn); insn = NEXT_INSN (insn))
+ if (LABEL_P (insn))
+ label = insn;
+@@ -5209,7 +5214,7 @@ classify_insn (rtx x)
+ return CODE_LABEL;
+ if (GET_CODE (x) == CALL)
+ return CALL_INSN;
+- if (GET_CODE (x) == RETURN)
++ if (GET_CODE (x) == RETURN || GET_CODE (x) == SIMPLE_RETURN)
+ return JUMP_INSN;
+ if (GET_CODE (x) == SET)
+ {
+@@ -5715,8 +5720,10 @@ init_emit_regs (void)
+ init_reg_modes_target ();
+
+ /* Assign register numbers to the globally defined register rtx. */
+- pc_rtx = gen_rtx_PC (VOIDmode);
+- cc0_rtx = gen_rtx_CC0 (VOIDmode);
++ pc_rtx = gen_rtx_fmt_ (PC, VOIDmode);
++ ret_rtx = gen_rtx_fmt_ (RETURN, VOIDmode);
++ simple_return_rtx = gen_rtx_fmt_ (SIMPLE_RETURN, VOIDmode);
++ cc0_rtx = gen_rtx_fmt_ (CC0, VOIDmode);
+ stack_pointer_rtx = gen_raw_REG (Pmode, STACK_POINTER_REGNUM);
+ frame_pointer_rtx = gen_raw_REG (Pmode, FRAME_POINTER_REGNUM);
+ hard_frame_pointer_rtx = gen_raw_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
+Index: gcc-4_5-branch/gcc/final.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/final.c
++++ gcc-4_5-branch/gcc/final.c
+@@ -2428,7 +2428,7 @@ final_scan_insn (rtx insn, FILE *file, i
+ delete_insn (insn);
+ break;
+ }
+- else if (GET_CODE (SET_SRC (body)) == RETURN)
++ else if (ANY_RETURN_P (SET_SRC (body)))
+ /* Replace (set (pc) (return)) with (return). */
+ PATTERN (insn) = body = SET_SRC (body);
+
+Index: gcc-4_5-branch/gcc/function.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/function.c
++++ gcc-4_5-branch/gcc/function.c
+@@ -147,9 +147,6 @@ extern tree debug_find_var_in_block_tree
+ can always export `prologue_epilogue_contains'. */
+ static void record_insns (rtx, rtx, htab_t *) ATTRIBUTE_UNUSED;
+ static bool contains (const_rtx, htab_t);
+-#ifdef HAVE_return
+-static void emit_return_into_block (basic_block);
+-#endif
+ static void prepare_function_start (void);
+ static void do_clobber_return_reg (rtx, void *);
+ static void do_use_return_reg (rtx, void *);
+@@ -4987,35 +4984,190 @@ prologue_epilogue_contains (const_rtx in
+ return 0;
+ }
+
++#ifdef HAVE_simple_return
++/* This collects sets and clobbers of hard registers in a HARD_REG_SET,
++ which is pointed to by DATA. */
++static void
++record_hard_reg_sets (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
++{
++ HARD_REG_SET *pset = (HARD_REG_SET *)data;
++ if (REG_P (x) && REGNO (x) < FIRST_PSEUDO_REGISTER)
++ {
++ int nregs = hard_regno_nregs[REGNO (x)][GET_MODE (x)];
++ while (nregs-- > 0)
++ SET_HARD_REG_BIT (*pset, REGNO (x) + nregs);
++ }
++}
++
++/* A subroutine of requires_stack_frame_p, called via for_each_rtx.
++   Return 1 if the rtx at *LOC is the stack pointer, the frame or
++   argument pointer, the PIC register, or the return address
++   register, i.e. if it requires the stack frame to be valid.  */
++
++static int
++frame_required_for_rtx (rtx *loc, void *data ATTRIBUTE_UNUSED)
++{
++ rtx x = *loc;
++ if (x == stack_pointer_rtx || x == hard_frame_pointer_rtx
++ || x == arg_pointer_rtx || x == pic_offset_table_rtx
++#ifdef RETURN_ADDR_REGNUM
++ || (REG_P (x) && REGNO (x) == RETURN_ADDR_REGNUM)
++#endif
++ )
++ return 1;
++ return 0;
++}
++
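++/* Return true if INSN requires the stack frame to be set up: it is a
++   non-sibling call, it mentions one of the registers tested by
++   frame_required_for_rtx, or it sets a call-saved hard register that
++   is live in the function.  */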
++static bool
++requires_stack_frame_p (rtx insn)
++{
++ HARD_REG_SET hardregs;
++ unsigned regno;
++
++ if (!INSN_P (insn) || DEBUG_INSN_P (insn))
++ return false;
++ if (CALL_P (insn))
++ return !SIBLING_CALL_P (insn);
++ if (for_each_rtx (&PATTERN (insn), frame_required_for_rtx, NULL))
++ return true;
++ CLEAR_HARD_REG_SET (hardregs);
++ note_stores (PATTERN (insn), record_hard_reg_sets, &hardregs);
++ AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set);
++ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
++ if (TEST_HARD_REG_BIT (hardregs, regno)
++ && df_regs_ever_live_p (regno))
++ return true;
++ return false;
++}
++#endif
++
+ #ifdef HAVE_return
+-/* Insert gen_return at the end of block BB. This also means updating
+- block_for_insn appropriately. */
++
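++/* Return a (return) rtx pattern, or a (simple_return) pattern if
++   SIMPLE_P; the latter requires HAVE_simple_return.  */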
++static rtx
++gen_return_pattern (bool simple_p)
++{
++#ifdef HAVE_simple_return
++ return simple_p ? gen_simple_return () : gen_return ();
++#else
++ gcc_assert (!simple_p);
++ return gen_return ();
++#endif
++}
++
++/* Insert an appropriate return pattern at the end of block BB. This
++ also means updating block_for_insn appropriately. */
+
+ static void
+-emit_return_into_block (basic_block bb)
++emit_return_into_block (bool simple_p, basic_block bb)
+ {
+- emit_jump_insn_after (gen_return (), BB_END (bb));
++ rtx jump;
++ jump = emit_jump_insn_after (gen_return_pattern (simple_p), BB_END (bb));
++ JUMP_LABEL (jump) = simple_p ? simple_return_rtx : ret_rtx;
+ }
+-#endif /* HAVE_return */
++#endif
+
+ /* Generate the prologue and epilogue RTL if the machine supports it. Thread
+ this into place with notes indicating where the prologue ends and where
+- the epilogue begins. Update the basic block information when possible. */
++ the epilogue begins. Update the basic block information when possible.
++
++ Notes on epilogue placement:
++ There are several kinds of edges to the exit block:
++ * a single fallthru edge from LAST_BB
++ * possibly, edges from blocks containing sibcalls
++ * possibly, fake edges from infinite loops
++
++ The epilogue is always emitted on the fallthru edge from the last basic
++ block in the function, LAST_BB, into the exit block.
++
++ If LAST_BB is empty except for a label, it is the target of every
++ other basic block in the function that ends in a return. If a
++ target has a return or simple_return pattern (possibly with
++ conditional variants), these basic blocks can be changed so that a
++ return insn is emitted into them, and their target is adjusted to
++ the real exit block.
++
++ Notes on shrink wrapping: We implement a fairly conservative
++ version of shrink-wrapping rather than the textbook one. We only
++ generate a single prologue and a single epilogue. This is
++ sufficient to catch a number of interesting cases involving early
++ exits.
++
++ First, we identify the blocks that require the prologue to occur before
++ them. These are the ones that modify a call-saved register, or reference
++ any of the stack or frame pointer registers. To simplify things, we then
++ mark everything reachable from these blocks as also requiring a prologue.
++ This takes care of loops automatically, and avoids the need to examine
++ whether MEMs reference the frame, since it is sufficient to check for
++ occurrences of the stack or frame pointer.
++
++ We then compute the set of blocks for which the need for a prologue
++ is anticipatable (borrowing terminology from the shrink-wrapping
++ description in Muchnick's book). These are the blocks which either
++ require a prologue themselves, or those that have only successors
++ where the prologue is anticipatable. The prologue needs to be
++ inserted on all edges from BB1->BB2 where BB2 is in ANTIC and BB1
++ is not. For the moment, we ensure that only one such edge exists.
++
++ The epilogue is placed as described above, but we make a
++ distinction between inserting return and simple_return patterns
++ when modifying other blocks that end in a return. Blocks that end
++ in a sibcall omit the sibcall_epilogue if the block is not in
++ ANTIC. */
+
+ static void
+ thread_prologue_and_epilogue_insns (void)
+ {
+ int inserted = 0;
++ basic_block last_bb;
++ bool last_bb_active;
++#ifdef HAVE_simple_return
++ bool unconverted_simple_returns = false;
++ basic_block simple_return_block = NULL;
++#endif
++ rtx returnjump ATTRIBUTE_UNUSED;
++ rtx seq ATTRIBUTE_UNUSED, epilogue_end ATTRIBUTE_UNUSED;
++ rtx prologue_seq ATTRIBUTE_UNUSED, split_prologue_seq ATTRIBUTE_UNUSED;
++ edge entry_edge, orig_entry_edge, exit_fallthru_edge;
+ edge e;
+-#if defined (HAVE_sibcall_epilogue) || defined (HAVE_epilogue) || defined (HAVE_return) || defined (HAVE_prologue)
+- rtx seq;
+-#endif
+-#if defined (HAVE_epilogue) || defined(HAVE_return)
+- rtx epilogue_end = NULL_RTX;
+-#endif
+ edge_iterator ei;
++ bitmap_head bb_flags;
++
++ df_analyze ();
+
+ rtl_profile_for_bb (ENTRY_BLOCK_PTR);
++
++ epilogue_end = NULL_RTX;
++ returnjump = NULL_RTX;
++
++ /* Can't deal with multiple successors of the entry block at the
++ moment. Function should always have at least one entry
++ point. */
++ gcc_assert (single_succ_p (ENTRY_BLOCK_PTR));
++ entry_edge = single_succ_edge (ENTRY_BLOCK_PTR);
++ orig_entry_edge = entry_edge;
++
++ exit_fallthru_edge = find_fallthru_edge (EXIT_BLOCK_PTR->preds);
++ if (exit_fallthru_edge != NULL)
++ {
++ rtx label;
++
++ last_bb = exit_fallthru_edge->src;
++ /* Test whether there are active instructions in the last block. */
++ label = BB_END (last_bb);
++ while (label && !LABEL_P (label))
++ {
++ if (active_insn_p (label))
++ break;
++ label = PREV_INSN (label);
++ }
++
++ last_bb_active = BB_HEAD (last_bb) != label || !LABEL_P (label);
++ }
++ else
++ {
++ last_bb = NULL;
++ last_bb_active = false;
++ }
++
+ #ifdef HAVE_prologue
+ if (HAVE_prologue)
+ {
+@@ -5040,20 +5192,169 @@ thread_prologue_and_epilogue_insns (void
+ emit_insn (gen_blockage ());
+ #endif
+
+- seq = get_insns ();
++ prologue_seq = get_insns ();
+ end_sequence ();
+ set_insn_locators (seq, prologue_locator);
++ }
++#endif
+
+- /* Can't deal with multiple successors of the entry block
+- at the moment. Function should always have at least one
+- entry point. */
+- gcc_assert (single_succ_p (ENTRY_BLOCK_PTR));
++ bitmap_initialize (&bb_flags, &bitmap_default_obstack);
+
+- insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
+- inserted = 1;
++#ifdef HAVE_simple_return
++ /* Try to perform a kind of shrink-wrapping, making sure the
++ prologue/epilogue is emitted only around those parts of the
++ function that require it. */
++
++ if (flag_shrink_wrap && HAVE_simple_return && !flag_non_call_exceptions
++ && HAVE_prologue && !crtl->calls_eh_return)
++ {
++ HARD_REG_SET prologue_clobbered, live_on_edge;
++ rtx p_insn;
++ VEC(basic_block, heap) *vec;
++ basic_block bb;
++ bitmap_head bb_antic_flags;
++ bitmap_head bb_on_list;
++
++ bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack);
++ bitmap_initialize (&bb_on_list, &bitmap_default_obstack);
++
++ vec = VEC_alloc (basic_block, heap, n_basic_blocks);
++
++ FOR_EACH_BB (bb)
++ {
++ rtx insn;
++ FOR_BB_INSNS (bb, insn)
++ {
++ if (requires_stack_frame_p (insn))
++ {
++ bitmap_set_bit (&bb_flags, bb->index);
++ VEC_quick_push (basic_block, vec, bb);
++ break;
++ }
++ }
++ }
++
++ /* For every basic block that needs a prologue, mark all blocks
++ reachable from it, so as to ensure they are also seen as
++ requiring a prologue. */
++ while (!VEC_empty (basic_block, vec))
++ {
++ basic_block tmp_bb = VEC_pop (basic_block, vec);
++ edge e;
++ edge_iterator ei;
++ FOR_EACH_EDGE (e, ei, tmp_bb->succs)
++ {
++ if (e->dest == EXIT_BLOCK_PTR
++ || bitmap_bit_p (&bb_flags, e->dest->index))
++ continue;
++ bitmap_set_bit (&bb_flags, e->dest->index);
++ VEC_quick_push (basic_block, vec, e->dest);
++ }
++ }
++ /* If the last basic block contains only a label, we'll be able
++ to convert jumps to it to (potentially conditional) return
++ insns later. This means we don't necessarily need a prologue
++ for paths reaching it. */
++ if (last_bb)
++ {
++ if (!last_bb_active)
++ bitmap_clear_bit (&bb_flags, last_bb->index);
++ else if (!bitmap_bit_p (&bb_flags, last_bb->index))
++ goto fail_shrinkwrap;
++ }
++
++ /* Now walk backwards from every block that is marked as needing
++ a prologue to compute the bb_antic_flags bitmap. */
++ bitmap_copy (&bb_antic_flags, &bb_flags);
++ FOR_EACH_BB (bb)
++ {
++ edge e;
++ edge_iterator ei;
++ if (!bitmap_bit_p (&bb_flags, bb->index))
++ continue;
++ FOR_EACH_EDGE (e, ei, bb->preds)
++ if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
++ {
++ VEC_quick_push (basic_block, vec, e->src);
++ bitmap_set_bit (&bb_on_list, e->src->index);
++ }
++ }
++ while (!VEC_empty (basic_block, vec))
++ {
++ basic_block tmp_bb = VEC_pop (basic_block, vec);
++ edge e;
++ edge_iterator ei;
++ bool all_set = true;
++
++ bitmap_clear_bit (&bb_on_list, tmp_bb->index);
++ FOR_EACH_EDGE (e, ei, tmp_bb->succs)
++ {
++ if (!bitmap_bit_p (&bb_antic_flags, e->dest->index))
++ {
++ all_set = false;
++ break;
++ }
++ }
++ if (all_set)
++ {
++ bitmap_set_bit (&bb_antic_flags, tmp_bb->index);
++ FOR_EACH_EDGE (e, ei, tmp_bb->preds)
++ if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
++ {
++ VEC_quick_push (basic_block, vec, e->src);
++ bitmap_set_bit (&bb_on_list, e->src->index);
++ }
++ }
++ }
++ /* Find exactly one edge that leads to a block in ANTIC from
++ a block that isn't. */
++ if (!bitmap_bit_p (&bb_antic_flags, entry_edge->dest->index))
++ FOR_EACH_BB (bb)
++ {
++ if (!bitmap_bit_p (&bb_antic_flags, bb->index))
++ continue;
++ FOR_EACH_EDGE (e, ei, bb->preds)
++ if (!bitmap_bit_p (&bb_antic_flags, e->src->index))
++ {
++ if (entry_edge != orig_entry_edge)
++ {
++ entry_edge = orig_entry_edge;
++ goto fail_shrinkwrap;
++ }
++ entry_edge = e;
++ }
++ }
++
++ /* Test whether the prologue is known to clobber any register
++	 (other than FP or SP) that is live on the edge.  */
++ CLEAR_HARD_REG_SET (prologue_clobbered);
++ for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn))
++ if (NONDEBUG_INSN_P (p_insn))
++ note_stores (PATTERN (p_insn), record_hard_reg_sets,
++ &prologue_clobbered);
++ CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
++ if (frame_pointer_needed)
++ CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);
++
++ CLEAR_HARD_REG_SET (live_on_edge);
++ reg_set_to_hard_reg_set (&live_on_edge,
++ df_get_live_in (entry_edge->dest));
++ if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered))
++ entry_edge = orig_entry_edge;
++
++ fail_shrinkwrap:
++ bitmap_clear (&bb_antic_flags);
++ bitmap_clear (&bb_on_list);
++ VEC_free (basic_block, heap, vec);
+ }
+ #endif
+
++ if (prologue_seq != NULL_RTX)
++ {
++ insert_insn_on_edge (prologue_seq, entry_edge);
++ inserted = true;
++ }
++
+ /* If the exit block has no non-fake predecessors, we don't need
+ an epilogue. */
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+@@ -5063,100 +5364,130 @@ thread_prologue_and_epilogue_insns (void
+ goto epilogue_done;
+
+ rtl_profile_for_bb (EXIT_BLOCK_PTR);
++
+ #ifdef HAVE_return
+- if (optimize && HAVE_return)
++ /* If we're allowed to generate a simple return instruction, then by
++ definition we don't need a full epilogue. If the last basic
++ block before the exit block does not contain active instructions,
++ examine its predecessors and try to emit (conditional) return
++ instructions. */
++ if (optimize && !last_bb_active
++ && (HAVE_return || entry_edge != orig_entry_edge))
+ {
+- /* If we're allowed to generate a simple return instruction,
+- then by definition we don't need a full epilogue. Examine
+- the block that falls through to EXIT. If it does not
+- contain any code, examine its predecessors and try to
+- emit (conditional) return instructions. */
+-
+- basic_block last;
++ edge_iterator ei2;
++ int i;
++ basic_block bb;
+ rtx label;
++ VEC(basic_block,heap) *src_bbs;
+
+- FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+- if (e->flags & EDGE_FALLTHRU)
+- break;
+- if (e == NULL)
++ if (exit_fallthru_edge == NULL)
+ goto epilogue_done;
+- last = e->src;
++ label = BB_HEAD (last_bb);
+
+- /* Verify that there are no active instructions in the last block. */
+- label = BB_END (last);
+- while (label && !LABEL_P (label))
+- {
+- if (active_insn_p (label))
+- break;
+- label = PREV_INSN (label);
+- }
++ src_bbs = VEC_alloc (basic_block, heap, EDGE_COUNT (last_bb->preds));
++ FOR_EACH_EDGE (e, ei2, last_bb->preds)
++ if (e->src != ENTRY_BLOCK_PTR)
++ VEC_quick_push (basic_block, src_bbs, e->src);
+
+- if (BB_HEAD (last) == label && LABEL_P (label))
++ FOR_EACH_VEC_ELT (basic_block, src_bbs, i, bb)
+ {
+- edge_iterator ei2;
++ bool simple_p;
++ rtx jump;
++ e = find_edge (bb, last_bb);
+
+- for (ei2 = ei_start (last->preds); (e = ei_safe_edge (ei2)); )
+- {
+- basic_block bb = e->src;
+- rtx jump;
++ jump = BB_END (bb);
+
+- if (bb == ENTRY_BLOCK_PTR)
+- {
+- ei_next (&ei2);
+- continue;
+- }
++#ifdef HAVE_simple_return
++ simple_p = (entry_edge != orig_entry_edge
++ ? !bitmap_bit_p (&bb_flags, bb->index) : false);
++#else
++ simple_p = false;
++#endif
+
+- jump = BB_END (bb);
+- if (!JUMP_P (jump) || JUMP_LABEL (jump) != label)
+- {
+- ei_next (&ei2);
+- continue;
+- }
++ if (!simple_p
++ && (!HAVE_return || !JUMP_P (jump)
++ || JUMP_LABEL (jump) != label))
++ continue;
+
+- /* If we have an unconditional jump, we can replace that
+- with a simple return instruction. */
+- if (simplejump_p (jump))
+- {
+- emit_return_into_block (bb);
+- delete_insn (jump);
+- }
++ /* If we have an unconditional jump, we can replace that
++ with a simple return instruction. */
++ if (!JUMP_P (jump))
++ {
++ emit_barrier_after (BB_END (bb));
++ emit_return_into_block (simple_p, bb);
++ }
++ else if (simplejump_p (jump))
++ {
++ emit_return_into_block (simple_p, bb);
++ delete_insn (jump);
++ }
++ else if (condjump_p (jump) && JUMP_LABEL (jump) != label)
++ {
++ basic_block new_bb;
++ edge new_e;
+
+- /* If we have a conditional jump, we can try to replace
+- that with a conditional return instruction. */
+- else if (condjump_p (jump))
+- {
+- if (! redirect_jump (jump, 0, 0))
+- {
+- ei_next (&ei2);
+- continue;
+- }
++ gcc_assert (simple_p);
++ new_bb = split_edge (e);
++ emit_barrier_after (BB_END (new_bb));
++ emit_return_into_block (simple_p, new_bb);
++#ifdef HAVE_simple_return
++ simple_return_block = new_bb;
++#endif
++ new_e = single_succ_edge (new_bb);
++ redirect_edge_succ (new_e, EXIT_BLOCK_PTR);
+
+- /* If this block has only one successor, it both jumps
+- and falls through to the fallthru block, so we can't
+- delete the edge. */
+- if (single_succ_p (bb))
+- {
+- ei_next (&ei2);
+- continue;
+- }
+- }
++ continue;
++ }
++ /* If we have a conditional jump branching to the last
++ block, we can try to replace that with a conditional
++ return instruction. */
++ else if (condjump_p (jump))
++ {
++ rtx dest;
++ if (simple_p)
++ dest = simple_return_rtx;
+ else
++ dest = ret_rtx;
++ if (! redirect_jump (jump, dest, 0))
+ {
+- ei_next (&ei2);
++#ifdef HAVE_simple_return
++ if (simple_p)
++ unconverted_simple_returns = true;
++#endif
+ continue;
+ }
+
+- /* Fix up the CFG for the successful change we just made. */
+- redirect_edge_succ (e, EXIT_BLOCK_PTR);
++ /* If this block has only one successor, it both jumps
++ and falls through to the fallthru block, so we can't
++ delete the edge. */
++ if (single_succ_p (bb))
++ continue;
++ }
++ else
++ {
++#ifdef HAVE_simple_return
++ if (simple_p)
++ unconverted_simple_returns = true;
++#endif
++ continue;
+ }
+
++ /* Fix up the CFG for the successful change we just made. */
++ redirect_edge_succ (e, EXIT_BLOCK_PTR);
++ }
++ VEC_free (basic_block, heap, src_bbs);
++
++ if (HAVE_return)
++ {
+ /* Emit a return insn for the exit fallthru block. Whether
+ this is still reachable will be determined later. */
+
+- emit_barrier_after (BB_END (last));
+- emit_return_into_block (last);
+- epilogue_end = BB_END (last);
+- single_succ_edge (last)->flags &= ~EDGE_FALLTHRU;
++ emit_barrier_after (BB_END (last_bb));
++ emit_return_into_block (false, last_bb);
++ epilogue_end = BB_END (last_bb);
++ if (JUMP_P (epilogue_end))
++ JUMP_LABEL (epilogue_end) = ret_rtx;
++ single_succ_edge (last_bb)->flags &= ~EDGE_FALLTHRU;
+ goto epilogue_done;
+ }
+ }
+@@ -5193,15 +5524,10 @@ thread_prologue_and_epilogue_insns (void
+ }
+ #endif
+
+- /* Find the edge that falls through to EXIT. Other edges may exist
+- due to RETURN instructions, but those don't need epilogues.
+- There really shouldn't be a mixture -- either all should have
+- been converted or none, however... */
++ /* If nothing falls through into the exit block, we don't need an
++ epilogue. */
+
+- FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+- if (e->flags & EDGE_FALLTHRU)
+- break;
+- if (e == NULL)
++ if (exit_fallthru_edge == NULL)
+ goto epilogue_done;
+
+ #ifdef HAVE_epilogue
+@@ -5217,25 +5543,36 @@ thread_prologue_and_epilogue_insns (void
+ set_insn_locators (seq, epilogue_locator);
+
+ seq = get_insns ();
++ returnjump = get_last_insn ();
+ end_sequence ();
+
+- insert_insn_on_edge (seq, e);
++ insert_insn_on_edge (seq, exit_fallthru_edge);
+ inserted = 1;
++ if (JUMP_P (returnjump))
++ {
++ rtx pat = PATTERN (returnjump);
++ if (GET_CODE (pat) == PARALLEL)
++ pat = XVECEXP (pat, 0, 0);
++ if (ANY_RETURN_P (pat))
++ JUMP_LABEL (returnjump) = pat;
++ else
++ JUMP_LABEL (returnjump) = ret_rtx;
++ }
+ }
+ else
+ #endif
+ {
+ basic_block cur_bb;
+
+- if (! next_active_insn (BB_END (e->src)))
++ if (! next_active_insn (BB_END (exit_fallthru_edge->src)))
+ goto epilogue_done;
+ /* We have a fall-through edge to the exit block, the source is not
+- at the end of the function, and there will be an assembler epilogue
+- at the end of the function.
+- We can't use force_nonfallthru here, because that would try to
+- use return. Inserting a jump 'by hand' is extremely messy, so
++ at the end of the function, and there will be an assembler epilogue
++ at the end of the function.
++ We can't use force_nonfallthru here, because that would try to
++ use return. Inserting a jump 'by hand' is extremely messy, so
+ we take advantage of cfg_layout_finalize using
+- fixup_fallthru_exit_predecessor. */
++ fixup_fallthru_exit_predecessor. */
+ cfg_layout_initialize (0);
+ FOR_EACH_BB (cur_bb)
+ if (cur_bb->index >= NUM_FIXED_BLOCKS
+@@ -5244,6 +5581,7 @@ thread_prologue_and_epilogue_insns (void
+ cfg_layout_finalize ();
+ }
+ epilogue_done:
++
+ default_rtl_profile ();
+
+ if (inserted)
+@@ -5260,33 +5598,93 @@ epilogue_done:
+ }
+ }
+
++#ifdef HAVE_simple_return
++ /* If there were branches to an empty LAST_BB which we tried to
++ convert to conditional simple_returns, but couldn't for some
++ reason, create a block to hold a simple_return insn and redirect
++ those remaining edges. */
++ if (unconverted_simple_returns)
++ {
++ edge_iterator ei2;
++ basic_block exit_pred = EXIT_BLOCK_PTR->prev_bb;
++
++ gcc_assert (entry_edge != orig_entry_edge);
++
++#ifdef HAVE_epilogue
++ if (simple_return_block == NULL && returnjump != NULL_RTX
++ && JUMP_LABEL (returnjump) == simple_return_rtx)
++ {
++ edge e = split_block (exit_fallthru_edge->src,
++ PREV_INSN (returnjump));
++ simple_return_block = e->dest;
++ }
++#endif
++ if (simple_return_block == NULL)
++ {
++ basic_block bb;
++ rtx start;
++
++ bb = create_basic_block (NULL, NULL, exit_pred);
++ start = emit_jump_insn_after (gen_simple_return (),
++ BB_END (bb));
++ JUMP_LABEL (start) = simple_return_rtx;
++ emit_barrier_after (start);
++
++ simple_return_block = bb;
++ make_edge (bb, EXIT_BLOCK_PTR, 0);
++ }
++
++ restart_scan:
++ for (ei2 = ei_start (last_bb->preds); (e = ei_safe_edge (ei2)); )
++ {
++ basic_block bb = e->src;
++
++ if (bb != ENTRY_BLOCK_PTR
++ && !bitmap_bit_p (&bb_flags, bb->index))
++ {
++ redirect_edge_and_branch_force (e, simple_return_block);
++ goto restart_scan;
++ }
++ ei_next (&ei2);
++
++ }
++ }
++#endif
++
+ #ifdef HAVE_sibcall_epilogue
+ /* Emit sibling epilogues before any sibling call sites. */
+ for (ei = ei_start (EXIT_BLOCK_PTR->preds); (e = ei_safe_edge (ei)); )
+ {
+ basic_block bb = e->src;
+ rtx insn = BB_END (bb);
++ rtx ep_seq;
+
+ if (!CALL_P (insn)
+- || ! SIBLING_CALL_P (insn))
++ || ! SIBLING_CALL_P (insn)
++ || (entry_edge != orig_entry_edge
++ && !bitmap_bit_p (&bb_flags, bb->index)))
+ {
+ ei_next (&ei);
+ continue;
+ }
+
+- start_sequence ();
+- emit_note (NOTE_INSN_EPILOGUE_BEG);
+- emit_insn (gen_sibcall_epilogue ());
+- seq = get_insns ();
+- end_sequence ();
++ ep_seq = gen_sibcall_epilogue ();
++ if (ep_seq)
++ {
++ start_sequence ();
++ emit_note (NOTE_INSN_EPILOGUE_BEG);
++ emit_insn (ep_seq);
++ seq = get_insns ();
++ end_sequence ();
+
+- /* Retain a map of the epilogue insns. Used in life analysis to
+- avoid getting rid of sibcall epilogue insns. Do this before we
+- actually emit the sequence. */
+- record_insns (seq, NULL, &epilogue_insn_hash);
+- set_insn_locators (seq, epilogue_locator);
++ /* Retain a map of the epilogue insns. Used in life analysis to
++ avoid getting rid of sibcall epilogue insns. Do this before we
++ actually emit the sequence. */
++ record_insns (seq, NULL, &epilogue_insn_hash);
++ set_insn_locators (seq, epilogue_locator);
+
+- emit_insn_before (seq, insn);
++ emit_insn_before (seq, insn);
++ }
+ ei_next (&ei);
+ }
+ #endif
+@@ -5311,6 +5709,8 @@ epilogue_done:
+ }
+ #endif
+
++ bitmap_clear (&bb_flags);
++
+ /* Threading the prologue and epilogue changes the artificial refs
+ in the entry and exit blocks. */
+ epilogue_completed = 1;
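+
+An illustration of the shrink-wrapping performed above (not part of the
+patch; f and g are hypothetical):
+
+  int
+  f (int *p)
+  {
+    if (!p)
+      return -1;       /* eligible for simple_return: no prologue needed */
+    return g (p) + 1;  /* prologue/epilogue emitted only around this path */
+  }
+
+Blocks that only reach the early return stay outside the prologue region,
+so call-saved registers are not saved or restored on that path.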
+Index: gcc-4_5-branch/gcc/genemit.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/genemit.c
++++ gcc-4_5-branch/gcc/genemit.c
+@@ -222,6 +222,12 @@ gen_exp (rtx x, enum rtx_code subroutine
+ case PC:
+ printf ("pc_rtx");
+ return;
++ case RETURN:
++ printf ("ret_rtx");
++ return;
++ case SIMPLE_RETURN:
++ printf ("simple_return_rtx");
++ return;
+ case CLOBBER:
+ if (REG_P (XEXP (x, 0)))
+ {
+@@ -544,8 +550,8 @@ gen_expand (rtx expand)
+ || (GET_CODE (next) == PARALLEL
+ && ((GET_CODE (XVECEXP (next, 0, 0)) == SET
+ && GET_CODE (SET_DEST (XVECEXP (next, 0, 0))) == PC)
+- || GET_CODE (XVECEXP (next, 0, 0)) == RETURN))
+- || GET_CODE (next) == RETURN)
++ || ANY_RETURN_P (XVECEXP (next, 0, 0))))
++ || ANY_RETURN_P (next))
+ printf (" emit_jump_insn (");
+ else if ((GET_CODE (next) == SET && GET_CODE (SET_SRC (next)) == CALL)
+ || GET_CODE (next) == CALL
+@@ -660,7 +666,7 @@ gen_split (rtx split)
+ || (GET_CODE (next) == PARALLEL
+ && GET_CODE (XVECEXP (next, 0, 0)) == SET
+ && GET_CODE (SET_DEST (XVECEXP (next, 0, 0))) == PC)
+- || GET_CODE (next) == RETURN)
++ || ANY_RETURN_P (next))
+ printf (" emit_jump_insn (");
+ else if ((GET_CODE (next) == SET && GET_CODE (SET_SRC (next)) == CALL)
+ || GET_CODE (next) == CALL
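+
+A sketch of the effect of the gen_exp change above (assuming a back end
+with the usual "(define_insn "simple_return" [(simple_return)] ...)"
+pattern; the function name follows genemit's standard scheme):
+
+  rtx
+  gen_simple_return (void)
+  {
+    /* gen_exp now prints the shared rtx for SIMPLE_RETURN, so every
+       generated return uses the one canonical object.  */
+    return simple_return_rtx;
+  }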
+Index: gcc-4_5-branch/gcc/gengenrtl.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/gengenrtl.c
++++ gcc-4_5-branch/gcc/gengenrtl.c
+@@ -146,6 +146,10 @@ special_rtx (int idx)
+ || strcmp (defs[idx].enumname, "REG") == 0
+ || strcmp (defs[idx].enumname, "SUBREG") == 0
+ || strcmp (defs[idx].enumname, "MEM") == 0
++ || strcmp (defs[idx].enumname, "PC") == 0
++ || strcmp (defs[idx].enumname, "CC0") == 0
++ || strcmp (defs[idx].enumname, "RETURN") == 0
++ || strcmp (defs[idx].enumname, "SIMPLE_RETURN") == 0
+ || strcmp (defs[idx].enumname, "CONST_VECTOR") == 0);
+ }
+
+Index: gcc-4_5-branch/gcc/haifa-sched.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/haifa-sched.c
++++ gcc-4_5-branch/gcc/haifa-sched.c
+@@ -4231,7 +4231,7 @@ xrecalloc (void *p, size_t new_nmemb, si
+ /* Helper function.
+ Find fallthru edge from PRED. */
+ edge
+-find_fallthru_edge (basic_block pred)
++find_fallthru_edge_from (basic_block pred)
+ {
+ edge e;
+ edge_iterator ei;
+@@ -4298,7 +4298,7 @@ init_before_recovery (basic_block *befor
+ edge e;
+
+ last = EXIT_BLOCK_PTR->prev_bb;
+- e = find_fallthru_edge (last);
++ e = find_fallthru_edge_from (last);
+
+ if (e)
+ {
+@@ -5234,6 +5234,11 @@ check_cfg (rtx head, rtx tail)
+ gcc_assert (/* Usual case. */
+ (EDGE_COUNT (bb->succs) > 1
+ && !BARRIER_P (NEXT_INSN (head)))
++ /* Special cases, see cfglayout.c:
++ fixup_reorder_chain. */
++ || (EDGE_COUNT (bb->succs) == 1
++ && (!onlyjump_p (head)
++ || returnjump_p (head)))
+ /* Or jump to the next instruction. */
+ || (EDGE_COUNT (bb->succs) == 1
+ && (BB_HEAD (EDGE_I (bb->succs, 0)->dest)
+Index: gcc-4_5-branch/gcc/ifcvt.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/ifcvt.c
++++ gcc-4_5-branch/gcc/ifcvt.c
+@@ -105,7 +105,7 @@ static int find_if_case_1 (basic_block,
+ static int find_if_case_2 (basic_block, edge, edge);
+ static int find_memory (rtx *, void *);
+ static int dead_or_predicable (basic_block, basic_block, basic_block,
+- basic_block, int);
++ edge, int);
+ static void noce_emit_move_insn (rtx, rtx);
+ static rtx block_has_only_trap (basic_block);
+
+@@ -3791,6 +3791,7 @@ find_if_case_1 (basic_block test_bb, edg
+ basic_block then_bb = then_edge->dest;
+ basic_block else_bb = else_edge->dest;
+ basic_block new_bb;
++ rtx else_target = NULL_RTX;
+ int then_bb_index;
+
+ /* If we are partitioning hot/cold basic blocks, we don't want to
+@@ -3840,9 +3841,16 @@ find_if_case_1 (basic_block test_bb, edg
+ predictable_edge_p (then_edge)))))
+ return FALSE;
+
++ if (else_bb == EXIT_BLOCK_PTR)
++ {
++ rtx jump = BB_END (else_edge->src);
++ gcc_assert (JUMP_P (jump));
++ else_target = JUMP_LABEL (jump);
++ }
++
+ /* Registers set are dead, or are predicable. */
+ if (! dead_or_predicable (test_bb, then_bb, else_bb,
+- single_succ (then_bb), 1))
++ single_succ_edge (then_bb), 1))
+ return FALSE;
+
+ /* Conversion went ok, including moving the insns and fixing up the
+@@ -3859,6 +3867,9 @@ find_if_case_1 (basic_block test_bb, edg
+ redirect_edge_succ (FALLTHRU_EDGE (test_bb), else_bb);
+ new_bb = 0;
+ }
++ else if (else_bb == EXIT_BLOCK_PTR)
++ new_bb = force_nonfallthru_and_redirect (FALLTHRU_EDGE (test_bb),
++ else_bb, else_target);
+ else
+ new_bb = redirect_edge_and_branch_force (FALLTHRU_EDGE (test_bb),
+ else_bb);
+@@ -3957,7 +3968,7 @@ find_if_case_2 (basic_block test_bb, edg
+ return FALSE;
+
+ /* Registers set are dead, or are predicable. */
+- if (! dead_or_predicable (test_bb, else_bb, then_bb, else_succ->dest, 0))
++ if (! dead_or_predicable (test_bb, else_bb, then_bb, else_succ, 0))
+ return FALSE;
+
+ /* Conversion went ok, including moving the insns and fixing up the
+@@ -3995,12 +4006,34 @@ find_memory (rtx *px, void *data ATTRIBU
+
+ static int
+ dead_or_predicable (basic_block test_bb, basic_block merge_bb,
+- basic_block other_bb, basic_block new_dest, int reversep)
++ basic_block other_bb, edge dest_edge, int reversep)
+ {
+- rtx head, end, jump, earliest = NULL_RTX, old_dest, new_label = NULL_RTX;
++ basic_block new_dest = dest_edge->dest;
++ rtx head, end, jump, earliest = NULL_RTX, old_dest;
+ bitmap merge_set = NULL;
+ /* Number of pending changes. */
+ int n_validated_changes = 0;
++ rtx new_dest_label;
++
++ jump = BB_END (dest_edge->src);
++ if (JUMP_P (jump))
++ {
++ new_dest_label = JUMP_LABEL (jump);
++ if (new_dest_label == NULL_RTX)
++ {
++ new_dest_label = PATTERN (jump);
++ gcc_assert (ANY_RETURN_P (new_dest_label));
++ }
++ }
++ else if (other_bb != new_dest)
++ {
++ if (new_dest == EXIT_BLOCK_PTR)
++ new_dest_label = ret_rtx;
++ else
++ new_dest_label = block_label (new_dest);
++ }
++ else
++ new_dest_label = NULL_RTX;
+
+ jump = BB_END (test_bb);
+
+@@ -4220,10 +4253,9 @@ dead_or_predicable (basic_block test_bb,
+ old_dest = JUMP_LABEL (jump);
+ if (other_bb != new_dest)
+ {
+- new_label = block_label (new_dest);
+ if (reversep
+- ? ! invert_jump_1 (jump, new_label)
+- : ! redirect_jump_1 (jump, new_label))
++ ? ! invert_jump_1 (jump, new_dest_label)
++ : ! redirect_jump_1 (jump, new_dest_label))
+ goto cancel;
+ }
+
+@@ -4234,7 +4266,7 @@ dead_or_predicable (basic_block test_bb,
+
+ if (other_bb != new_dest)
+ {
+- redirect_jump_2 (jump, old_dest, new_label, 0, reversep);
++ redirect_jump_2 (jump, old_dest, new_dest_label, 0, reversep);
+
+ redirect_edge_succ (BRANCH_EDGE (test_bb), new_dest);
+ if (reversep)
+Index: gcc-4_5-branch/gcc/jump.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/jump.c
++++ gcc-4_5-branch/gcc/jump.c
+@@ -29,7 +29,8 @@ along with GCC; see the file COPYING3.
+ JUMP_LABEL internal field. With this we can detect labels that
+ become unused because of the deletion of all the jumps that
+ formerly used them. The JUMP_LABEL info is sometimes looked
+- at by later passes.
++ at by later passes. For return insns, it contains either a
++ RETURN or a SIMPLE_RETURN rtx.
+
+ The subroutines redirect_jump and invert_jump are used
+ from other passes as well. */
+@@ -742,10 +743,10 @@ condjump_p (const_rtx insn)
+ return (GET_CODE (x) == IF_THEN_ELSE
+ && ((GET_CODE (XEXP (x, 2)) == PC
+ && (GET_CODE (XEXP (x, 1)) == LABEL_REF
+- || GET_CODE (XEXP (x, 1)) == RETURN))
++ || ANY_RETURN_P (XEXP (x, 1))))
+ || (GET_CODE (XEXP (x, 1)) == PC
+ && (GET_CODE (XEXP (x, 2)) == LABEL_REF
+- || GET_CODE (XEXP (x, 2)) == RETURN))));
++ || ANY_RETURN_P (XEXP (x, 2))))));
+ }
+
+ /* Return nonzero if INSN is a (possibly) conditional jump inside a
+@@ -774,11 +775,11 @@ condjump_in_parallel_p (const_rtx insn)
+ return 0;
+ if (XEXP (SET_SRC (x), 2) == pc_rtx
+ && (GET_CODE (XEXP (SET_SRC (x), 1)) == LABEL_REF
+- || GET_CODE (XEXP (SET_SRC (x), 1)) == RETURN))
++	   || ANY_RETURN_P (XEXP (SET_SRC (x), 1))))
+ return 1;
+ if (XEXP (SET_SRC (x), 1) == pc_rtx
+ && (GET_CODE (XEXP (SET_SRC (x), 2)) == LABEL_REF
+- || GET_CODE (XEXP (SET_SRC (x), 2)) == RETURN))
++ || ANY_RETURN_P (XEXP (SET_SRC (x), 2))))
+ return 1;
+ return 0;
+ }
+@@ -840,8 +841,9 @@ any_condjump_p (const_rtx insn)
+ a = GET_CODE (XEXP (SET_SRC (x), 1));
+ b = GET_CODE (XEXP (SET_SRC (x), 2));
+
+- return ((b == PC && (a == LABEL_REF || a == RETURN))
+- || (a == PC && (b == LABEL_REF || b == RETURN)));
++ return ((b == PC && (a == LABEL_REF || a == RETURN || a == SIMPLE_RETURN))
++ || (a == PC
++ && (b == LABEL_REF || b == RETURN || b == SIMPLE_RETURN)));
+ }
+
+ /* Return the label of a conditional jump. */
+@@ -878,6 +880,7 @@ returnjump_p_1 (rtx *loc, void *data ATT
+ switch (GET_CODE (x))
+ {
+ case RETURN:
++ case SIMPLE_RETURN:
+ case EH_RETURN:
+ return true;
+
+@@ -1200,7 +1203,7 @@ delete_related_insns (rtx insn)
+ /* If deleting a jump, decrement the count of the label,
+ and delete the label if it is now unused. */
+
+- if (JUMP_P (insn) && JUMP_LABEL (insn))
++ if (JUMP_P (insn) && JUMP_LABEL (insn) && !ANY_RETURN_P (JUMP_LABEL (insn)))
+ {
+ rtx lab = JUMP_LABEL (insn), lab_next;
+
+@@ -1331,6 +1334,18 @@ delete_for_peephole (rtx from, rtx to)
+ is also an unconditional jump in that case. */
+ }
+
++/* A helper function for redirect_exp_1; examines its input X and returns
++   X for a return rtx, ret_rtx if X is NULL, else a LABEL_REF around X.  */
++static rtx
++redirect_target (rtx x)
++{
++ if (x == NULL_RTX)
++ return ret_rtx;
++ if (!ANY_RETURN_P (x))
++ return gen_rtx_LABEL_REF (Pmode, x);
++ return x;
++}
++
+ /* Throughout LOC, redirect OLABEL to NLABEL. Treat null OLABEL or
+ NLABEL as a return. Accrue modifications into the change group. */
+
+@@ -1342,37 +1357,19 @@ redirect_exp_1 (rtx *loc, rtx olabel, rt
+ int i;
+ const char *fmt;
+
+- if (code == LABEL_REF)
+- {
+- if (XEXP (x, 0) == olabel)
+- {
+- rtx n;
+- if (nlabel)
+- n = gen_rtx_LABEL_REF (Pmode, nlabel);
+- else
+- n = gen_rtx_RETURN (VOIDmode);
+-
+- validate_change (insn, loc, n, 1);
+- return;
+- }
+- }
+- else if (code == RETURN && olabel == 0)
++ if ((code == LABEL_REF && XEXP (x, 0) == olabel)
++ || x == olabel)
+ {
+- if (nlabel)
+- x = gen_rtx_LABEL_REF (Pmode, nlabel);
+- else
+- x = gen_rtx_RETURN (VOIDmode);
+- if (loc == &PATTERN (insn))
+- x = gen_rtx_SET (VOIDmode, pc_rtx, x);
+- validate_change (insn, loc, x, 1);
++ validate_change (insn, loc, redirect_target (nlabel), 1);
+ return;
+ }
+
+- if (code == SET && nlabel == 0 && SET_DEST (x) == pc_rtx
++ if (code == SET && SET_DEST (x) == pc_rtx
++ && ANY_RETURN_P (nlabel)
+ && GET_CODE (SET_SRC (x)) == LABEL_REF
+ && XEXP (SET_SRC (x), 0) == olabel)
+ {
+- validate_change (insn, loc, gen_rtx_RETURN (VOIDmode), 1);
++ validate_change (insn, loc, nlabel, 1);
+ return;
+ }
+
+@@ -1409,6 +1406,7 @@ redirect_jump_1 (rtx jump, rtx nlabel)
+ int ochanges = num_validated_changes ();
+ rtx *loc, asmop;
+
++ gcc_assert (nlabel);
+ asmop = extract_asm_operands (PATTERN (jump));
+ if (asmop)
+ {
+@@ -1430,17 +1428,20 @@ redirect_jump_1 (rtx jump, rtx nlabel)
+ jump target label is unused as a result, it and the code following
+ it may be deleted.
+
+- If NLABEL is zero, we are to turn the jump into a (possibly conditional)
+- RETURN insn.
++ Normally, NLABEL will be a label, but it may also be a RETURN or
++ SIMPLE_RETURN rtx; in that case we are to turn the jump into a
++ (possibly conditional) return insn.
+
+ The return value will be 1 if the change was made, 0 if it wasn't
+- (this can only occur for NLABEL == 0). */
++ (this can only occur when trying to produce return insns). */
+
+ int
+ redirect_jump (rtx jump, rtx nlabel, int delete_unused)
+ {
+ rtx olabel = JUMP_LABEL (jump);
+
++ gcc_assert (nlabel != NULL_RTX);
++
+ if (nlabel == olabel)
+ return 1;
+
+@@ -1452,7 +1453,7 @@ redirect_jump (rtx jump, rtx nlabel, int
+ }
+
+ /* Fix up JUMP_LABEL and label ref counts after OLABEL has been replaced with
+- NLABEL in JUMP.
++ NEW_DEST in JUMP.
+ If DELETE_UNUSED is positive, delete related insn to OLABEL if its ref
+ count has dropped to zero. */
+ void
+@@ -1468,13 +1469,14 @@ redirect_jump_2 (rtx jump, rtx olabel, r
+ about this. */
+ gcc_assert (delete_unused >= 0);
+ JUMP_LABEL (jump) = nlabel;
+- if (nlabel)
++ if (nlabel && !ANY_RETURN_P (nlabel))
+ ++LABEL_NUSES (nlabel);
+
+ /* Update labels in any REG_EQUAL note. */
+ if ((note = find_reg_note (jump, REG_EQUAL, NULL_RTX)) != NULL_RTX)
+ {
+- if (!nlabel || (invert && !invert_exp_1 (XEXP (note, 0), jump)))
++ if (ANY_RETURN_P (nlabel)
++ || (invert && !invert_exp_1 (XEXP (note, 0), jump)))
+ remove_note (jump, note);
+ else
+ {
+@@ -1483,7 +1485,8 @@ redirect_jump_2 (rtx jump, rtx olabel, r
+ }
+ }
+
+- if (olabel && --LABEL_NUSES (olabel) == 0 && delete_unused > 0
++ if (olabel && !ANY_RETURN_P (olabel)
++ && --LABEL_NUSES (olabel) == 0 && delete_unused > 0
+ /* Undefined labels will remain outside the insn stream. */
+ && INSN_UID (olabel))
+ delete_related_insns (olabel);
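+
+Usage sketch for the new redirect_jump contract (mirroring the callers
+changed in function.c above; simple_p is a stand-in flag as used there).
+NULL_RTX is no longer accepted:
+
+  /* Turn JUMP into a (possibly conditional) return of the right kind.  */
+  rtx dest = simple_p ? simple_return_rtx : ret_rtx;
+  if (!redirect_jump (jump, dest, 0))
+    ;  /* redirection failed; JUMP is unchanged */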
+Index: gcc-4_5-branch/gcc/opts.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/opts.c
++++ gcc-4_5-branch/gcc/opts.c
+@@ -909,6 +909,7 @@ decode_options (unsigned int argc, const
+ flag_ipa_cp = opt2;
+ flag_ipa_sra = opt2;
+ flag_ee = opt2;
++ flag_shrink_wrap = opt2;
+
+ /* Track fields in field-sensitive alias analysis. */
+ set_param_value ("max-fields-for-field-sensitive",
+Index: gcc-4_5-branch/gcc/print-rtl.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/print-rtl.c
++++ gcc-4_5-branch/gcc/print-rtl.c
+@@ -308,9 +308,16 @@ print_rtx (const_rtx in_rtx)
+ }
+ }
+ else if (i == 8 && JUMP_P (in_rtx) && JUMP_LABEL (in_rtx) != NULL)
+- /* Output the JUMP_LABEL reference. */
+- fprintf (outfile, "\n%s%*s -> %d", print_rtx_head, indent * 2, "",
+- INSN_UID (JUMP_LABEL (in_rtx)));
++ {
++ /* Output the JUMP_LABEL reference. */
++ fprintf (outfile, "\n%s%*s -> ", print_rtx_head, indent * 2, "");
++ if (GET_CODE (JUMP_LABEL (in_rtx)) == RETURN)
++ fprintf (outfile, "return");
++ else if (GET_CODE (JUMP_LABEL (in_rtx)) == SIMPLE_RETURN)
++ fprintf (outfile, "simple_return");
++ else
++ fprintf (outfile, "%d", INSN_UID (JUMP_LABEL (in_rtx)));
++ }
+ else if (i == 0 && GET_CODE (in_rtx) == VALUE)
+ {
+ #ifndef GENERATOR_FILE
+Index: gcc-4_5-branch/gcc/reorg.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/reorg.c
++++ gcc-4_5-branch/gcc/reorg.c
+@@ -161,8 +161,11 @@ static rtx *unfilled_firstobj;
+ #define unfilled_slots_next \
+ ((rtx *) obstack_next_free (&unfilled_slots_obstack))
+
+-/* Points to the label before the end of the function. */
+-static rtx end_of_function_label;
++/* Points to the label before the end of the function, or before a
++ return insn. */
++static rtx function_return_label;
++/* Likewise for a simple_return. */
++static rtx function_simple_return_label;
+
+ /* Mapping between INSN_UID's and position in the code since INSN_UID's do
+ not always monotonically increase. */
+@@ -175,7 +178,7 @@ static int stop_search_p (rtx, int);
+ static int resource_conflicts_p (struct resources *, struct resources *);
+ static int insn_references_resource_p (rtx, struct resources *, bool);
+ static int insn_sets_resource_p (rtx, struct resources *, bool);
+-static rtx find_end_label (void);
++static rtx find_end_label (rtx);
+ static rtx emit_delay_sequence (rtx, rtx, int);
+ static rtx add_to_delay_list (rtx, rtx);
+ static rtx delete_from_delay_slot (rtx);
+@@ -220,6 +223,15 @@ static void relax_delay_slots (rtx);
+ static void make_return_insns (rtx);
+ #endif
+
++/* Return true iff INSN is a simplejump, or any kind of return insn. */
++
++static bool
++simplejump_or_return_p (rtx insn)
++{
++ return (JUMP_P (insn)
++ && (simplejump_p (insn) || ANY_RETURN_P (PATTERN (insn))));
++}
++
+ /* Return TRUE if this insn should stop the search for insn to fill delay
+ slots. LABELS_P indicates that labels should terminate the search.
+ In all cases, jumps terminate the search. */
+@@ -335,23 +347,29 @@ insn_sets_resource_p (rtx insn, struct r
+
+ ??? There may be a problem with the current implementation. Suppose
+ we start with a bare RETURN insn and call find_end_label. It may set
+- end_of_function_label just before the RETURN. Suppose the machinery
++ function_return_label just before the RETURN. Suppose the machinery
+ is able to fill the delay slot of the RETURN insn afterwards. Then
+- end_of_function_label is no longer valid according to the property
++ function_return_label is no longer valid according to the property
+ described above and find_end_label will still return it unmodified.
+ Note that this is probably mitigated by the following observation:
+- once end_of_function_label is made, it is very likely the target of
++ once function_return_label is made, it is very likely the target of
+ a jump, so filling the delay slot of the RETURN will be much more
+ difficult. */
+
+ static rtx
+-find_end_label (void)
++find_end_label (rtx kind)
+ {
+ rtx insn;
++ rtx *plabel;
++
++ if (kind == ret_rtx)
++ plabel = &function_return_label;
++ else
++ plabel = &function_simple_return_label;
+
+ /* If we found one previously, return it. */
+- if (end_of_function_label)
+- return end_of_function_label;
++ if (*plabel)
++ return *plabel;
+
+ /* Otherwise, see if there is a label at the end of the function. If there
+ is, it must be that RETURN insns aren't needed, so that is our return
+@@ -366,44 +384,44 @@ find_end_label (void)
+
+ /* When a target threads its epilogue we might already have a
+ suitable return insn. If so put a label before it for the
+- end_of_function_label. */
++ function_return_label. */
+ if (BARRIER_P (insn)
+ && JUMP_P (PREV_INSN (insn))
+- && GET_CODE (PATTERN (PREV_INSN (insn))) == RETURN)
++ && PATTERN (PREV_INSN (insn)) == kind)
+ {
+ rtx temp = PREV_INSN (PREV_INSN (insn));
+- end_of_function_label = gen_label_rtx ();
+- LABEL_NUSES (end_of_function_label) = 0;
++ rtx label = gen_label_rtx ();
++ LABEL_NUSES (label) = 0;
+
+ /* Put the label before an USE insns that may precede the RETURN insn. */
+ while (GET_CODE (temp) == USE)
+ temp = PREV_INSN (temp);
+
+- emit_label_after (end_of_function_label, temp);
++ emit_label_after (label, temp);
++ *plabel = label;
+ }
+
+ else if (LABEL_P (insn))
+- end_of_function_label = insn;
++ *plabel = insn;
+ else
+ {
+- end_of_function_label = gen_label_rtx ();
+- LABEL_NUSES (end_of_function_label) = 0;
++ rtx label = gen_label_rtx ();
++ LABEL_NUSES (label) = 0;
+ /* If the basic block reorder pass moves the return insn to
+ some other place try to locate it again and put our
+- end_of_function_label there. */
+- while (insn && ! (JUMP_P (insn)
+- && (GET_CODE (PATTERN (insn)) == RETURN)))
++ function_return_label there. */
++ while (insn && ! (JUMP_P (insn) && (PATTERN (insn) == kind)))
+ insn = PREV_INSN (insn);
+ if (insn)
+ {
+ insn = PREV_INSN (insn);
+
+- /* Put the label before an USE insns that may proceed the
++      /* Put the label before any USE insns that may precede the
+ RETURN insn. */
+ while (GET_CODE (insn) == USE)
+ insn = PREV_INSN (insn);
+
+- emit_label_after (end_of_function_label, insn);
++ emit_label_after (label, insn);
+ }
+ else
+ {
+@@ -413,19 +431,16 @@ find_end_label (void)
+ && ! HAVE_return
+ #endif
+ )
+- {
+- /* The RETURN insn has its delay slot filled so we cannot
+- emit the label just before it. Since we already have
+- an epilogue and cannot emit a new RETURN, we cannot
+- emit the label at all. */
+- end_of_function_label = NULL_RTX;
+- return end_of_function_label;
+- }
++ /* The RETURN insn has its delay slot filled so we cannot
++ emit the label just before it. Since we already have
++ an epilogue and cannot emit a new RETURN, we cannot
++ emit the label at all. */
++ return NULL_RTX;
+ #endif /* HAVE_epilogue */
+
+ /* Otherwise, make a new label and emit a RETURN and BARRIER,
+ if needed. */
+- emit_label (end_of_function_label);
++ emit_label (label);
+ #ifdef HAVE_return
+ /* We don't bother trying to create a return insn if the
+ epilogue has filled delay-slots; we would have to try and
+@@ -437,19 +452,21 @@ find_end_label (void)
+ /* The return we make may have delay slots too. */
+ rtx insn = gen_return ();
+ insn = emit_jump_insn (insn);
++ JUMP_LABEL (insn) = ret_rtx;
+ emit_barrier ();
+ if (num_delay_slots (insn) > 0)
+ obstack_ptr_grow (&unfilled_slots_obstack, insn);
+ }
+ #endif
+ }
++ *plabel = label;
+ }
+
+ /* Show one additional use for this label so it won't go away until
+ we are done. */
+- ++LABEL_NUSES (end_of_function_label);
++ ++LABEL_NUSES (*plabel);
+
+- return end_of_function_label;
++ return *plabel;
+ }
+
+ /* Put INSN and LIST together in a SEQUENCE rtx of LENGTH, and replace
+@@ -797,10 +814,8 @@ optimize_skip (rtx insn)
+ if ((next_trial == next_active_insn (JUMP_LABEL (insn))
+ && ! (next_trial == 0 && crtl->epilogue_delay_list != 0))
+ || (next_trial != 0
+- && JUMP_P (next_trial)
+- && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)
+- && (simplejump_p (next_trial)
+- || GET_CODE (PATTERN (next_trial)) == RETURN)))
++ && simplejump_or_return_p (next_trial)
++ && JUMP_LABEL (insn) == JUMP_LABEL (next_trial)))
+ {
+ if (eligible_for_annul_false (insn, 0, trial, flags))
+ {
+@@ -819,13 +834,11 @@ optimize_skip (rtx insn)
+ branch, thread our jump to the target of that branch. Don't
+ change this into a RETURN here, because it may not accept what
+ we have in the delay slot. We'll fix this up later. */
+- if (next_trial && JUMP_P (next_trial)
+- && (simplejump_p (next_trial)
+- || GET_CODE (PATTERN (next_trial)) == RETURN))
++ if (next_trial && simplejump_or_return_p (next_trial))
+ {
+ rtx target_label = JUMP_LABEL (next_trial);
+- if (target_label == 0)
+- target_label = find_end_label ();
++ if (ANY_RETURN_P (target_label))
++ target_label = find_end_label (target_label);
+
+ if (target_label)
+ {
+@@ -866,7 +879,7 @@ get_jump_flags (rtx insn, rtx label)
+ if (JUMP_P (insn)
+ && (condjump_p (insn) || condjump_in_parallel_p (insn))
+ && INSN_UID (insn) <= max_uid
+- && label != 0
++ && label != 0 && !ANY_RETURN_P (label)
+ && INSN_UID (label) <= max_uid)
+ flags
+ = (uid_to_ruid[INSN_UID (label)] > uid_to_ruid[INSN_UID (insn)])
+@@ -1038,7 +1051,7 @@ get_branch_condition (rtx insn, rtx targ
+ pat = XVECEXP (pat, 0, 0);
+
+ if (GET_CODE (pat) == RETURN)
+- return target == 0 ? const_true_rtx : 0;
++ return ANY_RETURN_P (target) ? const_true_rtx : 0;
+
+ else if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
+ return 0;
+@@ -1318,7 +1331,11 @@ steal_delay_list_from_target (rtx insn,
+ }
+
+ /* Show the place to which we will be branching. */
+- *pnew_thread = next_active_insn (JUMP_LABEL (XVECEXP (seq, 0, 0)));
++ temp = JUMP_LABEL (XVECEXP (seq, 0, 0));
++ if (ANY_RETURN_P (temp))
++ *pnew_thread = temp;
++ else
++ *pnew_thread = next_active_insn (temp);
+
+ /* Add any new insns to the delay list and update the count of the
+ number of slots filled. */
+@@ -1358,8 +1375,7 @@ steal_delay_list_from_fallthrough (rtx i
+ /* We can't do anything if SEQ's delay insn isn't an
+ unconditional branch. */
+
+- if (! simplejump_p (XVECEXP (seq, 0, 0))
+- && GET_CODE (PATTERN (XVECEXP (seq, 0, 0))) != RETURN)
++ if (! simplejump_or_return_p (XVECEXP (seq, 0, 0)))
+ return delay_list;
+
+ for (i = 1; i < XVECLEN (seq, 0); i++)
+@@ -1827,7 +1843,7 @@ own_thread_p (rtx thread, rtx label, int
+ rtx insn;
+
+ /* We don't own the function end. */
+- if (thread == 0)
++ if (ANY_RETURN_P (thread))
+ return 0;
+
+ /* Get the first active insn, or THREAD, if it is an active insn. */
+@@ -2245,7 +2261,8 @@ fill_simple_delay_slots (int non_jumps_p
+ && (!JUMP_P (insn)
+ || ((condjump_p (insn) || condjump_in_parallel_p (insn))
+ && ! simplejump_p (insn)
+- && JUMP_LABEL (insn) != 0)))
++ && JUMP_LABEL (insn) != 0
++ && !ANY_RETURN_P (JUMP_LABEL (insn)))))
+ {
+ /* Invariant: If insn is a JUMP_INSN, the insn's jump
+ label. Otherwise, zero. */
+@@ -2270,7 +2287,7 @@ fill_simple_delay_slots (int non_jumps_p
+ target = JUMP_LABEL (insn);
+ }
+
+- if (target == 0)
++ if (target == 0 || ANY_RETURN_P (target))
+ for (trial = next_nonnote_insn (insn); trial; trial = next_trial)
+ {
+ next_trial = next_nonnote_insn (trial);
+@@ -2349,6 +2366,7 @@ fill_simple_delay_slots (int non_jumps_p
+ && JUMP_P (trial)
+ && simplejump_p (trial)
+ && (target == 0 || JUMP_LABEL (trial) == target)
++ && !ANY_RETURN_P (JUMP_LABEL (trial))
+ && (next_trial = next_active_insn (JUMP_LABEL (trial))) != 0
+ && ! (NONJUMP_INSN_P (next_trial)
+ && GET_CODE (PATTERN (next_trial)) == SEQUENCE)
+@@ -2371,7 +2389,7 @@ fill_simple_delay_slots (int non_jumps_p
+ if (new_label != 0)
+ new_label = get_label_before (new_label);
+ else
+- new_label = find_end_label ();
++ new_label = find_end_label (simple_return_rtx);
+
+ if (new_label)
+ {
+@@ -2503,7 +2521,8 @@ fill_simple_delay_slots (int non_jumps_p
+
+ /* Follow any unconditional jump at LABEL;
+ return the ultimate label reached by any such chain of jumps.
+- Return null if the chain ultimately leads to a return instruction.
++ Return a suitable return rtx if the chain ultimately leads to a
++ return instruction.
+ If LABEL is not followed by a jump, return LABEL.
+ If the chain loops or we can't find end, return LABEL,
+ since that tells caller to avoid changing the insn. */
+@@ -2518,6 +2537,7 @@ follow_jumps (rtx label)
+
+ for (depth = 0;
+ (depth < 10
++ && !ANY_RETURN_P (value)
+ && (insn = next_active_insn (value)) != 0
+ && JUMP_P (insn)
+ && ((JUMP_LABEL (insn) != 0 && any_uncondjump_p (insn)
+@@ -2527,18 +2547,22 @@ follow_jumps (rtx label)
+ && BARRIER_P (next));
+ depth++)
+ {
+- rtx tem;
++ rtx this_label = JUMP_LABEL (insn);
+
+ /* If we have found a cycle, make the insn jump to itself. */
+- if (JUMP_LABEL (insn) == label)
++ if (this_label == label)
+ return label;
+
+- tem = next_active_insn (JUMP_LABEL (insn));
+- if (tem && (GET_CODE (PATTERN (tem)) == ADDR_VEC
++ if (!ANY_RETURN_P (this_label))
++ {
++ rtx tem = next_active_insn (this_label);
++ if (tem
++ && (GET_CODE (PATTERN (tem)) == ADDR_VEC
+ || GET_CODE (PATTERN (tem)) == ADDR_DIFF_VEC))
+- break;
++ break;
++ }
+
+- value = JUMP_LABEL (insn);
++ value = this_label;
+ }
+ if (depth == 10)
+ return label;
+@@ -2901,6 +2925,7 @@ fill_slots_from_thread (rtx insn, rtx co
+ arithmetic insn after the jump insn and put the arithmetic insn in the
+ delay slot. If we can't do this, return. */
+ if (delay_list == 0 && likely && new_thread
++ && !ANY_RETURN_P (new_thread)
+ && NONJUMP_INSN_P (new_thread)
+ && GET_CODE (PATTERN (new_thread)) != ASM_INPUT
+ && asm_noperands (PATTERN (new_thread)) < 0)
+@@ -2985,16 +3010,14 @@ fill_slots_from_thread (rtx insn, rtx co
+
+ gcc_assert (thread_if_true);
+
+- if (new_thread && JUMP_P (new_thread)
+- && (simplejump_p (new_thread)
+- || GET_CODE (PATTERN (new_thread)) == RETURN)
++ if (new_thread && simplejump_or_return_p (new_thread)
+ && redirect_with_delay_list_safe_p (insn,
+ JUMP_LABEL (new_thread),
+ delay_list))
+ new_thread = follow_jumps (JUMP_LABEL (new_thread));
+
+- if (new_thread == 0)
+- label = find_end_label ();
++ if (ANY_RETURN_P (new_thread))
++ label = find_end_label (new_thread);
+ else if (LABEL_P (new_thread))
+ label = new_thread;
+ else
+@@ -3340,11 +3363,12 @@ relax_delay_slots (rtx first)
+ group of consecutive labels. */
+ if (JUMP_P (insn)
+ && (condjump_p (insn) || condjump_in_parallel_p (insn))
+- && (target_label = JUMP_LABEL (insn)) != 0)
++ && (target_label = JUMP_LABEL (insn)) != 0
++ && !ANY_RETURN_P (target_label))
+ {
+ target_label = skip_consecutive_labels (follow_jumps (target_label));
+- if (target_label == 0)
+- target_label = find_end_label ();
++ if (ANY_RETURN_P (target_label))
++ target_label = find_end_label (target_label);
+
+ if (target_label && next_active_insn (target_label) == next
+ && ! condjump_in_parallel_p (insn))
+@@ -3359,9 +3383,8 @@ relax_delay_slots (rtx first)
+ /* See if this jump conditionally branches around an unconditional
+ jump. If so, invert this jump and point it to the target of the
+ second jump. */
+- if (next && JUMP_P (next)
++ if (next && simplejump_or_return_p (next)
+ && any_condjump_p (insn)
+- && (simplejump_p (next) || GET_CODE (PATTERN (next)) == RETURN)
+ && target_label
+ && next_active_insn (target_label) == next_active_insn (next)
+ && no_labels_between_p (insn, next))
+@@ -3403,8 +3426,7 @@ relax_delay_slots (rtx first)
+ Don't do this if we expect the conditional branch to be true, because
+ we would then be making the more common case longer. */
+
+- if (JUMP_P (insn)
+- && (simplejump_p (insn) || GET_CODE (PATTERN (insn)) == RETURN)
++ if (simplejump_or_return_p (insn)
+ && (other = prev_active_insn (insn)) != 0
+ && any_condjump_p (other)
+ && no_labels_between_p (other, insn)
+@@ -3445,10 +3467,10 @@ relax_delay_slots (rtx first)
+ Only do so if optimizing for size since this results in slower, but
+ smaller code. */
+ if (optimize_function_for_size_p (cfun)
+- && GET_CODE (PATTERN (delay_insn)) == RETURN
++ && ANY_RETURN_P (PATTERN (delay_insn))
+ && next
+ && JUMP_P (next)
+- && GET_CODE (PATTERN (next)) == RETURN)
++ && PATTERN (next) == PATTERN (delay_insn))
+ {
+ rtx after;
+ int i;
+@@ -3487,14 +3509,16 @@ relax_delay_slots (rtx first)
+ continue;
+
+ target_label = JUMP_LABEL (delay_insn);
++ if (target_label && ANY_RETURN_P (target_label))
++ continue;
+
+ if (target_label)
+ {
+ /* If this jump goes to another unconditional jump, thread it, but
+ don't convert a jump into a RETURN here. */
+ trial = skip_consecutive_labels (follow_jumps (target_label));
+- if (trial == 0)
+- trial = find_end_label ();
++ if (ANY_RETURN_P (trial))
++ trial = find_end_label (trial);
+
+ if (trial && trial != target_label
+ && redirect_with_delay_slots_safe_p (delay_insn, trial, insn))
+@@ -3517,7 +3541,7 @@ relax_delay_slots (rtx first)
+ later incorrectly compute register live/death info. */
+ rtx tmp = next_active_insn (trial);
+ if (tmp == 0)
+- tmp = find_end_label ();
++ tmp = find_end_label (simple_return_rtx);
+
+ if (tmp)
+ {
+@@ -3537,14 +3561,12 @@ relax_delay_slots (rtx first)
+ delay list and that insn is redundant, thread the jump. */
+ if (trial && GET_CODE (PATTERN (trial)) == SEQUENCE
+ && XVECLEN (PATTERN (trial), 0) == 2
+- && JUMP_P (XVECEXP (PATTERN (trial), 0, 0))
+- && (simplejump_p (XVECEXP (PATTERN (trial), 0, 0))
+- || GET_CODE (PATTERN (XVECEXP (PATTERN (trial), 0, 0))) == RETURN)
++ && simplejump_or_return_p (XVECEXP (PATTERN (trial), 0, 0))
+ && redundant_insn (XVECEXP (PATTERN (trial), 0, 1), insn, 0))
+ {
+ target_label = JUMP_LABEL (XVECEXP (PATTERN (trial), 0, 0));
+- if (target_label == 0)
+- target_label = find_end_label ();
++ if (ANY_RETURN_P (target_label))
++ target_label = find_end_label (target_label);
+
+ if (target_label
+ && redirect_with_delay_slots_safe_p (delay_insn, target_label,
+@@ -3622,16 +3644,15 @@ relax_delay_slots (rtx first)
+ a RETURN here. */
+ if (! INSN_ANNULLED_BRANCH_P (delay_insn)
+ && any_condjump_p (delay_insn)
+- && next && JUMP_P (next)
+- && (simplejump_p (next) || GET_CODE (PATTERN (next)) == RETURN)
++ && next && simplejump_or_return_p (next)
+ && next_active_insn (target_label) == next_active_insn (next)
+ && no_labels_between_p (insn, next))
+ {
+ rtx label = JUMP_LABEL (next);
+ rtx old_label = JUMP_LABEL (delay_insn);
+
+- if (label == 0)
+- label = find_end_label ();
++ if (ANY_RETURN_P (label))
++ label = find_end_label (label);
+
+ /* find_end_label can generate a new label. Check this first. */
+ if (label
+@@ -3692,7 +3713,8 @@ static void
+ make_return_insns (rtx first)
+ {
+ rtx insn, jump_insn, pat;
+- rtx real_return_label = end_of_function_label;
++ rtx real_return_label = function_return_label;
++ rtx real_simple_return_label = function_simple_return_label;
+ int slots, i;
+
+ #ifdef DELAY_SLOTS_FOR_EPILOGUE
+@@ -3707,18 +3729,25 @@ make_return_insns (rtx first)
+ #endif
+
+ /* See if there is a RETURN insn in the function other than the one we
+- made for END_OF_FUNCTION_LABEL. If so, set up anything we can't change
++ made for FUNCTION_RETURN_LABEL. If so, set up anything we can't change
+ into a RETURN to jump to it. */
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+- if (JUMP_P (insn) && GET_CODE (PATTERN (insn)) == RETURN)
++ if (JUMP_P (insn) && ANY_RETURN_P (PATTERN (insn)))
+ {
+- real_return_label = get_label_before (insn);
++ rtx t = get_label_before (insn);
++ if (PATTERN (insn) == ret_rtx)
++ real_return_label = t;
++ else
++ real_simple_return_label = t;
+ break;
+ }
+
+ /* Show an extra usage of REAL_RETURN_LABEL so it won't go away if it
+- was equal to END_OF_FUNCTION_LABEL. */
+- LABEL_NUSES (real_return_label)++;
++ was equal to FUNCTION_RETURN_LABEL. */
++ if (real_return_label)
++ LABEL_NUSES (real_return_label)++;
++ if (real_simple_return_label)
++ LABEL_NUSES (real_simple_return_label)++;
+
+ /* Clear the list of insns to fill so we can use it. */
+ obstack_free (&unfilled_slots_obstack, unfilled_firstobj);
+@@ -3726,13 +3755,27 @@ make_return_insns (rtx first)
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ int flags;
++ rtx kind, real_label;
+
+ /* Only look at filled JUMP_INSNs that go to the end of function
+ label. */
+ if (!NONJUMP_INSN_P (insn)
+ || GET_CODE (PATTERN (insn)) != SEQUENCE
+- || !JUMP_P (XVECEXP (PATTERN (insn), 0, 0))
+- || JUMP_LABEL (XVECEXP (PATTERN (insn), 0, 0)) != end_of_function_label)
++ || !JUMP_P (XVECEXP (PATTERN (insn), 0, 0)))
++ continue;
++
++ if (JUMP_LABEL (XVECEXP (PATTERN (insn), 0, 0)) == function_return_label)
++ {
++ kind = ret_rtx;
++ real_label = real_return_label;
++ }
++ else if (JUMP_LABEL (XVECEXP (PATTERN (insn), 0, 0))
++ == function_simple_return_label)
++ {
++ kind = simple_return_rtx;
++ real_label = real_simple_return_label;
++ }
++ else
+ continue;
+
+ pat = PATTERN (insn);
+@@ -3740,14 +3783,12 @@ make_return_insns (rtx first)
+
+ /* If we can't make the jump into a RETURN, try to redirect it to the best
+ RETURN and go on to the next insn. */
+- if (! reorg_redirect_jump (jump_insn, NULL_RTX))
++ if (! reorg_redirect_jump (jump_insn, kind))
+ {
+ /* Make sure redirecting the jump will not invalidate the delay
+ slot insns. */
+- if (redirect_with_delay_slots_safe_p (jump_insn,
+- real_return_label,
+- insn))
+- reorg_redirect_jump (jump_insn, real_return_label);
++ if (redirect_with_delay_slots_safe_p (jump_insn, real_label, insn))
++ reorg_redirect_jump (jump_insn, real_label);
+ continue;
+ }
+
+@@ -3787,7 +3828,7 @@ make_return_insns (rtx first)
+ RETURN, delete the SEQUENCE and output the individual insns,
+ followed by the RETURN. Then set things up so we try to find
+ insns for its delay slots, if it needs some. */
+- if (GET_CODE (PATTERN (jump_insn)) == RETURN)
++ if (ANY_RETURN_P (PATTERN (jump_insn)))
+ {
+ rtx prev = PREV_INSN (insn);
+
+@@ -3804,13 +3845,16 @@ make_return_insns (rtx first)
+ else
+ /* It is probably more efficient to keep this with its current
+ delay slot as a branch to a RETURN. */
+- reorg_redirect_jump (jump_insn, real_return_label);
++ reorg_redirect_jump (jump_insn, real_label);
+ }
+
+ /* Now delete REAL_RETURN_LABEL if we never used it. Then try to fill any
+ new delay slots we have created. */
+- if (--LABEL_NUSES (real_return_label) == 0)
++ if (real_return_label != NULL_RTX && --LABEL_NUSES (real_return_label) == 0)
+ delete_related_insns (real_return_label);
++ if (real_simple_return_label != NULL_RTX
++ && --LABEL_NUSES (real_simple_return_label) == 0)
++ delete_related_insns (real_simple_return_label);
+
+ fill_simple_delay_slots (1);
+ fill_simple_delay_slots (0);
+@@ -3878,7 +3922,7 @@ dbr_schedule (rtx first)
+ init_resource_info (epilogue_insn);
+
+ /* Show we haven't computed an end-of-function label yet. */
+- end_of_function_label = 0;
++ function_return_label = function_simple_return_label = NULL_RTX;
+
+ /* Initialize the statistics for this function. */
+ memset (num_insns_needing_delays, 0, sizeof num_insns_needing_delays);
+@@ -3900,11 +3944,23 @@ dbr_schedule (rtx first)
+ /* If we made an end of function label, indicate that it is now
+ safe to delete it by undoing our prior adjustment to LABEL_NUSES.
+ If it is now unused, delete it. */
+- if (end_of_function_label && --LABEL_NUSES (end_of_function_label) == 0)
+- delete_related_insns (end_of_function_label);
++ if (function_return_label && --LABEL_NUSES (function_return_label) == 0)
++ delete_related_insns (function_return_label);
++ if (function_simple_return_label
++ && --LABEL_NUSES (function_simple_return_label) == 0)
++ delete_related_insns (function_simple_return_label);
+
++#if defined HAVE_return || defined HAVE_simple_return
++ if (
+ #ifdef HAVE_return
+- if (HAVE_return && end_of_function_label != 0)
++ (HAVE_return && function_return_label != 0)
++#else
++ 0
++#endif
++#ifdef HAVE_simple_return
++ || (HAVE_simple_return && function_simple_return_label != 0)
++#endif
++ )
+ make_return_insns (first);
+ #endif
+
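+
+Sketch of the new find_end_label interface (the idiom used throughout the
+reorg.c changes above): callers pass the kind of return they want and may
+get back NULL_RTX when no label can be made:
+
+  rtx target_label = JUMP_LABEL (jump);
+  if (target_label && ANY_RETURN_P (target_label))
+    target_label = find_end_label (target_label);
+  if (target_label)
+    reorg_redirect_jump (jump, target_label);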
+Index: gcc-4_5-branch/gcc/resource.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/resource.c
++++ gcc-4_5-branch/gcc/resource.c
+@@ -495,6 +495,8 @@ find_dead_or_set_registers (rtx target,
+ || GET_CODE (PATTERN (this_jump_insn)) == RETURN)
+ {
+ next = JUMP_LABEL (this_jump_insn);
++ if (next && ANY_RETURN_P (next))
++ next = NULL_RTX;
+ if (jump_insn == 0)
+ {
+ jump_insn = insn;
+@@ -562,9 +564,10 @@ find_dead_or_set_registers (rtx target,
+ AND_COMPL_HARD_REG_SET (scratch, needed.regs);
+ AND_COMPL_HARD_REG_SET (fallthrough_res.regs, scratch);
+
+- find_dead_or_set_registers (JUMP_LABEL (this_jump_insn),
+- &target_res, 0, jump_count,
+- target_set, needed);
++ if (!ANY_RETURN_P (JUMP_LABEL (this_jump_insn)))
++ find_dead_or_set_registers (JUMP_LABEL (this_jump_insn),
++ &target_res, 0, jump_count,
++ target_set, needed);
+ find_dead_or_set_registers (next,
+ &fallthrough_res, 0, jump_count,
+ set, needed);
+@@ -1097,6 +1100,8 @@ mark_target_live_regs (rtx insns, rtx ta
+ struct resources new_resources;
+ rtx stop_insn = next_active_insn (jump_insn);
+
++ if (jump_target && ANY_RETURN_P (jump_target))
++ jump_target = NULL_RTX;
+ mark_target_live_regs (insns, next_active_insn (jump_target),
+ &new_resources);
+ CLEAR_RESOURCE (&set);
+Index: gcc-4_5-branch/gcc/rtl.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/rtl.c
++++ gcc-4_5-branch/gcc/rtl.c
+@@ -256,6 +256,8 @@ copy_rtx (rtx orig)
+ case CODE_LABEL:
+ case PC:
+ case CC0:
++ case RETURN:
++ case SIMPLE_RETURN:
+ case SCRATCH:
+ /* SCRATCH must be shared because they represent distinct values. */
+ return orig;
+Index: gcc-4_5-branch/gcc/rtl.def
+===================================================================
+--- gcc-4_5-branch.orig/gcc/rtl.def
++++ gcc-4_5-branch/gcc/rtl.def
+@@ -296,6 +296,10 @@ DEF_RTL_EXPR(CALL, "call", "ee", RTX_EXT
+
+ DEF_RTL_EXPR(RETURN, "return", "", RTX_EXTRA)
+
++/* A plain return, to be used on paths that are reached without going
++ through the function prologue. */
++DEF_RTL_EXPR(SIMPLE_RETURN, "simple_return", "", RTX_EXTRA)
++
+ /* Special for EH return from subroutine. */
+
+ DEF_RTL_EXPR(EH_RETURN, "eh_return", "", RTX_EXTRA)
+Index: gcc-4_5-branch/gcc/rtl.h
+===================================================================
+--- gcc-4_5-branch.orig/gcc/rtl.h
++++ gcc-4_5-branch/gcc/rtl.h
+@@ -411,6 +411,10 @@ struct GTY(()) rtvec_def {
+ (JUMP_P (INSN) && (GET_CODE (PATTERN (INSN)) == ADDR_VEC || \
+ GET_CODE (PATTERN (INSN)) == ADDR_DIFF_VEC))
+
++/* Predicate yielding nonzero iff X is a return or simple_return. */
++#define ANY_RETURN_P(X) \
++ (GET_CODE (X) == RETURN || GET_CODE (X) == SIMPLE_RETURN)
++
+ /* 1 if X is a unary operator. */
+
+ #define UNARY_P(X) \
+@@ -1998,6 +2002,8 @@ enum global_rtl_index
+ {
+ GR_PC,
+ GR_CC0,
++ GR_RETURN,
++ GR_SIMPLE_RETURN,
+ GR_STACK_POINTER,
+ GR_FRAME_POINTER,
+ /* For register elimination to work properly these hard_frame_pointer_rtx,
+@@ -2032,6 +2038,8 @@ extern GTY(()) rtx global_rtl[GR_MAX];
+
+ /* Standard pieces of rtx, to be substituted directly into things. */
+ #define pc_rtx (global_rtl[GR_PC])
++#define ret_rtx (global_rtl[GR_RETURN])
++#define simple_return_rtx (global_rtl[GR_SIMPLE_RETURN])
+ #define cc0_rtx (global_rtl[GR_CC0])
+
+ /* All references to certain hard regs, except those created
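+
+Minimal usage sketch for ANY_RETURN_P (the same idiom the patch applies in
+jump.c and reorg.c): JUMP_LABEL may now hold a return rtx rather than a
+label, so label bookkeeping must be guarded:
+
+  rtx lab = JUMP_LABEL (insn);
+  if (lab && !ANY_RETURN_P (lab))
+    ++LABEL_NUSES (lab);   /* only real CODE_LABELs are counted */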
+Index: gcc-4_5-branch/gcc/rtlanal.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/rtlanal.c
++++ gcc-4_5-branch/gcc/rtlanal.c
+@@ -2673,6 +2673,7 @@ tablejump_p (const_rtx insn, rtx *labelp
+
+ if (JUMP_P (insn)
+ && (label = JUMP_LABEL (insn)) != NULL_RTX
++ && !ANY_RETURN_P (label)
+ && (table = next_active_insn (label)) != NULL_RTX
+ && JUMP_TABLE_DATA_P (table))
+ {
+Index: gcc-4_5-branch/gcc/sched-int.h
+===================================================================
+--- gcc-4_5-branch.orig/gcc/sched-int.h
++++ gcc-4_5-branch/gcc/sched-int.h
+@@ -199,7 +199,7 @@ extern int max_issue (struct ready_list
+
+ extern void ebb_compute_jump_reg_dependencies (rtx, regset, regset, regset);
+
+-extern edge find_fallthru_edge (basic_block);
++extern edge find_fallthru_edge_from (basic_block);
+
+ extern void (* sched_init_only_bb) (basic_block, basic_block);
+ extern basic_block (* sched_split_block) (basic_block, rtx);
+Index: gcc-4_5-branch/gcc/sched-vis.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/sched-vis.c
++++ gcc-4_5-branch/gcc/sched-vis.c
+@@ -549,6 +549,9 @@ print_pattern (char *buf, const_rtx x, i
+ case RETURN:
+ sprintf (buf, "return");
+ break;
++ case SIMPLE_RETURN:
++ sprintf (buf, "simple_return");
++ break;
+ case CALL:
+ print_exp (buf, x, verbose);
+ break;
+Index: gcc-4_5-branch/gcc/sel-sched-ir.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/sel-sched-ir.c
++++ gcc-4_5-branch/gcc/sel-sched-ir.c
+@@ -686,7 +686,7 @@ merge_fences (fence_t f, insn_t insn,
+
+ /* Find fallthrough edge. */
+ gcc_assert (BLOCK_FOR_INSN (insn)->prev_bb);
+- candidate = find_fallthru_edge (BLOCK_FOR_INSN (insn)->prev_bb);
++ candidate = find_fallthru_edge_from (BLOCK_FOR_INSN (insn)->prev_bb);
+
+ if (!candidate
+ || (candidate->src != BLOCK_FOR_INSN (last_scheduled_insn)
+Index: gcc-4_5-branch/gcc/sel-sched.c
+===================================================================
+--- gcc-4_5-branch.orig/gcc/sel-sched.c
++++ gcc-4_5-branch/gcc/sel-sched.c
+@@ -617,8 +617,8 @@ in_fallthru_bb_p (rtx insn, rtx succ)
+ if (bb == BLOCK_FOR_INSN (succ))
+ return true;
+
+- if (find_fallthru_edge (bb))
+- bb = find_fallthru_edge (bb)->dest;
++ if (find_fallthru_edge_from (bb))
++ bb = find_fallthru_edge_from (bb)->dest;
+ else
+ return false;
+
+@@ -4911,7 +4911,7 @@ move_cond_jump (rtx insn, bnd_t bnd)
+ next = PREV_INSN (insn);
+ BND_TO (bnd) = insn;
+
+- ft_edge = find_fallthru_edge (block_from);
++ ft_edge = find_fallthru_edge_from (block_from);
+ block_next = ft_edge->dest;
+ /* There must be a fallthrough block (or where should go
+ control flow in case of false jump predicate otherwise?). */
+Index: gcc-4_5-branch/gcc/vec.h
+===================================================================
+--- gcc-4_5-branch.orig/gcc/vec.h
++++ gcc-4_5-branch/gcc/vec.h
+@@ -188,6 +188,18 @@ along with GCC; see the file COPYING3.
+
+ #define VEC_iterate(T,V,I,P) (VEC_OP(T,base,iterate)(VEC_BASE(V),I,&(P)))
+
++/* Convenience macro for forward iteration. */
++
++#define FOR_EACH_VEC_ELT(T, V, I, P) \
++ for (I = 0; VEC_iterate (T, (V), (I), (P)); ++(I))
++
++/* Convenience macro for reverse iteration. */
++
++#define FOR_EACH_VEC_ELT_REVERSE(T,V,I,P) \
++ for (I = VEC_length (T, (V)) - 1; \
++ VEC_iterate (T, (V), (I), (P)); \
++ (I)--)
++
+ /* Allocate new vector.
+ VEC(T,A) *VEC_T_A_alloc(int reserve);
+
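+
+Usage sketch for the new iteration macro (FOR_EACH_VEC_ELT is used this
+way in the function.c changes above; process_bb is a placeholder):
+
+  int i;
+  basic_block bb;
+  FOR_EACH_VEC_ELT (basic_block, src_bbs, i, bb)
+    process_bb (bb);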