summaryrefslogtreecommitdiffstats
path: root/contrib/gcc/config
diff options
context:
space:
mode:
authorobrien <obrien@FreeBSD.org>1999-10-16 08:08:21 +0000
committerobrien <obrien@FreeBSD.org>1999-10-16 08:08:21 +0000
commitd6efeabb7a7ac174749ef84def049432de89753d (patch)
tree233160130bf29c5045124d3879ec3fac4dd5809c /contrib/gcc/config
parentaf6623e278429f4c4548c936e133a52dc56b04ab (diff)
downloadFreeBSD-src-d6efeabb7a7ac174749ef84def049432de89753d.zip
FreeBSD-src-d6efeabb7a7ac174749ef84def049432de89753d.tar.gz
Bring our profiling tweaks from rev 1.2 into GCC 2.95.1.
Diffstat (limited to 'contrib/gcc/config')
-rw-r--r--contrib/gcc/config/i386/i386.c1830
1 files changed, 1145 insertions, 685 deletions
diff --git a/contrib/gcc/config/i386/i386.c b/contrib/gcc/config/i386/i386.c
index 52147d1..afbfd11 100644
--- a/contrib/gcc/config/i386/i386.c
+++ b/contrib/gcc/config/i386/i386.c
@@ -107,8 +107,42 @@ struct processor_costs pentiumpro_cost = {
17 /* cost of a divide/mod */
};
+/* We use decoding time together with execution time.
+ To get correct vale add 1 for short decodable, 2 for long decodable
+ and 4 for vector decodable instruction to execution time and divide
+ by two (because CPU is able to do two insns at a time). */
+
+struct processor_costs k6_cost = {
+ 1, /* cost of an add instruction */
+ 1, /* cost of a lea instruction */
+ 1, /* variable shift costs */
+ 1, /* constant shift costs */
+ 3, /* cost of starting a multiply */
+ 0, /* cost of multiply per each bit set */
+ 20 /* cost of a divide/mod */
+};
+
struct processor_costs *ix86_cost = &pentium_cost;
+/* Processor feature/optimization bitmasks. */
+#define m_386 (1<<PROCESSOR_I386)
+#define m_486 (1<<PROCESSOR_I486)
+#define m_PENT (1<<PROCESSOR_PENTIUM)
+#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
+#define m_K6 (1<<PROCESSOR_K6)
+
+const int x86_use_leave = m_386 | m_K6;
+const int x86_push_memory = m_386 | m_K6;
+const int x86_zero_extend_with_and = m_486 | m_PENT;
+const int x86_movx = m_386 | m_PPRO | m_K6;
+const int x86_double_with_add = ~(m_386 | m_PENT | m_PPRO);
+const int x86_use_bit_test = m_386;
+const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO;
+const int x86_use_q_reg = m_PENT | m_PPRO | m_K6;
+const int x86_use_any_reg = m_486;
+const int x86_cmove = m_PPRO;
+const int x86_deep_branch = m_PPRO| m_K6;
+
#define AT_BP(mode) (gen_rtx_MEM ((mode), frame_pointer_rtx))
extern FILE *asm_out_file;
@@ -176,6 +210,12 @@ char *i386_align_loops_string;
/* Power of two alignment for non-loop jumps. */
char *i386_align_jumps_string;
+/* Power of two alignment for stack boundary in bytes. */
+char *i386_preferred_stack_boundary_string;
+
+/* Preferred alignment for stack boundary in bits. */
+int i386_preferred_stack_boundary;
+
/* Values 1-5: see jump.c */
int i386_branch_cost;
char *i386_branch_cost_string;
@@ -212,15 +252,16 @@ override_options ()
struct processor_costs *cost; /* Processor costs */
int target_enable; /* Target flags to enable. */
int target_disable; /* Target flags to disable. */
- } processor_target_table[]
- = {{PROCESSOR_I386_STRING, PROCESSOR_I386, &i386_cost, 0, 0},
- {PROCESSOR_I486_STRING, PROCESSOR_I486, &i486_cost, 0, 0},
- {PROCESSOR_I586_STRING, PROCESSOR_PENTIUM, &pentium_cost, 0, 0},
- {PROCESSOR_PENTIUM_STRING, PROCESSOR_PENTIUM, &pentium_cost, 0, 0},
- {PROCESSOR_I686_STRING, PROCESSOR_PENTIUMPRO, &pentiumpro_cost,
- 0, 0},
- {PROCESSOR_PENTIUMPRO_STRING, PROCESSOR_PENTIUMPRO,
- &pentiumpro_cost, 0, 0}};
+ } processor_target_table[] = {
+ {PROCESSOR_I386_STRING, PROCESSOR_I386, &i386_cost, 0, 0},
+ {PROCESSOR_I486_STRING, PROCESSOR_I486, &i486_cost, 0, 0},
+ {PROCESSOR_I586_STRING, PROCESSOR_PENTIUM, &pentium_cost, 0, 0},
+ {PROCESSOR_PENTIUM_STRING, PROCESSOR_PENTIUM, &pentium_cost, 0, 0},
+ {PROCESSOR_I686_STRING, PROCESSOR_PENTIUMPRO, &pentiumpro_cost, 0, 0},
+ {PROCESSOR_PENTIUMPRO_STRING, PROCESSOR_PENTIUMPRO,
+ &pentiumpro_cost, 0, 0},
+ {PROCESSOR_K6_STRING, PROCESSOR_K6, &k6_cost, 0, 0}
+ };
int ptt_size = sizeof (processor_target_table) / sizeof (struct ptt);
@@ -286,7 +327,7 @@ override_options ()
{
ix86_cpu = processor_target_table[j].processor;
ix86_cost = processor_target_table[j].cost;
- if (i > j && (int) ix86_arch >= (int) PROCESSOR_PENTIUMPRO)
+ if (i > j && (int) ix86_arch >= (int) PROCESSOR_K6)
error ("-mcpu=%s does not support -march=%s",
ix86_cpu_string, ix86_arch_string);
@@ -318,6 +359,11 @@ override_options ()
def_align = (TARGET_486) ? 4 : 2;
/* Validate -malign-loops= value, or provide default. */
+#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
+ i386_align_loops = 4;
+#else
+ i386_align_loops = 2;
+#endif
if (i386_align_loops_string)
{
i386_align_loops = atoi (i386_align_loops_string);
@@ -325,14 +371,13 @@ override_options ()
fatal ("-malign-loops=%d is not between 0 and %d",
i386_align_loops, MAX_CODE_ALIGN);
}
- else
+
+ /* Validate -malign-jumps= value, or provide default. */
#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
- i386_align_loops = 4;
+ i386_align_jumps = 4;
#else
- i386_align_loops = 2;
+ i386_align_jumps = def_align;
#endif
-
- /* Validate -malign-jumps= value, or provide default. */
if (i386_align_jumps_string)
{
i386_align_jumps = atoi (i386_align_jumps_string);
@@ -340,14 +385,9 @@ override_options ()
fatal ("-malign-jumps=%d is not between 0 and %d",
i386_align_jumps, MAX_CODE_ALIGN);
}
- else
-#ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
- i386_align_jumps = 4;
-#else
- i386_align_jumps = def_align;
-#endif
/* Validate -malign-functions= value, or provide default. */
+ i386_align_funcs = def_align;
if (i386_align_funcs_string)
{
i386_align_funcs = atoi (i386_align_funcs_string);
@@ -355,19 +395,26 @@ override_options ()
fatal ("-malign-functions=%d is not between 0 and %d",
i386_align_funcs, MAX_CODE_ALIGN);
}
- else
- i386_align_funcs = def_align;
+
+ /* Validate -mpreferred_stack_boundary= value, or provide default.
+ The default of 128 bits is for Pentium III's SSE __m128. */
+ i386_preferred_stack_boundary = 128;
+ if (i386_preferred_stack_boundary_string)
+ {
+ i = atoi (i386_preferred_stack_boundary_string);
+ if (i < 2 || i > 31)
+ fatal ("-mpreferred_stack_boundary=%d is not between 2 and 31", i);
+ i386_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
+ }
/* Validate -mbranch-cost= value, or provide default. */
+ i386_branch_cost = 1;
if (i386_branch_cost_string)
{
i386_branch_cost = atoi (i386_branch_cost_string);
if (i386_branch_cost < 0 || i386_branch_cost > 5)
- fatal ("-mbranch-cost=%d is not between 0 and 5",
- i386_branch_cost);
+ fatal ("-mbranch-cost=%d is not between 0 and 5", i386_branch_cost);
}
- else
- i386_branch_cost = 1;
/* Keep nonleaf frame pointers. */
if (TARGET_OMIT_LEAF_FRAME_POINTER)
@@ -599,9 +646,19 @@ i386_valid_type_attribute_p (type, attributes, identifier, args)
int
i386_comp_type_attributes (type1, type2)
- tree type1 ATTRIBUTE_UNUSED;
- tree type2 ATTRIBUTE_UNUSED;
+ tree type1;
+ tree type2;
{
+ /* Check for mismatch of non-default calling convention. */
+ char *rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
+
+ if (TREE_CODE (type1) != FUNCTION_TYPE)
+ return 1;
+
+ /* Check for mismatched return types (cdecl vs stdcall). */
+ if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
+ != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
+ return 0;
return 1;
}
@@ -821,150 +878,6 @@ function_arg_partial_nregs (cum, mode, type, named)
return 0;
}
-/* Output an insn whose source is a 386 integer register. SRC is the
- rtx for the register, and TEMPLATE is the op-code template. SRC may
- be either SImode or DImode.
-
- The template will be output with operands[0] as SRC, and operands[1]
- as a pointer to the top of the 386 stack. So a call from floatsidf2
- would look like this:
-
- output_op_from_reg (operands[1], AS1 (fild%z0,%1));
-
- where %z0 corresponds to the caller's operands[1], and is used to
- emit the proper size suffix.
-
- ??? Extend this to handle HImode - a 387 can load and store HImode
- values directly. */
-
-void
-output_op_from_reg (src, template)
- rtx src;
- char *template;
-{
- rtx xops[4];
- int size = GET_MODE_SIZE (GET_MODE (src));
-
- xops[0] = src;
- xops[1] = AT_SP (Pmode);
- xops[2] = GEN_INT (size);
- xops[3] = stack_pointer_rtx;
-
- if (size > UNITS_PER_WORD)
- {
- rtx high;
-
- if (size > 2 * UNITS_PER_WORD)
- {
- high = gen_rtx_REG (SImode, REGNO (src) + 2);
- output_asm_insn (AS1 (push%L0,%0), &high);
- }
-
- high = gen_rtx_REG (SImode, REGNO (src) + 1);
- output_asm_insn (AS1 (push%L0,%0), &high);
- }
-
- output_asm_insn (AS1 (push%L0,%0), &src);
- output_asm_insn (template, xops);
- output_asm_insn (AS2 (add%L3,%2,%3), xops);
-}
-
-/* Output an insn to pop an value from the 387 top-of-stack to 386
- register DEST. The 387 register stack is popped if DIES is true. If
- the mode of DEST is an integer mode, a `fist' integer store is done,
- otherwise a `fst' float store is done. */
-
-void
-output_to_reg (dest, dies, scratch_mem)
- rtx dest;
- int dies;
- rtx scratch_mem;
-{
- rtx xops[4];
- int size = GET_MODE_SIZE (GET_MODE (dest));
-
- if (! scratch_mem)
- xops[0] = AT_SP (Pmode);
- else
- xops[0] = scratch_mem;
-
- xops[1] = stack_pointer_rtx;
- xops[2] = GEN_INT (size);
- xops[3] = dest;
-
- if (! scratch_mem)
- output_asm_insn (AS2 (sub%L1,%2,%1), xops);
-
- if (GET_MODE_CLASS (GET_MODE (dest)) == MODE_INT)
- {
- if (dies)
- output_asm_insn (AS1 (fistp%z3,%y0), xops);
- else if (GET_MODE (xops[3]) == DImode && ! dies)
- {
- /* There is no DImode version of this without a stack pop, so
- we must emulate it. It doesn't matter much what the second
- instruction is, because the value being pushed on the FP stack
- is not used except for the following stack popping store.
- This case can only happen without optimization, so it doesn't
- matter that it is inefficient. */
- output_asm_insn (AS1 (fistp%z3,%0), xops);
- output_asm_insn (AS1 (fild%z3,%0), xops);
- }
- else
- output_asm_insn (AS1 (fist%z3,%y0), xops);
- }
-
- else if (GET_MODE_CLASS (GET_MODE (dest)) == MODE_FLOAT)
- {
- if (dies)
- output_asm_insn (AS1 (fstp%z3,%y0), xops);
- else
- {
- if (GET_MODE (dest) == XFmode)
- {
- output_asm_insn (AS1 (fstp%z3,%y0), xops);
- output_asm_insn (AS1 (fld%z3,%y0), xops);
- }
- else
- output_asm_insn (AS1 (fst%z3,%y0), xops);
- }
- }
-
- else
- abort ();
-
- if (! scratch_mem)
- output_asm_insn (AS1 (pop%L0,%0), &dest);
- else
- output_asm_insn (AS2 (mov%L0,%0,%3), xops);
-
-
- if (size > UNITS_PER_WORD)
- {
- dest = gen_rtx_REG (SImode, REGNO (dest) + 1);
- if (! scratch_mem)
- output_asm_insn (AS1 (pop%L0,%0), &dest);
- else
- {
- xops[0] = adj_offsettable_operand (xops[0], 4);
- xops[3] = dest;
- output_asm_insn (AS2 (mov%L0,%0,%3), xops);
- }
-
- if (size > 2 * UNITS_PER_WORD)
- {
- dest = gen_rtx_REG (SImode, REGNO (dest) + 1);
- if (! scratch_mem)
- output_asm_insn (AS1 (pop%L0,%0), &dest);
- else
- {
- xops[0] = adj_offsettable_operand (xops[0], 4);
- output_asm_insn (AS2 (mov%L0,%0,%3), xops);
- }
- }
- }
-}
-
char *
singlemove_string (operands)
rtx *operands;
@@ -990,32 +903,6 @@ singlemove_string (operands)
}
}
-/* Return a REG that occurs in ADDR with coefficient 1.
- ADDR can be effectively incremented by incrementing REG. */
-
-static rtx
-find_addr_reg (addr)
- rtx addr;
-{
- while (GET_CODE (addr) == PLUS)
- {
- if (GET_CODE (XEXP (addr, 0)) == REG)
- addr = XEXP (addr, 0);
- else if (GET_CODE (XEXP (addr, 1)) == REG)
- addr = XEXP (addr, 1);
- else if (CONSTANT_P (XEXP (addr, 0)))
- addr = XEXP (addr, 1);
- else if (CONSTANT_P (XEXP (addr, 1)))
- addr = XEXP (addr, 0);
- else
- abort ();
- }
-
- if (GET_CODE (addr) == REG)
- return addr;
- abort ();
-}
-
/* Output an insn to add the constant N to the register X. */
static void
@@ -1053,7 +940,6 @@ output_move_double (operands)
rtx latehalf[2];
rtx middlehalf[2];
rtx xops[2];
- rtx addreg0 = 0, addreg1 = 0;
int dest_overlapped_low = 0;
int size = GET_MODE_SIZE (GET_MODE (operands[0]));
@@ -1090,11 +976,14 @@ output_move_double (operands)
else
optype1 = RNDOP;
- /* Check for the cases that the operand constraints are not
- supposed to allow to happen. Abort if we get one,
- because generating code for these cases is painful. */
+ /* Check for the cases that are not supposed to happen
+ either due to the operand constraints or the fact
+ that all memory operands on the x86 are offsettable.
+ Abort if we get one, because generating code for these
+ cases is painful. */
- if (optype0 == RNDOP || optype1 == RNDOP)
+ if (optype0 == RNDOP || optype1 == RNDOP
+ || optype0 == MEMOP || optype1 == MEMOP)
abort ();
/* If one operand is decrementing and one is incrementing
@@ -1129,15 +1018,6 @@ output_move_double (operands)
optype1 = OFFSOP;
}
- /* If an operand is an unoffsettable memory ref, find a register
- we can increment temporarily to make it refer to the second word. */
-
- if (optype0 == MEMOP)
- addreg0 = find_addr_reg (XEXP (operands[0], 0));
-
- if (optype1 == MEMOP)
- addreg1 = find_addr_reg (XEXP (operands[1], 0));
-
/* Ok, we can do one word at a time.
Normally we do the low-numbered word first,
but if either operand is autodecrementing then we
@@ -1234,8 +1114,7 @@ output_move_double (operands)
emit the move late-half first. Otherwise, compute the MEM address
into the upper part of N and use that as a pointer to the memory
operand. */
- if (optype0 == REGOP
- && (optype1 == OFFSOP || optype1 == MEMOP))
+ if (optype0 == REGOP && optype1 == OFFSOP)
{
if (reg_mentioned_p (operands[0], XEXP (operands[1], 0))
&& reg_mentioned_p (latehalf[0], XEXP (operands[1], 0)))
@@ -1267,10 +1146,6 @@ output_move_double (operands)
|| reg_mentioned_p (latehalf[0], XEXP (operands[1], 0)))
goto compadr;
- /* JRV says this can't happen: */
- if (addreg0 || addreg1)
- abort ();
-
/* Only the middle reg conflicts; simply put it last. */
output_asm_insn (singlemove_string (operands), operands);
output_asm_insn (singlemove_string (latehalf), latehalf);
@@ -1305,29 +1180,11 @@ output_move_double (operands)
|| REGNO (operands[0]) == REGNO (latehalf[1])))
|| dest_overlapped_low)
{
- /* Make any unoffsettable addresses point at high-numbered word. */
- if (addreg0)
- asm_add (size-4, addreg0);
- if (addreg1)
- asm_add (size-4, addreg1);
-
- /* Do that word. */
+ /* Do the high-numbered word. */
output_asm_insn (singlemove_string (latehalf), latehalf);
- /* Undo the adds we just did. */
- if (addreg0)
- asm_add (-4, addreg0);
- if (addreg1)
- asm_add (-4, addreg1);
-
if (size == 12)
- {
- output_asm_insn (singlemove_string (middlehalf), middlehalf);
- if (addreg0)
- asm_add (-4, addreg0);
- if (addreg1)
- asm_add (-4, addreg1);
- }
+ output_asm_insn (singlemove_string (middlehalf), middlehalf);
/* Do low-numbered word. */
return singlemove_string (operands);
@@ -1339,30 +1196,11 @@ output_move_double (operands)
/* Do the middle one of the three words for long double */
if (size == 12)
- {
- if (addreg0)
- asm_add (4, addreg0);
- if (addreg1)
- asm_add (4, addreg1);
+ output_asm_insn (singlemove_string (middlehalf), middlehalf);
- output_asm_insn (singlemove_string (middlehalf), middlehalf);
- }
-
- /* Make any unoffsettable addresses point at high-numbered word. */
- if (addreg0)
- asm_add (4, addreg0);
- if (addreg1)
- asm_add (4, addreg1);
-
- /* Do that word. */
+ /* Do the high-numbered word. */
output_asm_insn (singlemove_string (latehalf), latehalf);
- /* Undo the adds we just did. */
- if (addreg0)
- asm_add (4-size, addreg0);
- if (addreg1)
- asm_add (4-size, addreg1);
-
return "";
}
@@ -1447,125 +1285,6 @@ output_move_pushmem (operands, insn, length, tmp_start, n_operands)
return "";
}
-/* Output the appropriate code to move data between two memory locations */
-
-char *
-output_move_memory (operands, insn, length, tmp_start, n_operands)
- rtx operands[];
- rtx insn;
- int length;
- int tmp_start;
- int n_operands;
-{
- struct
- {
- char *load;
- char *store;
- rtx xops[3];
- } tmp_info[MAX_TMPS];
-
- rtx dest = operands[0];
- rtx src = operands[1];
- rtx qi_tmp = NULL_RTX;
- int max_tmps = 0;
- int offset = 0;
- int i, num_tmps;
- rtx xops[3];
-
- if (GET_CODE (dest) == MEM
- && GET_CODE (XEXP (dest, 0)) == PRE_INC
- && XEXP (XEXP (dest, 0), 0) == stack_pointer_rtx)
- return output_move_pushmem (operands, insn, length, tmp_start, n_operands);
-
- if (! offsettable_memref_p (src))
- fatal_insn ("Source is not offsettable", insn);
-
- if (! offsettable_memref_p (dest))
- fatal_insn ("Destination is not offsettable", insn);
-
- /* Figure out which temporary registers we have available */
- for (i = tmp_start; i < n_operands; i++)
- {
- if (GET_CODE (operands[i]) == REG)
- {
- if ((length & 1) != 0 && qi_tmp == 0 && QI_REG_P (operands[i]))
- qi_tmp = operands[i];
-
- if (reg_overlap_mentioned_p (operands[i], dest))
- fatal_insn ("Temporary register overlaps the destination", insn);
-
- if (reg_overlap_mentioned_p (operands[i], src))
- fatal_insn ("Temporary register overlaps the source", insn);
-
- tmp_info[max_tmps++].xops[2] = operands[i];
- if (max_tmps == MAX_TMPS)
- break;
- }
- }
-
- if (max_tmps == 0)
- fatal_insn ("No scratch registers were found to do memory->memory moves",
- insn);
-
- if ((length & 1) != 0)
- {
- if (qi_tmp == 0)
- fatal_insn ("No byte register found when moving odd # of bytes.",
- insn);
- }
-
- while (length > 1)
- {
- for (num_tmps = 0; num_tmps < max_tmps; num_tmps++)
- {
- if (length >= 4)
- {
- tmp_info[num_tmps].load = AS2(mov%L0,%1,%2);
- tmp_info[num_tmps].store = AS2(mov%L0,%2,%0);
- tmp_info[num_tmps].xops[0]
- = adj_offsettable_operand (dest, offset);
- tmp_info[num_tmps].xops[1]
- = adj_offsettable_operand (src, offset);
-
- offset += 4;
- length -= 4;
- }
-
- else if (length >= 2)
- {
- tmp_info[num_tmps].load = AS2(mov%W0,%1,%2);
- tmp_info[num_tmps].store = AS2(mov%W0,%2,%0);
- tmp_info[num_tmps].xops[0]
- = adj_offsettable_operand (dest, offset);
- tmp_info[num_tmps].xops[1]
- = adj_offsettable_operand (src, offset);
-
- offset += 2;
- length -= 2;
- }
- else
- break;
- }
-
- for (i = 0; i < num_tmps; i++)
- output_asm_insn (tmp_info[i].load, tmp_info[i].xops);
-
- for (i = 0; i < num_tmps; i++)
- output_asm_insn (tmp_info[i].store, tmp_info[i].xops);
- }
-
- if (length == 1)
- {
- xops[0] = adj_offsettable_operand (dest, offset);
- xops[1] = adj_offsettable_operand (src, offset);
- xops[2] = qi_tmp;
- output_asm_insn (AS2(mov%B0,%1,%2), xops);
- output_asm_insn (AS2(mov%B0,%2,%0), xops);
- }
-
- return "";
-}
-
int
standard_80387_constant_p (x)
rtx x;
@@ -1593,6 +1312,7 @@ standard_80387_constant_p (x)
/* Note that on the 80387, other constants, such as pi,
are much slower to load as standard constants
than to load from doubles in memory! */
+ /* ??? Not true on K6: all constants are equal cost. */
#endif
return 0;
@@ -1653,6 +1373,17 @@ symbolic_operand (op, mode)
}
}
+/* Return nonzero if OP is a constant shift count small enough to
+ encode into an lea instruction. */
+
+int
+small_shift_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return (GET_CODE (op) == CONST_INT && INTVAL (op) > 0 && INTVAL (op) < 4);
+}
+
/* Test for a valid operand for a call instruction.
Don't allow the arg pointer register or virtual regs
since they may change into reg + const, which the patterns
@@ -1999,14 +1730,19 @@ load_pic_register (do_rtl)
{
emit_insn (gen_prologue_get_pc (xops[0], xops[1]));
emit_insn (gen_prologue_set_got (xops[0],
- gen_rtx (SYMBOL_REF, Pmode,
- "$_GLOBAL_OFFSET_TABLE_"),
+#ifdef YES_UNDERSCORES
+ gen_rtx_SYMBOL_REF (Pmode,
+ "$__GLOBAL_OFFSET_TABLE_"),
+#else
+ gen_rtx_SYMBOL_REF (Pmode,
+ "$_GLOBAL_OFFSET_TABLE_"),
+#endif
xops[1]));
}
else
{
output_asm_insn (AS1 (call,%X1), xops);
- output_asm_insn ("addl $_GLOBAL_OFFSET_TABLE_,%0", xops);
+ output_asm_insn ("addl $%__GLOBAL_OFFSET_TABLE_,%0", xops);
pic_label_rtx = 0;
}
}
@@ -2029,7 +1765,7 @@ load_pic_register (do_rtl)
ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
CODE_LABEL_NUMBER (xops[1]));
output_asm_insn (AS1 (pop%L0,%0), xops);
- output_asm_insn ("addl $_GLOBAL_OFFSET_TABLE_+[.-%P1],%0", xops);
+ output_asm_insn ("addl $%__GLOBAL_OFFSET_TABLE_+[.-%P1],%0", xops);
}
}
@@ -2041,6 +1777,66 @@ load_pic_register (do_rtl)
emit_insn (gen_blockage ());
}
+/* Compute the size of local storage taking into consideration the
+ desired stack alignment which is to be maintained. Also determine
+ the number of registers saved below the local storage. */
+
+HOST_WIDE_INT
+ix86_compute_frame_size (size, nregs_on_stack)
+ HOST_WIDE_INT size;
+ int *nregs_on_stack;
+{
+ int limit;
+ int nregs;
+ int regno;
+ int padding;
+ int pic_reg_used = PIC_REG_USED;
+ HOST_WIDE_INT total_size;
+
+ limit = frame_pointer_needed
+ ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM;
+
+ nregs = 0;
+
+ for (regno = limit - 1; regno >= 0; regno--)
+ if ((regs_ever_live[regno] && ! call_used_regs[regno])
+ || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
+ nregs++;
+
+ padding = 0;
+ total_size = size + (nregs * UNITS_PER_WORD);
+
+#ifdef PREFERRED_STACK_BOUNDARY
+ {
+ int offset;
+ int preferred_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
+
+ offset = 4;
+ if (frame_pointer_needed)
+ offset += UNITS_PER_WORD;
+
+ total_size += offset;
+
+ padding = ((total_size + preferred_alignment - 1)
+ & -preferred_alignment) - total_size;
+
+ if (padding < (((offset + preferred_alignment - 1)
+ & -preferred_alignment) - offset))
+ padding += preferred_alignment;
+
+ /* Don't bother aligning the stack of a leaf function
+ which doesn't allocate any stack slots. */
+ if (size == 0 && current_function_is_leaf)
+ padding = 0;
+ }
+#endif
+
+ if (nregs_on_stack)
+ *nregs_on_stack = nregs;
+
+ return size + padding;
+}
+
static void
ix86_prologue (do_rtl)
int do_rtl;
@@ -2049,7 +1845,7 @@ ix86_prologue (do_rtl)
int limit;
rtx xops[4];
int pic_reg_used = PIC_REG_USED;
- long tsize = get_frame_size ();
+ HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), (int *)0);
rtx insn;
int cfa_offset = INCOMING_FRAME_SP_OFFSET, cfa_store_offset = cfa_offset;
@@ -2184,6 +1980,10 @@ ix86_prologue (do_rtl)
}
}
+#ifdef SUBTARGET_PROLOGUE
+ SUBTARGET_PROLOGUE;
+#endif
+
if (pic_reg_used)
load_pic_register (do_rtl);
@@ -2256,31 +2056,17 @@ ix86_epilogue (do_rtl)
int do_rtl;
{
register int regno;
- register int nregs, limit;
- int offset;
+ register int limit;
+ int nregs;
rtx xops[3];
int pic_reg_used = PIC_REG_USED;
- long tsize = get_frame_size ();
-
- /* Compute the number of registers to pop */
-
- limit = (frame_pointer_needed ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM);
+ int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
+ HOST_WIDE_INT offset;
+ HOST_WIDE_INT tsize = ix86_compute_frame_size (get_frame_size (), &nregs);
- nregs = 0;
-
- for (regno = limit - 1; regno >= 0; regno--)
- if ((regs_ever_live[regno] && ! call_used_regs[regno])
- || (regno == PIC_OFFSET_TABLE_REGNUM && pic_reg_used))
- nregs++;
+ /* sp is often unreliable so we may have to go off the frame pointer. */
- /* sp is often unreliable so we must go off the frame pointer.
-
- In reality, we may not care if sp is unreliable, because we can restore
- the register relative to the frame pointer. In theory, since each move
- is the same speed as a pop, and we don't need the leal, this is faster.
- For now restore multiple registers the old way. */
-
- offset = - tsize - (nregs * UNITS_PER_WORD);
+ offset = -(tsize + nregs * UNITS_PER_WORD);
xops[2] = stack_pointer_rtx;
@@ -2295,9 +2081,17 @@ ix86_epilogue (do_rtl)
if (flag_pic || profile_flag || profile_block_flag)
emit_insn (gen_blockage ());
- if (nregs > 1 || ! frame_pointer_needed)
+ /* If we're only restoring one register and sp is not valid then
+ using a move instruction to restore the register since it's
+ less work than reloading sp and popping the register. Otherwise,
+ restore sp (if necessary) and pop the registers. */
+
+ limit = frame_pointer_needed
+ ? FRAME_POINTER_REGNUM : STACK_POINTER_REGNUM;
+
+ if (nregs > 1 || sp_valid)
{
- if (frame_pointer_needed)
+ if ( !sp_valid )
{
xops[0] = adj_offsettable_operand (AT_BP (QImode), offset);
if (do_rtl)
@@ -2505,6 +2299,37 @@ do { \
} while (0)
int
+legitimate_pic_address_disp_p (disp)
+ register rtx disp;
+{
+ if (GET_CODE (disp) != CONST)
+ return 0;
+ disp = XEXP (disp, 0);
+
+ if (GET_CODE (disp) == PLUS)
+ {
+ if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
+ return 0;
+ disp = XEXP (disp, 0);
+ }
+
+ if (GET_CODE (disp) != UNSPEC
+ || XVECLEN (disp, 0) != 1)
+ return 0;
+
+ /* Must be @GOT or @GOTOFF. */
+ if (XINT (disp, 1) != 6
+ && XINT (disp, 1) != 7)
+ return 0;
+
+ if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
+ && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
+ return 0;
+
+ return 1;
+}
+
+int
legitimate_address_p (mode, addr, strict)
enum machine_mode mode;
register rtx addr;
@@ -2525,7 +2350,7 @@ legitimate_address_p (mode, addr, strict)
}
if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
- base = addr;
+ base = addr;
else if (GET_CODE (addr) == PLUS)
{
@@ -2615,6 +2440,12 @@ legitimate_address_p (mode, addr, strict)
return FALSE;
}
+ if (GET_MODE (base) != Pmode)
+ {
+ ADDR_INVALID ("Base is not in Pmode.\n", base);
+ return FALSE;
+ }
+
if ((strict && ! REG_OK_FOR_BASE_STRICT_P (base))
|| (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (base)))
{
@@ -2636,6 +2467,12 @@ legitimate_address_p (mode, addr, strict)
return FALSE;
}
+ if (GET_MODE (indx) != Pmode)
+ {
+ ADDR_INVALID ("Index is not in Pmode.\n", indx);
+ return FALSE;
+ }
+
if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (indx))
|| (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (indx)))
{
@@ -2665,20 +2502,10 @@ legitimate_address_p (mode, addr, strict)
}
}
- /* Validate displacement
- Constant pool addresses must be handled special. They are
- considered legitimate addresses, but only if not used with regs.
- When printed, the output routines know to print the reference with the
- PIC reg, even though the PIC reg doesn't appear in the RTL. */
+ /* Validate displacement. */
if (disp)
{
- if (GET_CODE (disp) == SYMBOL_REF
- && CONSTANT_POOL_ADDRESS_P (disp)
- && base == 0
- && indx == 0)
- ;
-
- else if (!CONSTANT_ADDRESS_P (disp))
+ if (!CONSTANT_ADDRESS_P (disp))
{
ADDR_INVALID ("Displacement is not valid.\n", disp);
return FALSE;
@@ -2690,20 +2517,32 @@ legitimate_address_p (mode, addr, strict)
return FALSE;
}
- else if (flag_pic && SYMBOLIC_CONST (disp)
- && base != pic_offset_table_rtx
- && (indx != pic_offset_table_rtx || scale != NULL_RTX))
+ if (flag_pic && SYMBOLIC_CONST (disp))
{
- ADDR_INVALID ("Displacement is an invalid pic reference.\n", disp);
- return FALSE;
+ if (! legitimate_pic_address_disp_p (disp))
+ {
+ ADDR_INVALID ("Displacement is an invalid PIC construct.\n",
+ disp);
+ return FALSE;
+ }
+
+ if (base != pic_offset_table_rtx
+ && (indx != pic_offset_table_rtx || scale != NULL_RTX))
+ {
+ ADDR_INVALID ("PIC displacement against invalid base.\n", disp);
+ return FALSE;
+ }
}
- else if (HALF_PIC_P () && HALF_PIC_ADDRESS_P (disp)
- && (base != NULL_RTX || indx != NULL_RTX))
+ else if (HALF_PIC_P ())
{
- ADDR_INVALID ("Displacement is an invalid half-pic reference.\n",
- disp);
- return FALSE;
+ if (! HALF_PIC_ADDRESS_P (disp)
+ || (base != NULL_RTX || indx != NULL_RTX))
+ {
+ ADDR_INVALID ("Displacement is an invalid half-pic reference.\n",
+ disp);
+ return FALSE;
+ }
}
}
@@ -2717,29 +2556,20 @@ legitimate_address_p (mode, addr, strict)
/* Return a legitimate reference for ORIG (an address) using the
register REG. If REG is 0, a new pseudo is generated.
- There are three types of references that must be handled:
+ There are two types of references that must be handled:
1. Global data references must load the address from the GOT, via
the PIC reg. An insn is emitted to do this load, and the reg is
returned.
- 2. Static data references must compute the address as an offset
- from the GOT, whose base is in the PIC reg. An insn is emitted to
- compute the address into a reg, and the reg is returned. Static
- data objects have SYMBOL_REF_FLAG set to differentiate them from
- global data objects.
-
- 3. Constant pool addresses must be handled special. They are
- considered legitimate addresses, but only if not used with regs.
- When printed, the output routines know to print the reference with the
- PIC reg, even though the PIC reg doesn't appear in the RTL.
+ 2. Static data references, constant pool addresses, and code labels
+ compute the address as an offset from the GOT, whose base is in
+ the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
+ differentiate them from global data objects. The returned
+ address is the PIC reg + an unspec constant.
GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
- reg also appears in the address (except for constant pool references,
- noted above).
-
- "switch" statements also require special handling when generating
- PIC code. See comments by the `casesi' insn in i386.md for details. */
+ reg also appears in the address. */
rtx
legitimize_pic_address (orig, reg)
@@ -2748,60 +2578,99 @@ legitimize_pic_address (orig, reg)
{
rtx addr = orig;
rtx new = orig;
+ rtx base;
- if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
+ if (GET_CODE (addr) == LABEL_REF
+ || (GET_CODE (addr) == SYMBOL_REF
+ && (CONSTANT_POOL_ADDRESS_P (addr)
+ || SYMBOL_REF_FLAG (addr))))
{
- if (GET_CODE (addr) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (addr))
- reg = new = orig;
- else
- {
- if (reg == 0)
- reg = gen_reg_rtx (Pmode);
+ /* This symbol may be referenced via a displacement from the PIC
+ base address (@GOTOFF). */
- if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_FLAG (addr))
- || GET_CODE (addr) == LABEL_REF)
- new = gen_rtx (PLUS, Pmode, pic_offset_table_rtx, orig);
- else
- new = gen_rtx_MEM (Pmode,
- gen_rtx (PLUS, Pmode, pic_offset_table_rtx, orig));
+ current_function_uses_pic_offset_table = 1;
+ new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 7);
+ new = gen_rtx_CONST (VOIDmode, new);
+ new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
+ if (reg != 0)
+ {
emit_move_insn (reg, new);
+ new = reg;
}
- current_function_uses_pic_offset_table = 1;
- return reg;
}
-
- else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
+ else if (GET_CODE (addr) == SYMBOL_REF)
{
- rtx base;
+ /* This symbol must be referenced via a load from the
+ Global Offset Table (@GOT). */
+ current_function_uses_pic_offset_table = 1;
+ new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, addr), 6);
+ new = gen_rtx_CONST (VOIDmode, new);
+ new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
+ new = gen_rtx_MEM (Pmode, new);
+ RTX_UNCHANGING_P (new) = 1;
+
+ if (reg == 0)
+ reg = gen_reg_rtx (Pmode);
+ emit_move_insn (reg, new);
+ new = reg;
+ }
+ else
+ {
if (GET_CODE (addr) == CONST)
{
addr = XEXP (addr, 0);
- if (GET_CODE (addr) != PLUS)
- abort ();
+ if (GET_CODE (addr) == UNSPEC)
+ {
+ /* Check that the unspec is one of the ones we generate? */
+ }
+ else if (GET_CODE (addr) != PLUS)
+ abort();
}
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
+
+ /* Check first to see if this is a constant offset from a @GOTOFF
+ symbol reference. */
+ if ((GET_CODE (op0) == LABEL_REF
+ || (GET_CODE (op0) == SYMBOL_REF
+ && (CONSTANT_POOL_ADDRESS_P (op0)
+ || SYMBOL_REF_FLAG (op0))))
+ && GET_CODE (op1) == CONST_INT)
+ {
+ current_function_uses_pic_offset_table = 1;
+ new = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, op0), 7);
+ new = gen_rtx_PLUS (VOIDmode, new, op1);
+ new = gen_rtx_CONST (VOIDmode, new);
+ new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
- if (XEXP (addr, 0) == pic_offset_table_rtx)
- return orig;
-
- if (reg == 0)
- reg = gen_reg_rtx (Pmode);
-
- base = legitimize_pic_address (XEXP (addr, 0), reg);
- addr = legitimize_pic_address (XEXP (addr, 1),
- base == reg ? NULL_RTX : reg);
-
- if (GET_CODE (addr) == CONST_INT)
- return plus_constant (base, INTVAL (addr));
+ if (reg != 0)
+ {
+ emit_move_insn (reg, new);
+ new = reg;
+ }
+ }
+ else
+ {
+ base = legitimize_pic_address (XEXP (addr, 0), reg);
+ new = legitimize_pic_address (XEXP (addr, 1),
+ base == reg ? NULL_RTX : reg);
- if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
- {
- base = gen_rtx (PLUS, Pmode, base, XEXP (addr, 0));
- addr = XEXP (addr, 1);
+ if (GET_CODE (new) == CONST_INT)
+ new = plus_constant (base, INTVAL (new));
+ else
+ {
+ if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
+ {
+ base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
+ new = XEXP (new, 1);
+ }
+ new = gen_rtx_PLUS (Pmode, base, new);
+ }
+ }
}
-
- return gen_rtx (PLUS, Pmode, base, addr);
}
return new;
}
@@ -2816,7 +2685,7 @@ emit_pic_move (operands, mode)
rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
- operands[1] = force_reg (SImode, operands[1]);
+ operands[1] = force_reg (Pmode, operands[1]);
else
operands[1] = legitimize_pic_address (operands[1], temp);
}
@@ -2867,8 +2736,8 @@ legitimize_address (x, oldx, mode)
&& (log = (unsigned)exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
{
changed = 1;
- x = gen_rtx (MULT, Pmode, force_reg (Pmode, XEXP (x, 0)),
- GEN_INT (1 << log));
+ x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
+ GEN_INT (1 << log));
}
if (GET_CODE (x) == PLUS)
@@ -3029,31 +2898,14 @@ output_pic_addr_const (file, x, code)
break;
case SYMBOL_REF:
- case LABEL_REF:
- if (GET_CODE (x) == SYMBOL_REF)
- assemble_name (file, XSTR (x, 0));
- else
- {
- ASM_GENERATE_INTERNAL_LABEL (buf, "L",
- CODE_LABEL_NUMBER (XEXP (x, 0)));
- assemble_name (asm_out_file, buf);
- }
-
- if (code == 'X')
- ; /* No suffix, dammit. */
- else if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
- fprintf (file, "@GOTOFF(%%ebx)");
- else if (code == 'P')
- fprintf (file, "@PLT");
- else if (GET_CODE (x) == LABEL_REF)
- fprintf (file, "@GOTOFF");
- else if (! SYMBOL_REF_FLAG (x))
- fprintf (file, "@GOT");
- else
- fprintf (file, "@GOTOFF");
-
+ assemble_name (file, XSTR (x, 0));
+ if (code == 'P' && ! SYMBOL_REF_FLAG (x))
+ fputs ("@PLT", file);
break;
+ case LABEL_REF:
+ x = XEXP (x, 0);
+ /* FALLTHRU */
case CODE_LABEL:
ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
assemble_name (asm_out_file, buf);
@@ -3091,17 +2943,17 @@ output_pic_addr_const (file, x, code)
if (GET_CODE (XEXP (x, 0)) == CONST_INT)
{
output_pic_addr_const (file, XEXP (x, 0), code);
- if (INTVAL (XEXP (x, 1)) >= 0)
- fprintf (file, "+");
+ fprintf (file, "+");
output_pic_addr_const (file, XEXP (x, 1), code);
}
- else
+ else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
{
output_pic_addr_const (file, XEXP (x, 1), code);
- if (INTVAL (XEXP (x, 0)) >= 0)
- fprintf (file, "+");
+ fprintf (file, "+");
output_pic_addr_const (file, XEXP (x, 0), code);
}
+ else
+ abort ();
break;
case MINUS:
@@ -3110,11 +2962,141 @@ output_pic_addr_const (file, x, code)
output_pic_addr_const (file, XEXP (x, 1), code);
break;
+ case UNSPEC:
+ if (XVECLEN (x, 0) != 1)
+ abort ();
+ output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
+ switch (XINT (x, 1))
+ {
+ case 6:
+ fputs ("@GOT", file);
+ break;
+ case 7:
+ fputs ("@GOTOFF", file);
+ break;
+ case 8:
+ fputs ("@PLT", file);
+ break;
+ default:
+ output_operand_lossage ("invalid UNSPEC as operand");
+ break;
+ }
+ break;
+
default:
output_operand_lossage ("invalid expression as operand");
}
}
+static void
+put_jump_code (code, reverse, file)
+ enum rtx_code code;
+ int reverse;
+ FILE *file;
+{
+ int flags = cc_prev_status.flags;
+ int ieee = (TARGET_IEEE_FP && (flags & CC_IN_80387)
+ && !(cc_prev_status.flags & CC_FCOMI));
+ const char *suffix;
+
+ if (flags & CC_Z_IN_NOT_C)
+ switch (code)
+ {
+ case EQ:
+ fputs (reverse ? "c" : "nc", file);
+ return;
+
+ case NE:
+ fputs (reverse ? "nc" : "c", file);
+ return;
+
+ default:
+ abort ();
+ }
+ if (ieee)
+ {
+ switch (code)
+ {
+ case LE:
+ suffix = reverse ? "ae" : "b";
+ break;
+ case GT:
+ case LT:
+ case GE:
+ suffix = reverse ? "ne" : "e";
+ break;
+ case EQ:
+ suffix = reverse ? "ne" : "e";
+ break;
+ case NE:
+ suffix = reverse ? "e" : "ne";
+ break;
+ default:
+ abort ();
+ }
+ fputs (suffix, file);
+ return;
+ }
+ if (flags & CC_TEST_AX)
+ abort();
+ if ((flags & CC_NO_OVERFLOW) && (code == LE || code == GT))
+ abort ();
+ if (reverse)
+ code = reverse_condition (code);
+ switch (code)
+ {
+ case EQ:
+ suffix = "e";
+ break;
+
+ case NE:
+ suffix = "ne";
+ break;
+
+ case GT:
+ suffix = flags & CC_IN_80387 ? "a" : "g";
+ break;
+
+ case GTU:
+ suffix = "a";
+ break;
+
+ case LT:
+ if (flags & CC_NO_OVERFLOW)
+ suffix = "s";
+ else
+ suffix = flags & CC_IN_80387 ? "b" : "l";
+ break;
+
+ case LTU:
+ suffix = "b";
+ break;
+
+ case GE:
+ if (flags & CC_NO_OVERFLOW)
+ suffix = "ns";
+ else
+ suffix = flags & CC_IN_80387 ? "ae" : "ge";
+ break;
+
+ case GEU:
+ suffix = "ae";
+ break;
+
+ case LE:
+ suffix = flags & CC_IN_80387 ? "be" : "le";
+ break;
+
+ case LEU:
+ suffix = "be";
+ break;
+
+ default:
+ abort ();
+ }
+ fputs (suffix, file);
+}
+
/* Append the correct conditional move suffix which corresponds to CODE. */
static void
@@ -3231,12 +3213,13 @@ put_condition_code (code, reverse_cc, mode, file)
C -- print opcode suffix for set/cmov insn.
c -- like C, but print reversed condition
F -- print opcode suffix for fcmov insn.
- f -- like C, but print reversed condition
+ f -- like F, but print reversed condition
+ D -- print the opcode suffix for a jump
+ d -- like D, but print reversed condition
R -- print the prefix for register names.
z -- print the opcode suffix for the size of the current operand.
* -- print a star (in certain assembler syntax)
w -- print the operand as if it's a "word" (HImode) even if it isn't.
- c -- don't print special prefixes before constant operands.
J -- print the appropriate jump operand.
s -- print a shift double count, followed by the assemblers argument
delimiter.
@@ -3246,7 +3229,8 @@ put_condition_code (code, reverse_cc, mode, file)
k -- likewise, print the SImode name of the register.
h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
y -- print "st(0)" instead of "st" as a register.
- P -- print as a PIC constant */
+ P -- print as a PIC constant
+ _ -- output "_" if YES_UNDERSCORES */
void
print_operand (file, x, code)
@@ -3263,6 +3247,12 @@ print_operand (file, x, code)
putc ('*', file);
return;
+ case '_':
+#ifdef YES_UNDERSCORES
+ putc ('_', file);
+#endif
+ return;
+
case 'L':
PUT_OP_SIZE (code, 'l', file);
return;
@@ -3297,12 +3287,10 @@ print_operand (file, x, code)
/* this is the size of op from size of operand */
switch (GET_MODE_SIZE (GET_MODE (x)))
{
- case 1:
- PUT_OP_SIZE ('B', 'b', file);
- return;
-
case 2:
- PUT_OP_SIZE ('W', 'w', file);
+#ifdef HAVE_GAS_FILDS_FISTS
+ PUT_OP_SIZE ('W', 's', file);
+#endif
return;
case 4:
@@ -3332,6 +3320,9 @@ print_operand (file, x, code)
PUT_OP_SIZE ('Q', 'l', file);
return;
+
+ default:
+ abort ();
}
case 'b':
@@ -3374,6 +3365,14 @@ print_operand (file, x, code)
return;
+ case 'D':
+ put_jump_code (GET_CODE (x), 0, file);
+ return;
+
+ case 'd':
+ put_jump_code (GET_CODE (x), 1, file);
+ return;
+
/* This is used by the conditional move instructions. */
case 'C':
put_condition_code (GET_CODE (x), 0, MODE_INT, file);
@@ -3482,6 +3481,11 @@ print_operand_address (file, addr)
switch (GET_CODE (addr))
{
case REG:
+ /* ESI addressing makes instruction vector decoded on the K6. We can
+ avoid this by ESI+0 addressing. */
+ if (REGNO_REG_CLASS (REGNO (addr)) == SIREG
+ && ix86_cpu == PROCESSOR_K6 && !optimize_size)
+ output_addr_const (file, const0_rtx);
ADDR_BEG (file);
fprintf (file, "%se", RP);
fputs (hi_reg_name[REGNO (addr)], file);
@@ -3597,8 +3601,16 @@ print_operand_address (file, addr)
ireg = XEXP (addr, 0);
}
- output_addr_const (file, const0_rtx);
- PRINT_B_I_S (NULL_RTX, ireg, scale, file);
+ /* (reg,reg,) is shorter than (,reg,2). */
+ if (scale == 2)
+ {
+ PRINT_B_I_S (ireg, ireg, 1, file);
+ }
+ else
+ {
+ output_addr_const (file, const0_rtx);
+ PRINT_B_I_S (NULL_RTX, ireg, scale, file);
+ }
}
break;
@@ -3647,8 +3659,7 @@ notice_update_cc (exp)
if (REG_P (SET_DEST (exp))
&& (REG_P (SET_SRC (exp)) || GET_CODE (SET_SRC (exp)) == MEM
|| GET_RTX_CLASS (GET_CODE (SET_SRC (exp))) == '<'
- || (GET_CODE (SET_SRC (exp)) == IF_THEN_ELSE
- && GET_MODE_CLASS (GET_MODE (SET_DEST (exp))) == MODE_INT)))
+ || GET_CODE (SET_SRC (exp)) == IF_THEN_ELSE))
{
if (cc_status.value1
&& reg_overlap_mentioned_p (SET_DEST (exp), cc_status.value1))
@@ -3750,7 +3761,7 @@ notice_update_cc (exp)
if (stack_regs_mentioned_p (SET_SRC (XVECEXP (exp, 0, 0))))
{
cc_status.flags |= CC_IN_80387;
- if (0 && TARGET_CMOVE && stack_regs_mentioned_p
+ if (TARGET_CMOVE && stack_regs_mentioned_p
(XEXP (SET_SRC (XVECEXP (exp, 0, 0)), 1)))
cc_status.flags |= CC_FCOMI;
}
@@ -3782,7 +3793,12 @@ split_di (operands, num, lo_half, hi_half)
while (num--)
{
rtx op = operands[num];
- if (GET_CODE (op) == REG)
+ if (! reload_completed)
+ {
+ lo_half[num] = gen_lowpart (SImode, op);
+ hi_half[num] = gen_highpart (SImode, op);
+ }
+ else if (GET_CODE (op) == REG)
{
lo_half[num] = gen_rtx_REG (SImode, REGNO (op));
hi_half[num] = gen_rtx_REG (SImode, REGNO (op) + 1);
@@ -3880,35 +3896,19 @@ output_387_binary_op (insn, operands)
switch (GET_CODE (operands[3]))
{
case PLUS:
- if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
- || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
- base_op = "fiadd";
- else
- base_op = "fadd";
+ base_op = "fadd";
break;
case MINUS:
- if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
- || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
- base_op = "fisub";
- else
- base_op = "fsub";
+ base_op = "fsub";
break;
case MULT:
- if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
- || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
- base_op = "fimul";
- else
- base_op = "fmul";
+ base_op = "fmul";
break;
case DIV:
- if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
- || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
- base_op = "fidiv";
- else
- base_op = "fdiv";
+ base_op = "fdiv";
break;
default:
@@ -3931,17 +3931,8 @@ output_387_binary_op (insn, operands)
if (GET_CODE (operands[2]) == MEM)
return strcat (buf, AS1 (%z2,%2));
- if (NON_STACK_REG_P (operands[1]))
- {
- output_op_from_reg (operands[1], strcat (buf, AS1 (%z0,%1)));
- return "";
- }
-
- else if (NON_STACK_REG_P (operands[2]))
- {
- output_op_from_reg (operands[2], strcat (buf, AS1 (%z0,%1)));
- return "";
- }
+ if (! STACK_REG_P (operands[1]) || ! STACK_REG_P (operands[2]))
+ abort ();
if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
{
@@ -3964,18 +3955,6 @@ output_387_binary_op (insn, operands)
if (GET_CODE (operands[2]) == MEM)
return strcat (buf, AS1 (%z2,%2));
- if (NON_STACK_REG_P (operands[1]))
- {
- output_op_from_reg (operands[1], strcat (buf, AS1 (r%z0,%1)));
- return "";
- }
-
- else if (NON_STACK_REG_P (operands[2]))
- {
- output_op_from_reg (operands[2], strcat (buf, AS1 (%z0,%1)));
- return "";
- }
-
if (! STACK_REG_P (operands[1]) || ! STACK_REG_P (operands[2]))
abort ();
@@ -4013,10 +3992,10 @@ output_387_binary_op (insn, operands)
}
/* Output code for INSN to convert a float to a signed int. OPERANDS
- are the insn operands. The output may be SFmode or DFmode and the
- input operand may be SImode or DImode. As a special case, make sure
- that the 387 stack top dies if the output mode is DImode, because the
- hardware requires this. */
+ are the insn operands. The input may be SFmode, DFmode, or XFmode
+ and the output operand may be SImode or DImode. As a special case,
+ make sure that the 387 stack top dies if the output mode is DImode,
+ because the hardware requires this. */
char *
output_fix_trunc (insn, operands)
@@ -4029,40 +4008,98 @@ output_fix_trunc (insn, operands)
if (! STACK_TOP_P (operands[1]))
abort ();
- xops[0] = GEN_INT (12);
- xops[1] = operands[4];
+ if (GET_MODE (operands[0]) == DImode && ! stack_top_dies)
+ abort ();
+
+ xops[0] = GEN_INT (0x0c00);
+ xops[1] = operands[5];
output_asm_insn (AS1 (fnstc%W2,%2), operands);
- output_asm_insn (AS2 (mov%L2,%2,%4), operands);
- output_asm_insn (AS2 (mov%B1,%0,%h1), xops);
- output_asm_insn (AS2 (mov%L4,%4,%3), operands);
+ output_asm_insn (AS2 (mov%W5,%2,%w5), operands);
+ output_asm_insn (AS2 (or%W1,%0,%w1), xops);
+ output_asm_insn (AS2 (mov%W3,%w5,%3), operands);
output_asm_insn (AS1 (fldc%W3,%3), operands);
+ xops[0] = NON_STACK_REG_P (operands[0]) ? operands[4] : operands[0];
+
+ if (stack_top_dies)
+ output_asm_insn (AS1 (fistp%z0,%y0), xops);
+ else
+ output_asm_insn (AS1 (fist%z0,%y0), xops);
+
if (NON_STACK_REG_P (operands[0]))
- output_to_reg (operands[0], stack_top_dies, operands[3]);
+ {
+ if (GET_MODE (operands[0]) == SImode)
+ output_asm_insn (AS2 (mov%L0,%4,%0), operands);
+ else
+ {
+ xops[0] = operands[0];
+ xops[1] = operands[4];
+ output_asm_insn (output_move_double (xops), xops);
+ }
+ }
- else if (GET_CODE (operands[0]) == MEM)
+ return AS1 (fldc%W2,%2);
+}
+
+/* Output code for INSN to extend a float. OPERANDS are the insn
+ operands. The output may be DFmode or XFmode and the input operand
+ may be SFmode or DFmode. Operands 2 and 3 are scratch memory and
+ are only necessary if operands 0 or 1 are non-stack registers. */
+
+void
+output_float_extend (insn, operands)
+ rtx insn;
+ rtx *operands;
+{
+ int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
+ rtx xops[2];
+
+ if (! STACK_TOP_P (operands[0]) && ! STACK_TOP_P (operands[1]))
+ abort ();
+
+ if (STACK_TOP_P (operands[0]) && STACK_TOP_P (operands[1]) && stack_top_dies)
+ return;
+
+ if (STACK_TOP_P (operands[0]) )
{
- if (stack_top_dies)
- output_asm_insn (AS1 (fistp%z0,%0), operands);
- else if (GET_MODE (operands[0]) == DImode && ! stack_top_dies)
+ if (NON_STACK_REG_P (operands[1]))
{
- /* There is no DImode version of this without a stack pop, so
- we must emulate it. It doesn't matter much what the second
- instruction is, because the value being pushed on the FP stack
- is not used except for the following stack popping store.
- This case can only happen without optimization, so it doesn't
- matter that it is inefficient. */
- output_asm_insn (AS1 (fistp%z0,%0), operands);
- output_asm_insn (AS1 (fild%z0,%0), operands);
+ if (GET_MODE (operands[1]) == SFmode)
+ output_asm_insn (AS2 (mov%L0,%1,%2), operands);
+ else
+ {
+ xops[0] = operands[2];
+ xops[1] = operands[1];
+ output_asm_insn (output_move_double (xops), xops);
+ }
}
- else
- output_asm_insn (AS1 (fist%z0,%0), operands);
+
+ xops[0] = NON_STACK_REG_P (operands[1]) ? operands[2] : operands[1];
+
+ output_asm_insn (AS1 (fld%z0,%y0), xops);
}
else
- abort ();
+ {
+ xops[0] = NON_STACK_REG_P (operands[0]) ? operands[3] : operands[0];
- return AS1 (fldc%W2,%2);
+ if (stack_top_dies
+ || (GET_CODE (xops[0]) == MEM && GET_MODE (xops[0]) == XFmode))
+ {
+ output_asm_insn (AS1 (fstp%z0,%y0), xops);
+ if (! stack_top_dies)
+ output_asm_insn (AS1 (fld%z0,%y0), xops);
+ }
+ else
+ output_asm_insn (AS1 (fst%z0,%y0), xops);
+
+ if (NON_STACK_REG_P (operands[0]))
+ {
+ xops[0] = operands[0];
+ xops[1] = operands[3];
+ output_asm_insn (output_move_double (xops), xops);
+ }
+ }
}
/* Output code for INSN to compare OPERANDS. The two operands might
@@ -4079,8 +4116,11 @@ output_float_compare (insn, operands)
rtx body = XVECEXP (PATTERN (insn), 0, 0);
int unordered_compare = GET_MODE (SET_SRC (body)) == CCFPEQmode;
rtx tmp;
+ int cc0_set = 1;
+ int i;
- if (0 && TARGET_CMOVE && STACK_REG_P (operands[1]))
+ if (TARGET_CMOVE && STACK_REG_P (operands[1])
+ && STACK_REG_P (operands[0]))
{
cc_status.flags |= CC_FCOMI;
cc_prev_status.flags &= ~CC_TEST_AX;
@@ -4102,7 +4142,7 @@ output_float_compare (insn, operands)
if (STACK_REG_P (operands[1])
&& stack_top_dies
&& find_regno_note (insn, REG_DEAD, REGNO (operands[1]))
- && REGNO (operands[1]) != FIRST_STACK_REG)
+ && REGNO (operands[1]) == FIRST_STACK_REG + 1)
{
/* If both the top of the 387 stack dies, and the other operand
is also a stack register that dies, then this must be a
@@ -4114,7 +4154,8 @@ output_float_compare (insn, operands)
{
output_asm_insn (AS2 (fucomip,%y1,%0), operands);
output_asm_insn (AS1 (fstp, %y0), operands);
- return "";
+ if (!TARGET_IEEE_FP)
+ cc0_set = 0;
}
else
output_asm_insn ("fucompp", operands);
@@ -4125,7 +4166,8 @@ output_float_compare (insn, operands)
{
output_asm_insn (AS2 (fcomip, %y1,%0), operands);
output_asm_insn (AS1 (fstp, %y0), operands);
- return "";
+ if (!TARGET_IEEE_FP)
+ cc0_set = 0;
}
else
output_asm_insn ("fcompp", operands);
@@ -4135,35 +4177,55 @@ output_float_compare (insn, operands)
{
static char buf[100];
- /* Decide if this is the integer or float compare opcode, or the
- unordered float compare. */
+ /* Decide if this is a float compare or an unordered float compare. */
if (unordered_compare)
strcpy (buf, (cc_status.flags & CC_FCOMI) ? "fucomi" : "fucom");
- else if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_FLOAT)
- strcpy (buf, (cc_status.flags & CC_FCOMI) ? "fcomi" : "fcom");
else
- strcpy (buf, "ficom");
+ strcpy (buf, (cc_status.flags & CC_FCOMI) ? "fcomi" : "fcom");
/* Modify the opcode if the 387 stack is to be popped. */
if (stack_top_dies)
strcat (buf, "p");
- if (NON_STACK_REG_P (operands[1]))
- output_op_from_reg (operands[1], strcat (buf, AS1 (%z0,%1)));
- else if (cc_status.flags & CC_FCOMI)
+ if (cc_status.flags & CC_FCOMI)
{
output_asm_insn (strcat (buf, AS2 (%z1,%y1,%0)), operands);
- return "";
+ if (!TARGET_IEEE_FP)
+ cc0_set = 0;
}
else
output_asm_insn (strcat (buf, AS1 (%z1,%y1)), operands);
}
/* Now retrieve the condition code. */
+ if (cc0_set)
+ {
+ char *r = output_fp_cc0_set (insn);
+ if (r[0]) output_asm_insn (r, operands);
+ }
+
+
+ /* We emit fstp instruction after integer comparsions to improve
+ scheduling. */
+ for (i = 0; i < 2 ; i++)
+ {
+ if (STACK_REG_P (operands[i])
+ && find_regno_note (insn, REG_DEAD, REGNO (operands[i]))
+ && REGNO (operands[i]) != FIRST_STACK_REG
+ && (!stack_top_dies || REGNO (operands[i]) != FIRST_STACK_REG + 1))
+ {
+ rtx xexp[2];
+ xexp[0] = gen_rtx_REG (DFmode,
+ REGNO (operands[i]) - (stack_top_dies != 0));
+ output_asm_insn (AS1 (fstp, %y0), xexp);
+ }
+ }
+
+ return "";
+
- return output_fp_cc0_set (insn);
}
/* Output opcodes to transfer the results of FP compare or test INSN
@@ -4179,17 +4241,19 @@ output_fp_cc0_set (insn)
rtx next;
enum rtx_code code;
- xops[0] = gen_rtx_REG (HImode, 0);
- output_asm_insn (AS1 (fnsts%W0,%0), xops);
+ if (!(cc_status.flags & CC_FCOMI))
+ {
+ xops[0] = gen_rtx_REG (HImode, 0);
+ output_asm_insn (AS1 (fnsts%W0,%0), xops);
+ }
if (! TARGET_IEEE_FP)
{
if (!(cc_status.flags & CC_REVERSED))
{
next = next_cc0_user (insn);
-
- if (GET_CODE (next) == JUMP_INSN
- && GET_CODE (PATTERN (next)) == SET
+
+ if (GET_CODE (PATTERN (next)) == SET
&& SET_DEST (PATTERN (next)) == pc_rtx
&& GET_CODE (SET_SRC (PATTERN (next))) == IF_THEN_ELSE)
code = GET_CODE (XEXP (SET_SRC (PATTERN (next)), 0));
@@ -4214,8 +4278,7 @@ output_fp_cc0_set (insn)
if (next == NULL_RTX)
abort ();
- if (GET_CODE (next) == JUMP_INSN
- && GET_CODE (PATTERN (next)) == SET
+ if (GET_CODE (PATTERN (next)) == SET
&& SET_DEST (PATTERN (next)) == pc_rtx
&& GET_CODE (SET_SRC (PATTERN (next))) == IF_THEN_ELSE)
code = GET_CODE (XEXP (SET_SRC (PATTERN (next)), 0));
@@ -4238,61 +4301,103 @@ output_fp_cc0_set (insn)
else
abort ();
- xops[0] = gen_rtx_REG (QImode, 0);
+ if (cc_status.flags & CC_FCOMI)
+ {
+ /* It is very tricky. We have to do it right. */
- switch (code)
+ xops [0] = gen_rtx_REG (QImode, 0);
+
+ switch (code)
+ {
+ case GT:
+ case GE:
+ break;
+
+ case LT:
+ output_asm_insn (AS1 (setb,%b0), xops);
+ output_asm_insn (AS1 (setp,%h0), xops);
+ output_asm_insn (AS2 (cmp%B0,%b0,%h0), xops);
+ break;
+
+ case LE:
+ output_asm_insn (AS1 (setbe,%b0), xops);
+ output_asm_insn (AS1 (setnp,%h0), xops);
+ output_asm_insn (AS2 (xor%B0,%b0,%h0), xops);
+ break;
+
+ case EQ:
+ case NE:
+ output_asm_insn (AS1 (setne,%b0), xops);
+ output_asm_insn (AS1 (setp,%h0), xops);
+ output_asm_insn (AS2 (or%B0,%b0,%h0), xops);
+ break;
+
+ case GTU:
+ case LTU:
+ case GEU:
+ case LEU:
+ default:
+ abort ();
+ }
+ }
+ else
{
- case GT:
- xops[1] = GEN_INT (0x45);
- output_asm_insn (AS2 (and%B0,%1,%h0), xops);
- /* je label */
- break;
+ xops[0] = gen_rtx_REG (QImode, 0);
- case LT:
- xops[1] = GEN_INT (0x45);
- xops[2] = GEN_INT (0x01);
- output_asm_insn (AS2 (and%B0,%1,%h0), xops);
- output_asm_insn (AS2 (cmp%B0,%2,%h0), xops);
- /* je label */
- break;
+ switch (code)
+ {
+ case GT:
+ xops[1] = GEN_INT (0x45);
+ output_asm_insn (AS2 (and%B0,%1,%h0), xops);
+ /* je label */
+ break;
- case GE:
- xops[1] = GEN_INT (0x05);
- output_asm_insn (AS2 (and%B0,%1,%h0), xops);
- /* je label */
- break;
+ case LT:
+ xops[1] = GEN_INT (0x45);
+ xops[2] = GEN_INT (0x01);
+ output_asm_insn (AS2 (and%B0,%1,%h0), xops);
+ output_asm_insn (AS2 (cmp%B0,%2,%h0), xops);
+ /* je label */
+ break;
- case LE:
- xops[1] = GEN_INT (0x45);
- xops[2] = GEN_INT (0x40);
- output_asm_insn (AS2 (and%B0,%1,%h0), xops);
- output_asm_insn (AS1 (dec%B0,%h0), xops);
- output_asm_insn (AS2 (cmp%B0,%2,%h0), xops);
- /* jb label */
- break;
+ case GE:
+ xops[1] = GEN_INT (0x05);
+ output_asm_insn (AS2 (and%B0,%1,%h0), xops);
+ /* je label */
+ break;
- case EQ:
- xops[1] = GEN_INT (0x45);
- xops[2] = GEN_INT (0x40);
- output_asm_insn (AS2 (and%B0,%1,%h0), xops);
- output_asm_insn (AS2 (cmp%B0,%2,%h0), xops);
- /* je label */
- break;
+ case LE:
+ xops[1] = GEN_INT (0x45);
+ xops[2] = GEN_INT (0x40);
+ output_asm_insn (AS2 (and%B0,%1,%h0), xops);
+ output_asm_insn (AS1 (dec%B0,%h0), xops);
+ output_asm_insn (AS2 (cmp%B0,%2,%h0), xops);
+ /* jb label */
+ break;
- case NE:
- xops[1] = GEN_INT (0x44);
- xops[2] = GEN_INT (0x40);
- output_asm_insn (AS2 (and%B0,%1,%h0), xops);
- output_asm_insn (AS2 (xor%B0,%2,%h0), xops);
- /* jne label */
- break;
+ case EQ:
+ xops[1] = GEN_INT (0x45);
+ xops[2] = GEN_INT (0x40);
+ output_asm_insn (AS2 (and%B0,%1,%h0), xops);
+ output_asm_insn (AS2 (cmp%B0,%2,%h0), xops);
+ /* je label */
+ break;
- case GTU:
- case LTU:
- case GEU:
- case LEU:
- default:
- abort ();
+ case NE:
+ xops[1] = GEN_INT (0x44);
+ xops[2] = GEN_INT (0x40);
+ output_asm_insn (AS2 (and%B0,%1,%h0), xops);
+ output_asm_insn (AS2 (xor%B0,%2,%h0), xops);
+ /* jne label */
+ break;
+
+ case GTU:
+ case LTU:
+ case GEU:
+ case LEU:
+ default:
+ abort ();
+ }
}
return "";
@@ -4928,16 +5033,36 @@ int
agi_dependent (insn, dep_insn)
rtx insn, dep_insn;
{
+ int push = 0, push_dep = 0;
if (GET_CODE (dep_insn) == INSN
&& GET_CODE (PATTERN (dep_insn)) == SET
- && GET_CODE (SET_DEST (PATTERN (dep_insn))) == REG)
- return reg_mentioned_in_mem (SET_DEST (PATTERN (dep_insn)), insn);
+ && GET_CODE (SET_DEST (PATTERN (dep_insn))) == REG
+ && reg_mentioned_in_mem (SET_DEST (PATTERN (dep_insn)), insn))
+ return 1;
+
+ if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_DEST (PATTERN (insn))) == MEM
+ && push_operand (SET_DEST (PATTERN (insn)),
+ GET_MODE (SET_DEST (PATTERN (insn)))))
+ push = 1;
if (GET_CODE (dep_insn) == INSN && GET_CODE (PATTERN (dep_insn)) == SET
&& GET_CODE (SET_DEST (PATTERN (dep_insn))) == MEM
&& push_operand (SET_DEST (PATTERN (dep_insn)),
GET_MODE (SET_DEST (PATTERN (dep_insn)))))
- return reg_mentioned_in_mem (stack_pointer_rtx, insn);
+ push_dep = 1;
+
+ /* CPUs contain special hardware to allow two pushes. */
+ if (push && push_dep)
+ return 0;
+
+ /* Push operation implicitly change stack pointer causing AGI stalls. */
+ if (push_dep && reg_mentioned_in_mem (stack_pointer_rtx, insn))
+ return 1;
+
+ /* Push also implicitly read stack pointer. */
+ if (push && modified_in_p (stack_pointer_rtx, dep_insn))
+ return 1;
return 0;
}
@@ -5197,6 +5322,13 @@ output_fp_conditional_move (which_alternative, operands)
int which_alternative;
rtx operands[];
{
+ enum rtx_code code = GET_CODE (operands[1]);
+
+ /* This should never happen. */
+ if (!(cc_prev_status.flags & CC_IN_80387)
+ && (code == GT || code == LE || code == GE || code == LT))
+ abort ();
+
switch (which_alternative)
{
case 0:
@@ -5221,9 +5353,7 @@ output_int_conditional_move (which_alternative, operands)
int which_alternative;
rtx operands[];
{
- int code = GET_CODE (operands[1]);
- enum machine_mode mode;
- rtx xops[4];
+ enum rtx_code code = GET_CODE (operands[1]);
/* This is very tricky. We have to do it right. For a code segement
like:
@@ -5243,29 +5373,16 @@ output_int_conditional_move (which_alternative, operands)
&& (cc_prev_status.flags & CC_NO_OVERFLOW))
return NULL_PTR;
- mode = GET_MODE (operands [0]);
- if (mode == DImode)
- {
- xops [0] = gen_rtx_SUBREG (SImode, operands [0], 1);
- xops [1] = operands [1];
- xops [2] = gen_rtx_SUBREG (SImode, operands [2], 1);
- xops [3] = gen_rtx_SUBREG (SImode, operands [3], 1);
- }
-
switch (which_alternative)
{
case 0:
/* r <- cond ? arg : r */
output_asm_insn (AS2 (cmov%C1,%2,%0), operands);
- if (mode == DImode)
- output_asm_insn (AS2 (cmov%C1,%2,%0), xops);
break;
case 1:
/* r <- cond ? r : arg */
output_asm_insn (AS2 (cmov%c1,%3,%0), operands);
- if (mode == DImode)
- output_asm_insn (AS2 (cmov%c1,%3,%0), xops);
break;
default:
@@ -5274,3 +5391,346 @@ output_int_conditional_move (which_alternative, operands)
return "";
}
+
+int
+x86_adjust_cost (insn, link, dep_insn, cost)
+ rtx insn, link, dep_insn;
+ int cost;
+{
+ rtx next_inst;
+
+ if (GET_CODE (dep_insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
+ return 0;
+
+ if (GET_CODE (dep_insn) == INSN
+ && GET_CODE (PATTERN (dep_insn)) == SET
+ && GET_CODE (SET_DEST (PATTERN (dep_insn))) == REG
+ && GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && !reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
+ SET_SRC (PATTERN (insn))))
+ return 0; /* ??? */
+
+
+ switch (ix86_cpu)
+ {
+ case PROCESSOR_PENTIUM:
+ if (cost != 0 && is_fp_insn (insn) && is_fp_insn (dep_insn)
+ && !is_fp_dest (dep_insn))
+ return 0;
+
+ if (agi_dependent (insn, dep_insn))
+ return cost ? cost + 1 : 2;
+
+ if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && SET_DEST (PATTERN (insn)) == cc0_rtx
+ && (next_inst = next_nonnote_insn (insn))
+ && GET_CODE (next_inst) == JUMP_INSN)
+ /* compare probably paired with jump */
+ return 0;
+
+ /* Stores stalls one cycle longer than other insns. */
+ if (is_fp_insn (insn) && cost && is_fp_store (dep_insn))
+ cost++;
+ break;
+ case PROCESSOR_K6:
+ default:
+ if (!is_fp_dest (dep_insn))
+ {
+ if(!agi_dependent (insn, dep_insn))
+ return 0;
+ if (TARGET_486)
+ return 2;
+ }
+ else
+ if (is_fp_store (insn) && is_fp_insn (dep_insn)
+ && NEXT_INSN (insn) && NEXT_INSN (NEXT_INSN (insn))
+ && NEXT_INSN (NEXT_INSN (NEXT_INSN (insn)))
+ && (GET_CODE (NEXT_INSN (insn)) == INSN)
+ && (GET_CODE (NEXT_INSN (NEXT_INSN (insn))) == JUMP_INSN)
+ && (GET_CODE (NEXT_INSN (NEXT_INSN (NEXT_INSN (insn)))) == NOTE)
+ && (NOTE_LINE_NUMBER (NEXT_INSN (NEXT_INSN (NEXT_INSN (insn))))
+ == NOTE_INSN_LOOP_END))
+ return 3;
+ break;
+ }
+
+ return cost;
+}
+
+/* Output assembly code for a left shift.
+
+ Always use "sal" when shifting a memory operand or for a non constant
+ shift count.
+
+ When optimizing for size, we know that src == dest, and we should always
+ use "sal". If src != dest, then copy src to dest and use "sal".
+
+ Pentium and PPro (speed):
+
+ When src == dest, use "add" for a shift counts of one, else use
+ "sal". If we modeled Pentium AGI stalls and U/V pipelining better we
+ would want to generate lea for some shifts on the Pentium.
+
+ When src != dest, use "lea" for small shift counts. Otherwise,
+ copy src to dest and use the normal shifting code. Exception for
+ TARGET_DOUBLE_WITH_ADD. */
+
+char *
+output_ashl (insn, operands)
+ rtx insn, *operands;
+{
+ /* Handle case where srcreg != dstreg. */
+ if (REG_P (operands[0]) && REGNO (operands[0]) != REGNO (operands[1]))
+ {
+ if (TARGET_DOUBLE_WITH_ADD && INTVAL (operands[2]) == 1)
+ switch (GET_MODE (operands[0]))
+ {
+ case SImode:
+ output_asm_insn (AS2 (mov%L0,%1,%0), operands);
+ return AS2 (add%L0,%1,%0);
+ case HImode:
+ output_asm_insn (AS2 (mov%L0,%k1,%k0), operands);
+ if (i386_cc_probably_useless_p (insn))
+ {
+ CC_STATUS_INIT;
+ return AS2 (add%L0,%k1,%k0);
+ }
+ return AS2 (add%W0,%k1,%k0);
+ case QImode:
+ output_asm_insn (AS2 (mov%B0,%1,%0), operands);
+ return AS2 (add%B0,%1,%0);
+ default:
+ abort ();
+ }
+ else
+ {
+ CC_STATUS_INIT;
+
+ /* This should be extremely rare (impossible?). We can not encode a
+ shift of the stack pointer using an lea instruction. So copy the
+ stack pointer into the destination register and use an lea. */
+ if (operands[1] == stack_pointer_rtx)
+ {
+ output_asm_insn (AS2 (mov%L0,%k1,%k0), operands);
+ operands[1] = operands[0];
+ }
+
+ /* For shifts up to and including 3 bits, use lea. */
+ operands[1] = gen_rtx_MULT (SImode,
+ gen_rtx_REG (SImode, REGNO (operands[1])),
+ GEN_INT (1 << INTVAL (operands[2])));
+ return AS2 (lea%L0,%a1,%k0);
+ }
+ }
+
+ /* Source and destination match. */
+
+ /* Handle variable shift. */
+ if (REG_P (operands[2]))
+ switch (GET_MODE (operands[0]))
+ {
+ case SImode:
+ return AS2 (sal%L0,%b2,%0);
+ case HImode:
+ if (REG_P (operands[0]) && i386_cc_probably_useless_p (insn))
+ {
+ CC_STATUS_INIT;
+ return AS2 (sal%L0,%b2,%k0);
+ }
+ else
+ return AS2 (sal%W0,%b2,%0);
+ case QImode:
+ return AS2 (sal%B0,%b2,%0);
+ default:
+ abort ();
+ }
+
+ /* Always perform shift by 1 using an add instruction. */
+ if (REG_P (operands[0]) && operands[2] == const1_rtx)
+ switch (GET_MODE (operands[0]))
+ {
+ case SImode:
+ return AS2 (add%L0,%0,%0);
+ case HImode:
+ if (REG_P (operands[0]) && i386_cc_probably_useless_p (insn))
+ {
+ CC_STATUS_INIT;
+ return AS2 (add%L0,%k0,%k0);
+ }
+ else
+ return AS2 (add%W0,%0,%0);
+ case QImode:
+ return AS2 (add%B0,%0,%0);
+ default:
+ abort ();
+ }
+
+#if 0
+ /* ??? Currently disabled. Because our model of Pentium is far from being
+ exact, this change will need some benchmarking. */
+ /* Shift reg by 2 or 3 use an lea instruction for Pentium if this is
+ insn is expected to issue into the V pipe (the insn's mode will be
+ TImode for a U pipe, and !TImode for a V pipe instruction). */
+ if (! optimize_size
+ && REG_P (operands[0])
+ && GET_CODE (operands[2]) == CONST_INT
+ && INTVAL (operands[2]) <= 3
+ && (int)ix86_cpu == (int)PROCESSOR_PENTIUM
+ && GET_MODE (insn) != TImode)
+ {
+ CC_STATUS_INIT;
+ operands[1] = gen_rtx_MULT (SImode, gen_rtx_REG (SImode, REGNO (operands[1])),
+ GEN_INT (1 << INTVAL (operands[2])));
+ return AS2 (lea%L0,%a1,%0);
+ }
+#endif
+
+ /* Otherwise use a shift instruction. */
+ switch (GET_MODE (operands[0]))
+ {
+ case SImode:
+ return AS2 (sal%L0,%2,%0);
+ case HImode:
+ if (REG_P (operands[0]) && i386_cc_probably_useless_p (insn))
+ {
+ CC_STATUS_INIT;
+ return AS2 (sal%L0,%2,%k0);
+ }
+ else
+ return AS2 (sal%W0,%2,%0);
+ case QImode:
+ return AS2 (sal%B0,%2,%0);
+ default:
+ abort ();
+ }
+}
+
+/* Given the memory address ADDR, calculate the length of the address or
+ the length of just the displacement (controlled by DISP_LENGTH).
+
+ The length returned does not include the one-byte modrm, opcode,
+ or prefix. */
+
+int
+memory_address_info (addr, disp_length)
+ rtx addr;
+ int disp_length;
+{
+ rtx base, index, disp, scale;
+ rtx op0, op1;
+ int len;
+
+ if (GET_CODE (addr) == PRE_DEC
+ || GET_CODE (addr) == POST_INC)
+ return 0;
+
+ /* Register Indirect. */
+ if (register_operand (addr, Pmode))
+ {
+ /* Special cases: ebp and esp need the two-byte modrm form.
+
+ We change [ESI] to [ESI+0] on the K6 when not optimizing
+ for size. */
+ if (addr == stack_pointer_rtx
+ || addr == arg_pointer_rtx
+ || addr == frame_pointer_rtx
+ || (REGNO_REG_CLASS (REGNO (addr)) == SIREG
+ && ix86_cpu == PROCESSOR_K6 && !optimize_size))
+ return 1;
+ else
+ return 0;
+ }
+
+ /* Direct Addressing. */
+ if (CONSTANT_P (addr))
+ return 4;
+
+ index = base = disp = scale = NULL_RTX;
+ op0 = XEXP (addr, 0);
+ op1 = XEXP (addr, 1);
+
+ if (GET_CODE (addr) == PLUS)
+ {
+ if (register_operand (op0, Pmode))
+ {
+ if (register_operand (op1, Pmode))
+ index = op0, base = op1;
+ else
+ base = op0, disp = op1;
+ }
+ else if (GET_CODE (op0) == MULT)
+ {
+ index = XEXP (op0, 0);
+ scale = XEXP (op0, 1);
+ if (register_operand (op1, Pmode))
+ base = op1;
+ else
+ disp = op1;
+ }
+ else if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
+ {
+ index = XEXP (XEXP (op0, 0), 0);
+ scale = XEXP (XEXP (op0, 0), 1);
+ base = XEXP (op0, 1);
+ disp = op1;
+ }
+ else if (GET_CODE (op0) == PLUS)
+ {
+ index = XEXP (op0, 0);
+ base = XEXP (op0, 1);
+ disp = op1;
+ }
+ else
+ abort ();
+ }
+ else if (GET_CODE (addr) == MULT
+ /* We're called for lea too, which implements ashift on occasion. */
+ || GET_CODE (addr) == ASHIFT)
+ {
+ index = XEXP (addr, 0);
+ scale = XEXP (addr, 1);
+ }
+ else
+ abort ();
+
+ /* Allow arg pointer and stack pointer as index if there is not scaling */
+ if (base && index && !scale
+ && (index == stack_pointer_rtx
+ || index == arg_pointer_rtx
+ || index == frame_pointer_rtx))
+ {
+ rtx tmp = base;
+ base = index;
+ index = tmp;
+ }
+
+ /* Special case: ebp cannot be encoded as a base without a displacement. */
+ if (base == frame_pointer_rtx && !disp)
+ disp = const0_rtx;
+
+ /* Scaling can not be encoded without base or displacement.
+ Except for scale == 1 where we can encode reg + reg instead of reg * 2. */
+ if (!base && index
+ && (!scale || GET_CODE (scale) != CONST_INT || (INTVAL (scale) != 1)))
+ disp = const0_rtx;
+
+ /* Find the length of the displacement constant. */
+ len = 0;
+ if (disp)
+ {
+ if (GET_CODE (disp) == CONST_INT
+ && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
+ len = 1;
+ else
+ len = 4;
+ }
+
+ /* An index requires the two-byte modrm form. Not important
+ if we are computing just length of the displacement. */
+ if (index && ! disp_length)
+ len += 1;
+
+ return len;
+}
OpenPOWER on IntegriCloud