summaryrefslogtreecommitdiffstats
path: root/lib/Target
diff options
context:
space:
mode:
authored <ed@FreeBSD.org>2009-06-14 09:23:33 +0000
committered <ed@FreeBSD.org>2009-06-14 09:23:33 +0000
commitdb89e312d968c258aba3c79c1c398f5fb19267a3 (patch)
tree49817b316c4fdaa56d9d16ebf2555303d1a990e0 /lib/Target
parentde000e339094f8c6e06a635dac9a803861416ec6 (diff)
downloadFreeBSD-src-db89e312d968c258aba3c79c1c398f5fb19267a3.zip
FreeBSD-src-db89e312d968c258aba3c79c1c398f5fb19267a3.tar.gz
Import LLVM r73340.
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/ARM/ARM.h10
-rw-r--r--lib/Target/ARM/ARM.td8
-rw-r--r--lib/Target/ARM/ARMCallingConv.td60
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp7
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td5
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td3
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td2
-rw-r--r--lib/Target/ARM/ARMLoadStoreOptimizer.cpp336
-rw-r--r--lib/Target/ARM/ARMRegisterInfo.td15
-rw-r--r--lib/Target/ARM/ARMSubtarget.cpp6
-rw-r--r--lib/Target/ARM/ARMSubtarget.h4
-rw-r--r--lib/Target/ARM/ARMTargetMachine.cpp13
-rw-r--r--lib/Target/ARM/ARMTargetMachine.h1
-rw-r--r--lib/Target/CMakeLists.txt3
-rw-r--r--lib/Target/PIC16/PIC16AsmPrinter.cpp90
-rw-r--r--lib/Target/PIC16/PIC16AsmPrinter.h1
-rw-r--r--lib/Target/PIC16/PIC16DebugInfo.cpp167
-rw-r--r--lib/Target/PIC16/PIC16DebugInfo.h23
-rw-r--r--lib/Target/PIC16/PIC16ISelLowering.cpp23
-rw-r--r--lib/Target/PIC16/PIC16TargetAsmInfo.h6
-rw-r--r--lib/Target/TargetELFWriterInfo.cpp36
-rw-r--r--lib/Target/TargetMachine.cpp22
-rw-r--r--lib/Target/X86/README.txt114
-rw-r--r--lib/Target/X86/X86CallingConv.td44
-rw-r--r--lib/Target/X86/X86ELFWriterInfo.cpp23
-rw-r--r--lib/Target/X86/X86ELFWriterInfo.h4
-rw-r--r--lib/Target/X86/X86FastISel.cpp2
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp8
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp105
-rw-r--r--lib/Target/X86/X86Subtarget.cpp4
-rw-r--r--lib/Target/X86/X86TargetMachine.cpp3
31 files changed, 854 insertions, 294 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index ac7de91..7edd118 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -98,12 +98,12 @@ FunctionPass *createARMCodePrinterPass(raw_ostream &O,
FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
MachineCodeEmitter &MCE);
-FunctionPass *createARMCodeEmitterPass( ARMTargetMachine &TM,
- MachineCodeEmitter &MCE);
-FunctionPass *createARMJITCodeEmitterPass( ARMTargetMachine &TM,
- JITCodeEmitter &JCE);
+FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+ MachineCodeEmitter &MCE);
+FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM,
+ JITCodeEmitter &JCE);
-FunctionPass *createARMLoadStoreOptimizationPass();
+FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMConstantIslandPass();
} // end namespace llvm;
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 4ac6857..594811d 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -28,6 +28,8 @@ def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
"ARM v5TE, v5TEj, v5TExp">;
def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
"ARM v6">;
+def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
+ "ARM v6t2">;
def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
"ARM v7A">;
def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
@@ -92,9 +94,11 @@ def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
def : Proc<"mpcorenovfp", [ArchV6]>;
def : Proc<"mpcore", [ArchV6, FeatureVFP2]>;
-def : Proc<"arm1156t2-s", [ArchV6, FeatureThumb2]>;
-def : Proc<"arm1156t2f-s", [ArchV6, FeatureThumb2, FeatureVFP2]>;
+// V6T2 Processors.
+def : Proc<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>;
+def : Proc<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
+// V7 Processors.
def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>;
def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 6cd786e..f126760 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -17,6 +17,11 @@ class CCIfSubtarget<string F, CCAction A>:
class CCIfAlign<string Align, CCAction A>:
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
+/// CCIfFloatABI - Match of the float ABI and the arg. ABIType may be "Hard" or
+/// "Soft".
+class CCIfFloatABI<string ABIType, CCAction A>:
+ CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>;
+
//===----------------------------------------------------------------------===//
// ARM APCS Calling Convention
//===----------------------------------------------------------------------===//
@@ -43,9 +48,10 @@ def RetCC_ARM_APCS : CallingConv<[
]>;
//===----------------------------------------------------------------------===//
-// ARM AAPCS (EABI) Calling Convention
+// ARM AAPCS (EABI) Calling Convention, common parts
//===----------------------------------------------------------------------===//
-def CC_ARM_AAPCS : CallingConv<[
+
+def CC_ARM_AAPCS_Common : CallingConv<[
CCIfType<[i8, i16], CCPromoteToType<i32>>,
@@ -53,23 +59,51 @@ def CC_ARM_AAPCS : CallingConv<[
// i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
// (and the same is true for f64 if VFP is not enabled)
CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>,
- CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
-
- CCIfType<[f32], CCBitConvertToType<i32>>,
CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&"
"ArgFlags.getOrigAlign() != 8",
CCAssignToReg<[R0, R1, R2, R3]>>>,
- CCIfType<[i32], CCAssignToStack<4, 4>>,
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
CCIfType<[f64], CCAssignToStack<8, 8>>
]>;
-def RetCC_ARM_AAPCS : CallingConv<[
+def RetCC_ARM_AAPCS_Common : CallingConv<[
+ CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
+ CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+]>;
+
+//===----------------------------------------------------------------------===//
+// ARM AAPCS (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS : CallingConv<[
+ CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>,
CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS : CallingConv<[
CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
+]>;
- CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>,
- CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>>
+//===----------------------------------------------------------------------===//
+// ARM AAPCS-VFP (EABI) Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_ARM_AAPCS_VFP : CallingConv<[
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<CC_ARM_AAPCS_Common>
+]>;
+
+def RetCC_ARM_AAPCS_VFP : CallingConv<[
+ CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
+ CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+ S9, S10, S11, S12, S13, S14, S15]>>,
+ CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
//===----------------------------------------------------------------------===//
@@ -77,11 +111,19 @@ def RetCC_ARM_AAPCS : CallingConv<[
//===----------------------------------------------------------------------===//
def CC_ARM : CallingConv<[
+ CCIfSubtarget<"isAAPCS_ABI()",
+ CCIfSubtarget<"hasVFP2()",
+ CCIfFloatABI<"Hard",
+ CCDelegateTo<CC_ARM_AAPCS_VFP>>>>,
CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>,
CCDelegateTo<CC_ARM_APCS>
]>;
def RetCC_ARM : CallingConv<[
+ CCIfSubtarget<"isAAPCS_ABI()",
+ CCIfSubtarget<"hasVFP2()",
+ CCIfFloatABI<"Hard",
+ CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>,
CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>,
CCDelegateTo<RetCC_ARM_APCS>
]>;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c0fd9dc..ec8bd1f 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -1101,7 +1101,12 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
else
RC = ARM::GPRRegisterClass;
- if (RegVT == MVT::f64) {
+ if (FloatABIType == FloatABI::Hard) {
+ if (RegVT == MVT::f32)
+ RC = ARM::SPRRegisterClass;
+ else if (RegVT == MVT::f64)
+ RC = ARM::DPRRegisterClass;
+ } else if (RegVT == MVT::f64) {
// f64 is passed in pairs of GPRs and must be combined.
RegVT = MVT::i32;
} else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32)))
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 680e772..cc9f1a5 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -451,7 +451,7 @@ multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> {
/// the function. The first operand is the ID# for this instruction, the second
/// is the index into the MachineConstantPool that this is, the third is the
/// size in bytes of this constant pool entry.
-let isNotDuplicable = 1 in
+let neverHasSideEffects = 1, isNotDuplicable = 1 in
def CONSTPOOL_ENTRY :
PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size),
@@ -771,6 +771,7 @@ def STM : AXI4st<(outs),
// Move Instructions.
//
+let neverHasSideEffects = 1 in
def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm,
"mov", " $dst, $src", []>, UnaryDP;
def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
@@ -946,6 +947,7 @@ def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
// Extra precision multiplies with low / high results
+let neverHasSideEffects = 1 in {
def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b),
"smull", " $ldst, $hdst, $a, $b", []>;
@@ -967,6 +969,7 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst),
(ins GPR:$a, GPR:$b),
"umaal", " $ldst, $hdst, $a, $b", []>,
Requires<[IsARM, HasV6]>;
+} // neverHasSideEffects
// Most significant word multiply
def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index ffb83a8..54232f6 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -298,6 +298,7 @@ def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"add $dst, $lhs, $rhs",
[(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>;
+let neverHasSideEffects = 1 in
def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs),
"add $dst, $rhs @ addhirr", []>;
@@ -387,6 +388,7 @@ def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src),
// Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
// which is MOV(3). This also supports high registers.
+let neverHasSideEffects = 1 in {
def tMOVr : TI<(outs tGPR:$dst), (ins tGPR:$src),
"cpy $dst, $src", []>;
def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src),
@@ -395,6 +397,7 @@ def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src),
"cpy $dst, $src\t@ lor2hir", []>;
def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src),
"cpy $dst, $src\t@ hir2hir", []>;
+} // neverHasSideEffects
def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"mul $dst, $rhs",
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 0247daf..9104c77 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -192,11 +192,13 @@ def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
let Inst{7-4} = 0b1100;
}
+let neverHasSideEffects = 1 in {
def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
"fcpyd", " $dst, $a", []>;
def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
"fcpys", " $dst, $a", []>;
+} // neverHasSideEffects
def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
"fnegd", " $dst, $a",
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 963ff0d..684ecb4 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -17,24 +17,31 @@
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+
+/// ARMLoadStoreOpt - Post-register allocation pass that combines
+/// load / store instructions to form ldm / stm instructions.
namespace {
struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
@@ -81,12 +88,6 @@ namespace {
char ARMLoadStoreOpt::ID = 0;
}
-/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
-/// optimization pass.
-FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
- return new ARMLoadStoreOpt();
-}
-
static int getLoadStoreMultipleOpcode(int Opcode) {
switch (Opcode) {
case ARM::LDR:
@@ -582,6 +583,23 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
RS->forward(prior(Loc));
}
+static int getMemoryOpOffset(const MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+ int Offset = isAM2
+ ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM2) {
+ if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ } else {
+ if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ }
+ return Offset;
+}
+
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
@@ -606,22 +624,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
bool isMemOp = isMemoryOp(MBBI);
if (isMemOp) {
int Opcode = MBBI->getOpcode();
- bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
unsigned Size = getLSMultipleTransferSize(MBBI);
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
- unsigned NumOperands = MBBI->getDesc().getNumOperands();
- unsigned OffField = MBBI->getOperand(NumOperands-3).getImm();
- int Offset = isAM2
- ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
- if (isAM2) {
- if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
- Offset = -Offset;
- } else {
- if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
- Offset = -Offset;
- }
+ int Offset = getMemoryOpOffset(MBBI);
// Watch out for:
// r4 := ldr [r5]
// r5 := ldr [r5, #4]
@@ -744,6 +751,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
return NumMerges > 0;
}
+namespace {
+ struct OffsetCompare {
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ int LOffset = getMemoryOpOffset(LHS);
+ int ROffset = getMemoryOpOffset(RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ }
+ };
+}
+
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return op
/// (bx lr) into the preceding stack restore so it directly restores the value
/// of LR into pc.
@@ -788,3 +806,277 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
delete RS;
return Modified;
}
+
+
+/// ARMPreAllocLoadStoreOpt - Pre-register allocation pass that moves
+/// loads / stores from consecutive locations closer together to make it more
+/// likely they will be combined later.
+
+namespace {
+ struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
+ static char ID;
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pre- register allocation load / store optimization pass";
+ }
+
+ private:
+ bool RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap);
+ bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+ };
+ char ARMPreAllocLoadStoreOpt::ID = 0;
+}
+
+bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ TII = Fn.getTarget().getInstrInfo();
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI)
+ Modified |= RescheduleLoadStoreInstrs(MFI);
+
+ return Modified;
+}
+
+static bool IsSafeToMove(bool isLd, unsigned Base,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallPtrSet<MachineInstr*, 4> MoveOps,
+ const TargetRegisterInfo *TRI) {
+ // Are there stores / loads / calls between them?
+ // FIXME: This is overly conservative. We should make use of alias information
+ // some day.
+ while (++I != E) {
+ const TargetInstrDesc &TID = I->getDesc();
+ if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
+ return false;
+ if (isLd && TID.mayStore())
+ return false;
+ if (!isLd) {
+ if (TID.mayLoad())
+ return false;
+ // It's not safe to move the first 'str' down.
+ // str r1, [r0]
+ // strh r5, [r0]
+ // str r4, [r0, #+4]
+ if (TID.mayStore() && !MoveOps.count(&*I))
+ return false;
+ }
+ for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
+ MachineOperand &MO = I->getOperand(j);
+ if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base))
+ return false;
+ }
+ }
+ return true;
+}
+
+bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
+ bool RetVal = false;
+
+ // Sort by offset (in reverse order).
+ std::sort(Ops.begin(), Ops.end(), OffsetCompare());
+
+  // The loads / stores of the same base are in order. Scan them from first to
+  // last and check for the following:
+ // 1. Any def of base.
+ // 2. Any gaps.
+ while (Ops.size() > 1) {
+ unsigned FirstLoc = ~0U;
+ unsigned LastLoc = 0;
+ MachineInstr *FirstOp = 0;
+ MachineInstr *LastOp = 0;
+ int LastOffset = 0;
+ unsigned LastBytes = 0;
+ unsigned NumMove = 0;
+ for (int i = Ops.size() - 1; i >= 0; --i) {
+ MachineInstr *Op = Ops[i];
+ unsigned Loc = MI2LocMap[Op];
+ if (Loc <= FirstLoc) {
+ FirstLoc = Loc;
+ FirstOp = Op;
+ }
+ if (Loc >= LastLoc) {
+ LastLoc = Loc;
+ LastOp = Op;
+ }
+
+ int Offset = getMemoryOpOffset(Op);
+ unsigned Bytes = getLSMultipleTransferSize(Op);
+ if (LastBytes) {
+ if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
+ break;
+ }
+ LastOffset = Offset;
+ LastBytes = Bytes;
+ if (++NumMove == 4)
+ break;
+ }
+
+ if (NumMove <= 1)
+ Ops.pop_back();
+ else {
+ SmallPtrSet<MachineInstr*, 4> MoveOps;
+ for (int i = NumMove-1; i >= 0; --i)
+ MoveOps.insert(Ops[i]);
+
+ // Be conservative, if the instructions are too far apart, don't
+ // move them. We want to limit the increase of register pressure.
+ bool DoMove = (LastLoc - FirstLoc) < NumMove*4;
+ if (DoMove)
+ DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI);
+ if (!DoMove) {
+ for (unsigned i = 0; i != NumMove; ++i)
+ Ops.pop_back();
+ } else {
+ // This is the new location for the loads / stores.
+ MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
+ while (InsertPos != MBB->end() && MoveOps.count(InsertPos))
+ ++InsertPos;
+ for (unsigned i = 0; i != NumMove; ++i) {
+ MachineInstr *Op = Ops.back();
+ Ops.pop_back();
+ MBB->splice(InsertPos, MBB, Op);
+ }
+
+ NumLdStMoved += NumMove;
+ RetVal = true;
+ }
+ }
+ }
+
+ return RetVal;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+ bool RetVal = false;
+
+ DenseMap<MachineInstr*, unsigned> MI2LocMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
+ SmallVector<unsigned, 4> LdBases;
+ SmallVector<unsigned, 4> StBases;
+
+ unsigned Loc = 0;
+ MachineBasicBlock::iterator MBBI = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ while (MBBI != E) {
+ for (; MBBI != E; ++MBBI) {
+ MachineInstr *MI = MBBI;
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.isCall() || TID.isTerminator()) {
+ // Stop at barriers.
+ ++MBBI;
+ break;
+ }
+
+ MI2LocMap[MI] = Loc++;
+ if (!isMemoryOp(MI))
+ continue;
+ unsigned PredReg = 0;
+ if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ continue;
+
+ int Opcode = MI->getOpcode();
+ bool isLd = Opcode == ARM::LDR ||
+ Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ unsigned Base = MI->getOperand(1).getReg();
+ int Offset = getMemoryOpOffset(MI);
+
+ bool StopHere = false;
+ if (isLd) {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2LdsMap.find(Base);
+ if (BI != Base2LdsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2LdsMap[Base] = MIs;
+ LdBases.push_back(Base);
+ }
+ } else {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2StsMap.find(Base);
+ if (BI != Base2StsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2StsMap[Base] = MIs;
+ StBases.push_back(Base);
+ }
+ }
+
+ if (StopHere) {
+ // Found a duplicate (a base+offset combination that's seen earlier). Backtrack.
+ --Loc;
+ break;
+ }
+ }
+
+ // Re-schedule loads.
+ for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
+ unsigned Base = LdBases[i];
+ SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
+ if (Lds.size() > 1)
+ RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+ }
+
+ // Re-schedule stores.
+ for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
+ unsigned Base = StBases[i];
+ SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
+ if (Sts.size() > 1)
+ RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+ }
+
+ if (MBBI != E) {
+ Base2LdsMap.clear();
+ Base2StsMap.clear();
+ LdBases.clear();
+ StBases.clear();
+ }
+ }
+
+ return RetVal;
+}
+
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
+ if (PreAlloc)
+ return new ARMPreAllocLoadStoreOpt();
+ return new ARMLoadStoreOpt();
+}
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index b95d1f9..ebe7d58 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -219,3 +219,18 @@ def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8,
// Condition code registers.
def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>;
+
+//===----------------------------------------------------------------------===//
+// Subregister Set Definitions... now that we have all of the pieces, define the
+// sub registers for each register.
+//
+
+def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7,
+ D8, D9, D10, D11, D12, D13, D14, D15],
+ [S0, S2, S4, S6, S8, S10, S12, S14,
+ S16, S18, S20, S22, S24, S26, S28, S30]>;
+
+def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7,
+ D8, D9, D10, D11, D12, D13, D14, D15],
+ [S1, S3, S5, S7, S9, S11, S13, S15,
+ S17, S19, S21, S23, S25, S27, S29, S31]>;
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index ef78cd5..a978380 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -14,6 +14,8 @@
#include "ARMSubtarget.h"
#include "ARMGenSubtarget.inc"
#include "llvm/Module.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
using namespace llvm;
ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
@@ -28,6 +30,10 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
, CPUString("generic")
, TargetType(isELF) // Default to ELF unless otherwise specified.
, TargetABI(ARM_ABI_APCS) {
+ // default to soft float ABI
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Soft;
+
// Determine default and user specified characteristics
// Parse features string.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8b469cf..0704055 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -23,7 +23,7 @@ class Module;
class ARMSubtarget : public TargetSubtarget {
protected:
enum ARMArchEnum {
- V4T, V5T, V5TE, V6, V7A
+ V4T, V5T, V5TE, V6, V6T2, V7A
};
enum ARMFPEnum {
@@ -92,6 +92,7 @@ protected:
bool hasV5TOps() const { return ARMArchVersion >= V5T; }
bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
bool hasV6Ops() const { return ARMArchVersion >= V6; }
+ bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
bool hasV7Ops() const { return ARMArchVersion >= V7A; }
bool hasVFP2() const { return ARMFPUType >= VFPv2; }
@@ -105,6 +106,7 @@ protected:
bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
bool isThumb() const { return IsThumb; }
+ bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); }
bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
bool useThumbBacktraces() const { return UseThumbBacktraces; }
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 1dc7d19..7033907 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -23,6 +23,9 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+static cl::opt<bool>
+EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden,
+ cl::desc("Enable pre-regalloc load store optimization pass"));
static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
cl::desc("Disable load store optimization pass"));
static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
@@ -144,6 +147,16 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
return false;
}
+bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (!EnablePreLdStOpti)
+ return false;
+ // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
+ if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
+ PM.add(createARMLoadStoreOptimizationPass(true));
+ return true;
+}
+
bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// FIXME: temporarily disabling load / store optimization pass for Thumb mode.
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 916a8aa..7192c1b 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -71,6 +71,7 @@ public:
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 1cf0a91..7cffd0e 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -5,6 +5,7 @@ add_llvm_library(LLVMTarget
Target.cpp
TargetAsmInfo.cpp
TargetData.cpp
+ TargetELFWriterInfo.cpp
TargetFrameInfo.cpp
TargetInstrInfo.cpp
TargetMachOWriterInfo.cpp
@@ -14,4 +15,4 @@ add_llvm_library(LLVMTarget
TargetSubtarget.cpp
)
-# TODO: Support other targets besides X86. See Makefile. \ No newline at end of file
+# TODO: Support other targets besides X86. See Makefile.
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp
index b42ee45..f9a8801 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp
@@ -33,8 +33,9 @@ bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
return true;
}
-/// runOnMachineFunction - This uses the printInstruction()
-/// method to print assembly for each instruction.
+/// runOnMachineFunction - This emits the frame section, autos section and
+/// assembly for each instruction. Also takes care of function begin debug
+/// directive and file begin debug directive (if required) for the function.
///
bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
@@ -47,20 +48,38 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const Function *F = MF.getFunction();
CurrentFnName = Mang->getValueName(F);
- DbgInfo.EmitFileDirective(F);
- // Emit the function variables.
+ // Iterate over the first basic block instructions to find if it has a
+ // DebugLoc. If so emit .file directive. Instructions such as movlw do not
+ // have valid DebugLoc, so need to iterate over instructions.
+ MachineFunction::const_iterator I = MF.begin();
+ for (MachineBasicBlock::const_iterator MBBI = I->begin(), E = I->end();
+ MBBI != E; MBBI++) {
+ const DebugLoc DLoc = MBBI->getDebugLoc();
+ if (!DLoc.isUnknown()) {
+ GlobalVariable *CU = MF.getDebugLocTuple(DLoc).CompileUnit;
+ unsigned line = MF.getDebugLocTuple(DLoc).Line;
+ DbgInfo.EmitFileDirective(CU);
+ DbgInfo.SetFunctBeginLine(line);
+ break;
+ }
+ }
+
+ // Emit the function frame (args and temps).
EmitFunctionFrame(MF);
- // Emit function begin debug directives
+ // Emit function begin debug directive.
DbgInfo.EmitFunctBeginDI(F);
+ // Emit the autos section of function.
EmitAutos(CurrentFnName);
+
+ // Now emit the instructions of function in its code section.
const char *codeSection = PAN::getCodeSectionName(CurrentFnName).c_str();
const Section *fCodeSection = TAI->getNamedSection(codeSection,
SectionFlags::Code);
- O << "\n";
// Start the Code Section.
+ O << "\n";
SwitchToSection (fCodeSection);
// Emit the frame address of the function at the beginning of code.
@@ -77,14 +96,17 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Print out code for the function.
for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
I != E; ++I) {
+
// Print a label for the basic block.
if (I != MF.begin()) {
printBasicBlockLabel(I, true);
O << '\n';
}
+ // Print a basic block.
for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
II != E; ++II) {
+
// Emit the line directive if source line changed.
const DebugLoc DL = II->getDebugLoc();
if (!DL.isUnknown()) {
@@ -102,6 +124,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit function end debug directives.
DbgInfo.EmitFunctEndDI(F, CurLine);
+
return false; // we didn't modify anything.
}
@@ -158,11 +181,16 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
}
}
+/// printCCOperand - Print the cond code operand.
+///
void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) {
int CC = (int)MI->getOperand(opNum).getImm();
O << PIC16CondCodeToString((PIC16CC::CondCodes)CC);
}
+/// printLibcallDecls - print the extern declarations for compiler
+/// intrinsics.
+///
void PIC16AsmPrinter::printLibcallDecls(void) {
// If no libcalls used, return.
if (LibcallDecls.empty()) return;
@@ -180,6 +208,10 @@ void PIC16AsmPrinter::printLibcallDecls(void) {
O << TAI->getCommentString() << "External decls for libcalls - END." <<"\n";
}
+/// doInitialization - Perform Module level initializations here.
+/// One task that we do here is to sectionize all global variables.
+/// The MemSelOptimizer pass depends on the sectionizing.
+///
bool PIC16AsmPrinter::doInitialization (Module &M) {
bool Result = AsmPrinter::doInitialization(M);
@@ -194,23 +226,23 @@ bool PIC16AsmPrinter::doInitialization (Module &M) {
I->setSection(TAI->SectionForGlobal(I)->getName());
}
- DbgInfo.EmitFileDirective(M);
+ DbgInfo.Init(M);
EmitFunctionDecls(M);
EmitUndefinedVars(M);
EmitDefinedVars(M);
EmitIData(M);
EmitUData(M);
EmitRomData(M);
- DbgInfo.PopulateFunctsDI(M);
return Result;
}
-// Emit extern decls for functions imported from other modules, and emit
-// global declarations for function defined in this module and which are
-// available to other modules.
+/// Emit extern decls for functions imported from other modules, and emit
+/// global declarations for function defined in this module and which are
+/// available to other modules.
+///
void PIC16AsmPrinter::EmitFunctionDecls (Module &M) {
// Emit declarations for external functions.
- O << TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n";
+ O <<"\n"<<TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n";
for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) {
std::string Name = Mang->getValueName(I);
if (Name.compare("@abort") == 0)
@@ -280,6 +312,7 @@ void PIC16AsmPrinter::EmitRomData (Module &M)
bool PIC16AsmPrinter::doFinalization(Module &M) {
printLibcallDecls();
+ EmitRemainingAutos();
DbgInfo.EmitVarDebugInfo(M);
DbgInfo.EmitEOF();
O << "\n\t" << "END\n";
@@ -383,6 +416,8 @@ void PIC16AsmPrinter::EmitAutos (std::string FunctName)
for (unsigned i = 0; i < AutosSections.size(); i++) {
O << "\n";
if (AutosSections[i]->S_->getName() == SectionName) {
+ // Set the printing status to true
+ AutosSections[i]->setPrintedStatus(true);
SwitchToSection(AutosSections[i]->S_);
std::vector<const GlobalVariable*> Items = AutosSections[i]->Items;
for (unsigned j = 0; j < Items.size(); j++) {
@@ -398,3 +433,34 @@ void PIC16AsmPrinter::EmitAutos (std::string FunctName)
}
}
+// Print autos that were not printed during the code printing of functions,
+// as the functions themselves might have been deleted by the optimizer.
+void PIC16AsmPrinter::EmitRemainingAutos()
+{
+ const TargetData *TD = TM.getTargetData();
+
+ // Now print Autos section for this function.
+ std::vector <PIC16Section *>AutosSections = PTAI->AutosSections;
+ for (unsigned i = 0; i < AutosSections.size(); i++) {
+
+ // if the section is already printed then don't print again
+ if (AutosSections[i]->isPrinted())
+ continue;
+
+ // Set status as printed
+ AutosSections[i]->setPrintedStatus(true);
+
+ O << "\n";
+ SwitchToSection(AutosSections[i]->S_);
+ std::vector<const GlobalVariable*> Items = AutosSections[i]->Items;
+ for (unsigned j = 0; j < Items.size(); j++) {
+ std::string VarName = Mang->getValueName(Items[j]);
+ Constant *C = Items[j]->getInitializer();
+ const Type *Ty = C->getType();
+ unsigned Size = TD->getTypeAllocSize(Ty);
+ // Emit memory reserve directive.
+ O << VarName << " RES " << Size << "\n";
+ }
+ }
+}
+
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h
index 2545dfd..8bdcf72 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/PIC16AsmPrinter.h
@@ -52,6 +52,7 @@ namespace llvm {
void EmitIData (Module &M);
void EmitUData (Module &M);
void EmitAutos (std::string FunctName);
+ void EmitRemainingAutos ();
void EmitRomData (Module &M);
void EmitFunctionFrame(MachineFunction &MF);
void printLibcallDecls(void);
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index faf4590..d7ebea7 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -18,13 +18,6 @@
using namespace llvm;
-PIC16DbgInfo::~PIC16DbgInfo() {
- for(std::map<std::string, DISubprogram *>::iterator i = FunctNameMap.begin();
- i!=FunctNameMap.end(); i++)
- delete i->second;
- FunctNameMap.clear();
-}
-
void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
bool &HasAux, int Aux[],
std::string &TypeName) {
@@ -70,7 +63,7 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
}
HasAux = true;
// In auxillary entry for array, 7th and 8th byte represent array size.
- Aux[6] = size;
+ Aux[6] = size & 0xff;
Aux[7] = size >> 8;
DIType BaseType = CTy.getTypeDerivedFrom();
PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
@@ -86,10 +79,14 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
else
TypeNo = TypeNo | PIC16Dbg::T_UNION;
CTy.getName(TypeName);
- unsigned size = CTy.getSizeInBits()/8;
+ // UniqueSuffix is .number where number is obtained from
+ // llvm.dbg.composite<number>.
+ std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18);
+ TypeName += UniqueSuffix;
+ unsigned short size = CTy.getSizeInBits()/8;
// 7th and 8th byte represent size.
HasAux = true;
- Aux[6] = size;
+ Aux[6] = size & 0xff;
Aux[7] = size >> 8;
break;
}
@@ -145,37 +142,84 @@ short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) {
return ClassNo;
}
-void PIC16DbgInfo::PopulateFunctsDI(Module &M) {
- GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.subprograms");
- if (!Root)
- return;
- Constant *RootC = cast<Constant>(*Root->use_begin());
-
- for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
- UI != UE; ++UI)
- for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
- UUI != UUE; ++UUI) {
- GlobalVariable *GVSP = cast<GlobalVariable>(*UUI);
- DISubprogram *SP = new DISubprogram(GVSP);
- std::string Name;
- SP->getLinkageName(Name);
- FunctNameMap[Name] = SP;
- }
- return;
+void PIC16DbgInfo::Init(Module &M) {
+ // Do all debug related initializations here.
+ EmitFileDirective(M);
+ EmitCompositeTypeDecls(M);
}
-DISubprogram* PIC16DbgInfo::getFunctDI(std::string FunctName) {
- return FunctNameMap[FunctName];
+void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
+ for(iplist<GlobalVariable>::iterator I = M.getGlobalList().begin(),
+ E = M.getGlobalList().end(); I != E; I++) {
+ // Structures and union declaration's debug info has llvm.dbg.composite
+ // in its name.
+ if(I->getName().find("llvm.dbg.composite") != std::string::npos) {
+ GlobalVariable *GV = cast<GlobalVariable >(I);
+ DICompositeType CTy(GV);
+ if (CTy.getTag() == dwarf::DW_TAG_union_type ||
+ CTy.getTag() == dwarf::DW_TAG_structure_type ) {
+ std::string name;
+ CTy.getName(name);
+ std::string DIVar = I->getName();
+ // Get the number after llvm.dbg.composite and make UniqueSuffix from
+ // it.
+ std::string UniqueSuffix = "." + DIVar.substr(18);
+ std::string MangledCTyName = name + UniqueSuffix;
+ unsigned short size = CTy.getSizeInBits()/8;
+ int Aux[PIC16Dbg::AuxSize] = {0};
+ // 7th and 8th byte represent size of structure/union.
+ Aux[6] = size & 0xff;
+ Aux[7] = size >> 8;
+ // Emit .def for structure/union tag.
+ if( CTy.getTag() == dwarf::DW_TAG_union_type)
+ EmitSymbol(MangledCTyName, PIC16Dbg::C_UNTAG);
+ else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
+ EmitSymbol(MangledCTyName, PIC16Dbg::C_STRTAG);
+
+ // Emit auxiliary debug information for structure/union tag.
+ EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize);
+ unsigned long Value = 0;
+ DIArray Elements = CTy.getTypeArray();
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
+ DIDescriptor Element = Elements.getElement(i);
+ unsigned short TypeNo = 0;
+ bool HasAux = false;
+ int ElementAux[PIC16Dbg::AuxSize] = { 0 };
+ std::string TypeName = "";
+ std::string ElementName;
+ GlobalVariable *GV = Element.getGV();
+ DIDerivedType DITy(GV);
+ DITy.getName(ElementName);
+ unsigned short ElementSize = DITy.getSizeInBits()/8;
+          // Get mangled name for this structure/union element.
+ std::string MangMemName = ElementName + UniqueSuffix;
+ PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TypeName);
+ short Class;
+ if( CTy.getTag() == dwarf::DW_TAG_union_type)
+ Class = PIC16Dbg::C_MOU;
+ else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
+ Class = PIC16Dbg::C_MOS;
+ EmitSymbol(MangMemName, Class, TypeNo, Value);
+ if (CTy.getTag() == dwarf::DW_TAG_structure_type)
+ Value += ElementSize;
+ if (HasAux)
+ EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TypeName);
+ }
+ // Emit mangled Symbol for end of structure/union.
+ std::string EOSSymbol = ".eos" + UniqueSuffix;
+ EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS);
+ EmitAuxEntry(EOSSymbol, Aux, PIC16Dbg::AuxSize, MangledCTyName);
+ }
+ }
+ }
}
void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
std::string FunctName = F->getName();
- DISubprogram *SP = getFunctDI(FunctName);
- if (SP) {
+ if (EmitDebugDirectives) {
std::string FunctBeginSym = ".bf." + FunctName;
std::string BlockBeginSym = ".bb." + FunctName;
- int FunctBeginLine = SP->getLineNumber();
int BFAux[PIC16Dbg::AuxSize] = {0};
BFAux[4] = FunctBeginLine;
BFAux[5] = FunctBeginLine >> 8;
@@ -189,8 +233,7 @@ void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
std::string FunctName = F->getName();
- DISubprogram *SP = getFunctDI(FunctName);
- if (SP) {
+ if (EmitDebugDirectives) {
std::string FunctEndSym = ".ef." + FunctName;
std::string BlockEndSym = ".eb." + FunctName;
@@ -208,14 +251,21 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
/// EmitAuxEntry - Emit Auxiliary debug information.
///
-void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num) {
+void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num,
+ std::string tag) {
O << "\n\t.dim " << VarName << ", 1" ;
+ if (tag != "")
+ O << ", " << tag;
for (int i = 0; i<num; i++)
O << "," << Aux[i];
}
-void PIC16DbgInfo::EmitSymbol(std::string Name, int Class) {
- O << "\n\t" << ".def "<< Name << ", debug, class = " << Class;
+void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
+ Type, unsigned long Value) {
+ O << "\n\t" << ".def "<< Name << ", type = " << Type << ", class = "
+ << Class;
+ if (Value > 0)
+ O << ", value = " << Value;
}
void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
@@ -241,18 +291,8 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
O << "\n\t.type " << VarName << ", " << TypeNo;
short ClassNo = getClass(DIGV);
O << "\n\t.class " << VarName << ", " << ClassNo;
- if (HasAux) {
- if (TypeName != "") {
- // Emit debug info for structure and union objects after
- // .dim directive supports structure/union tag name in aux entry.
- /* O << "\n\t.dim " << VarName << ", 1," << TypeName;
- for (int i = 0; i<PIC16Dbg::AuxSize; i++)
- O << "," << Aux[i];*/
- }
- else {
- EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize);
- }
- }
+ if (HasAux)
+ EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TypeName);
}
}
}
@@ -262,26 +302,20 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
void PIC16DbgInfo::EmitFileDirective(Module &M) {
GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
if (CU) {
- DICompileUnit DIUnit(CU);
- std::string Dir, FN;
- std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN);
- O << "\n\t.file\t\"" << File << "\"\n" ;
- CurFile = File;
+ EmitDebugDirectives = true;
+ EmitFileDirective(CU, false);
}
}
-void PIC16DbgInfo::EmitFileDirective(const Function *F) {
- std::string FunctName = F->getName();
- DISubprogram *SP = getFunctDI(FunctName);
- if (SP) {
- std::string Dir, FN;
- DICompileUnit CU = SP->getCompileUnit();
- std::string File = CU.getDirectory(Dir) + "/" + CU.getFilename(FN);
- if ( File != CurFile) {
+void PIC16DbgInfo::EmitFileDirective(GlobalVariable *CU, bool EmitEof) {
+ std::string Dir, FN;
+ DICompileUnit DIUnit(CU);
+ std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN);
+ if ( File != CurFile ) {
+ if (EmitEof)
EmitEOF();
- O << "\n\t.file\t\"" << File << "\"\n" ;
- CurFile = File;
- }
+ O << "\n\t.file\t\"" << File << "\"\n" ;
+ CurFile = File;
}
}
@@ -290,3 +324,6 @@ void PIC16DbgInfo::EmitEOF() {
O << "\n\t.EOF";
}
+void PIC16DbgInfo::SetFunctBeginLine(unsigned line) {
+ FunctBeginLine = line;
+}
diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h
index be39393..9d50380 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.h
+++ b/lib/Target/PIC16/PIC16DebugInfo.h
@@ -91,29 +91,36 @@ namespace llvm {
class raw_ostream;
class PIC16DbgInfo {
- std::map <std::string, DISubprogram *> FunctNameMap;
raw_ostream &O;
const TargetAsmInfo *TAI;
std::string CurFile;
+ // EmitDebugDirectives is set if debug information is available. Default
+ // value for it is false.
+ bool EmitDebugDirectives;
+ unsigned FunctBeginLine;
public:
PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) {
- CurFile = "";
+ CurFile = "";
+ EmitDebugDirectives = false;
}
- ~PIC16DbgInfo();
void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux,
int Aux[], std::string &TypeName);
unsigned GetTypeDebugNumber(std::string &type);
short getClass(DIGlobalVariable DIGV);
- void PopulateFunctsDI(Module &M);
- DISubprogram *getFunctDI(std::string FunctName);
void EmitFunctBeginDI(const Function *F);
+ void Init(Module &M);
+ void EmitCompositeTypeDecls(Module &M);
void EmitFunctEndDI(const Function *F, unsigned Line);
- void EmitAuxEntry(const std::string VarName, int Aux[], int num);
- inline void EmitSymbol(std::string Name, int Class);
+ void EmitAuxEntry(const std::string VarName, int Aux[],
+ int num = PIC16Dbg::AuxSize, std::string tag = "");
+ inline void EmitSymbol(std::string Name, short Class,
+ unsigned short Type = PIC16Dbg::T_NULL,
+ unsigned long Value = 0);
void EmitVarDebugInfo(Module &M);
void EmitFileDirective(Module &M);
- void EmitFileDirective(const Function *F);
+ void EmitFileDirective(GlobalVariable *CU, bool EmitEof = true);
void EmitEOF();
+ void SetFunctBeginLine(unsigned line);
};
} // end namespace llvm;
#endif
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index ac9a143..ba465f3 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -56,6 +56,17 @@ static const char *getIntrinsicName(unsigned opcode) {
case RTLIB::SREM_I32: Basename = "srem.i32"; break;
case RTLIB::UREM_I16: Basename = "urem.i16"; break;
case RTLIB::UREM_I32: Basename = "urem.i32"; break;
+
+ case RTLIB::FPTOSINT_F32_I32:
+ Basename = "f32_to_si32"; break;
+ case RTLIB::SINTTOFP_I32_F32:
+ Basename = "si32_to_f32"; break;
+
+ case RTLIB::ADD_F32: Basename = "add.f32"; break;
+ case RTLIB::SUB_F32: Basename = "sub.f32"; break;
+ case RTLIB::MUL_F32: Basename = "mul.f32"; break;
+ case RTLIB::DIV_F32: Basename = "div.f32"; break;
+
}
std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@@ -113,7 +124,17 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
// Unsigned remainder lib call names
setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16));
setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32));
-
+
+ // Floating point operations
+ setLibcallName(RTLIB::FPTOSINT_F32_I32,
+ getIntrinsicName(RTLIB::FPTOSINT_F32_I32));
+ setLibcallName(RTLIB::SINTTOFP_I32_F32,
+ getIntrinsicName(RTLIB::SINTTOFP_I32_F32));
+ setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32));
+ setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32));
+ setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32));
+ setLibcallName(RTLIB::DIV_F32, getIntrinsicName(RTLIB::DIV_F32));
+
setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
diff --git a/lib/Target/PIC16/PIC16TargetAsmInfo.h b/lib/Target/PIC16/PIC16TargetAsmInfo.h
index e464e36..b7292b8 100644
--- a/lib/Target/PIC16/PIC16TargetAsmInfo.h
+++ b/lib/Target/PIC16/PIC16TargetAsmInfo.h
@@ -33,9 +33,13 @@ namespace llvm {
struct PIC16Section {
const Section *S_; // Connection to actual Section.
unsigned Size; // Total size of the objects contained.
+ bool SectionPrinted;
std::vector<const GlobalVariable*> Items;
- PIC16Section (const Section *s) { S_ = s; Size = 0; }
+ PIC16Section (const Section *s) { S_ = s; Size = 0;
+ SectionPrinted = false;}
+ bool isPrinted() { return SectionPrinted ; }
+ void setPrintedStatus(bool status) { SectionPrinted = status ;}
};
struct PIC16TargetAsmInfo : public TargetAsmInfo {
diff --git a/lib/Target/TargetELFWriterInfo.cpp b/lib/Target/TargetELFWriterInfo.cpp
new file mode 100644
index 0000000..9651e65
--- /dev/null
+++ b/lib/Target/TargetELFWriterInfo.cpp
@@ -0,0 +1,36 @@
+//===-- lib/Target/TargetELFWriterInfo.cpp - ELF Writer Info -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the TargetELFWriterInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Function.h"
+#include "llvm/Target/TargetELFWriterInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+using namespace llvm;
+
+TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) {
+ is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ isLittleEndian = TM.getTargetData()->isLittleEndian();
+}
+
+TargetELFWriterInfo::~TargetELFWriterInfo() {}
+
+/// getFunctionAlignment - Returns the alignment for function 'F', targets
+/// with different alignment constraints should overload this method
+unsigned TargetELFWriterInfo::getFunctionAlignment(const Function *F) const {
+ const TargetData *TD = TM.getTargetData();
+ unsigned FnAlign = F->getAlignment();
+ unsigned TDAlign = TD->getPointerABIAlignment();
+ unsigned Align = std::max(FnAlign, TDAlign);
+ assert(!(Align & (Align-1)) && "Alignment is not a power of two!");
+ return Align;
+}
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index dea293b..c487cb8 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -30,6 +30,7 @@ namespace llvm {
bool FiniteOnlyFPMathOption;
bool HonorSignDependentRoundingFPMathOption;
bool UseSoftFloat;
+ FloatABI::ABIType FloatABIType;
bool NoImplicitFloat;
bool NoZerosInBSS;
bool ExceptionHandling;
@@ -84,6 +85,19 @@ GenerateSoftFloatCalls("soft-float",
cl::desc("Generate software floating point library calls"),
cl::location(UseSoftFloat),
cl::init(false));
+static cl::opt<llvm::FloatABI::ABIType, true>
+FloatABIForCalls("float-abi",
+ cl::desc("Choose float ABI type"),
+ cl::location(FloatABIType),
+ cl::init(FloatABI::Default),
+ cl::values(
+ clEnumValN(FloatABI::Default, "default",
+ "Target default float ABI type"),
+ clEnumValN(FloatABI::Soft, "soft",
+ "Soft float ABI (implied by -soft-float)"),
+ clEnumValN(FloatABI::Hard, "hard",
+ "Hard float ABI (uses FP registers)"),
+ clEnumValEnd));
static cl::opt<bool, true>
DontPlaceZerosInBSS("nozero-initialized-in-bss",
cl::desc("Don't place zero-initialized symbols into bss section"),
@@ -162,6 +176,14 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
// TargetMachine Class
//
+TargetMachine::TargetMachine()
+ : AsmInfo(0) {
+ // Typically it will be subtargets that will adjust FloatABIType from Default
+ // to Soft or Hard.
+ if (UseSoftFloat)
+ FloatABIType = FloatABI::Soft;
+}
+
TargetMachine::~TargetMachine() {
delete AsmInfo;
}
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 710bd03..3796aac 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -482,35 +482,6 @@ _usesbb:
//===---------------------------------------------------------------------===//
-Currently we don't have elimination of redundant stack manipulations. Consider
-the code:
-
-int %main() {
-entry:
- call fastcc void %test1( )
- call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) )
- ret int 0
-}
-
-declare fastcc void %test1()
-
-declare fastcc void %test2(sbyte*)
-
-
-This currently compiles to:
-
- subl $16, %esp
- call _test5
- addl $12, %esp
- subl $16, %esp
- movl $_test5, (%esp)
- call _test6
- addl $12, %esp
-
-The add\sub pair is really unneeded here.
-
-//===---------------------------------------------------------------------===//
-
Consider the expansion of:
define i32 @test3(i32 %X) {
@@ -902,34 +873,6 @@ condition register is dead. xor reg reg is shorter than mov reg, #0.
//===---------------------------------------------------------------------===//
-We aren't matching RMW instructions aggressively
-enough. Here's a reduced testcase (more in PR1160):
-
-define void @test(i32* %huge_ptr, i32* %target_ptr) {
- %A = load i32* %huge_ptr ; <i32> [#uses=1]
- %B = load i32* %target_ptr ; <i32> [#uses=1]
- %C = or i32 %A, %B ; <i32> [#uses=1]
- store i32 %C, i32* %target_ptr
- ret void
-}
-
-$ llvm-as < t.ll | llc -march=x86-64
-
-_test:
- movl (%rdi), %eax
- orl (%rsi), %eax
- movl %eax, (%rsi)
- ret
-
-That should be something like:
-
-_test:
- movl (%rdi), %eax
- orl %eax, (%rsi)
- ret
-
-//===---------------------------------------------------------------------===//
-
The following code:
bb114.preheader: ; preds = %cond_next94
@@ -1897,3 +1840,60 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona,
Core 2, and "Generic"
//===---------------------------------------------------------------------===//
+
+Testcase:
+int a(int x) { return (x & 127) > 31; }
+
+Current output:
+ movl 4(%esp), %eax
+ andl $127, %eax
+ cmpl $31, %eax
+ seta %al
+ movzbl %al, %eax
+ ret
+
+Ideal output:
+ xorl %eax, %eax
+ testl $96, 4(%esp)
+ setne %al
+ ret
+
+We could do this transformation in instcombine, but it's only clearly
+beneficial on platforms with a test instruction.
+
+//===---------------------------------------------------------------------===//
+Testcase:
+int x(int a) { return (a&0xf0)>>4; }
+
+Current output:
+ movl 4(%esp), %eax
+ shrl $4, %eax
+ andl $15, %eax
+ ret
+
+Ideal output:
+ movzbl 4(%esp), %eax
+ shrl $4, %eax
+ ret
+
+//===---------------------------------------------------------------------===//
+
+Testcase:
+int x(int a) { return (a & 0x80) ? 0x100 : 0; }
+
+Current output:
+ testl $128, 4(%esp)
+ setne %al
+ movzbl %al, %eax
+ shll $8, %eax
+ ret
+
+Ideal output:
+ movl 4(%esp), %eax
+ addl %eax, %eax
+ andl $256, %eax
+ ret
+
+We generally want to fold shifted tests of a single bit into a shift+and on x86.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 7f99203..e9fcbd5 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -215,50 +215,6 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
]>;
-// Tail call convention (fast): One register is reserved for target address,
-// namely R9
-def CC_X86_64_TailCall : CallingConv<[
- // Handles byval parameters.
- CCIfByVal<CCPassByVal<8, 8>>,
-
- // Promote i8/i16 arguments to i32.
- CCIfType<[i8, i16], CCPromoteToType<i32>>,
-
- // The 'nest' parameter, if any, is passed in R10.
- CCIfNest<CCAssignToReg<[R10]>>,
-
- // The first 6 integer arguments are passed in integer registers.
- CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>,
- CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>,
-
- // The first 8 FP/Vector arguments are passed in XMM registers.
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfSubtarget<"hasSSE1()",
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
-
- // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
- // registers on Darwin.
- CCIfType<[v8i8, v4i16, v2i32, v2f32],
- CCIfSubtarget<"isTargetDarwin()",
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
-
- // The first 8 v1i64 vector arguments are passed in GPRs on Darwin.
- CCIfType<[v1i64],
- CCIfSubtarget<"isTargetDarwin()",
- CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>,
-
- // Integer/FP values get stored in stack slots that are 8 bytes in size and
- // 8-byte aligned if there are no more registers to hold them.
- CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
-
- // Vectors get 16-byte stack slots that are 16-byte aligned.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
-
- // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
- CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
-]>;
-
-
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 2604741..d84034b 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -12,8 +12,27 @@
//===----------------------------------------------------------------------===//
#include "X86ELFWriterInfo.h"
+#include "llvm/Function.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
-X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) :
- TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {}
+X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM)
+ : TargetELFWriterInfo(TM) {
+ bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64;
+ EMachine = is64Bit ? EM_X86_64 : EM_386;
+ }
+
X86ELFWriterInfo::~X86ELFWriterInfo() {}
+
+unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const {
+ unsigned FnAlign = 4;
+
+ if (F->hasFnAttr(Attribute::OptimizeForSize))
+ FnAlign = 1;
+
+ if (F->getAlignment())
+ FnAlign = Log2_32(F->getAlignment());
+
+ return (1 << FnAlign);
+}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index acfa501..e9c5bc4 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,8 +20,10 @@ namespace llvm {
class X86ELFWriterInfo : public TargetELFWriterInfo {
public:
- X86ELFWriterInfo(bool is64Bit);
+ X86ELFWriterInfo(TargetMachine &TM);
virtual ~X86ELFWriterInfo();
+
+ virtual unsigned getFunctionAlignment(const Function *F) const;
};
} // end llvm namespace
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 33332e4..2bcfd76 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -171,8 +171,6 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
- else if (CC == CallingConv::Fast && isTaillCall)
- return CC_X86_64_TailCall;
else
return CC_X86_64_C;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9e15a54..36e3ab2 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -944,7 +944,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
SDValue StackAdjustment = TailCall.getOperand(2);
assert(((TargetAddress.getOpcode() == ISD::Register &&
(cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX ||
- cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) ||
+ cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) ||
TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
TargetAddress.getOpcode() == ISD::TargetGlobalAddress) &&
"Expecting an global address, external symbol, or register");
@@ -1171,8 +1171,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const {
if (Subtarget->is64Bit()) {
if (Subtarget->isTargetWin64())
return CC_X86_Win64_C;
- else if (CC == CallingConv::Fast && PerformTailCallOpt)
- return CC_X86_64_TailCall;
else
return CC_X86_64_C;
}
@@ -1799,7 +1797,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
} else if (IsTailCall) {
- unsigned Opc = Is64Bit ? X86::R9 : X86::EAX;
+ unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,
DAG.getRegister(Opc, getPointerTy()),
@@ -7696,7 +7694,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
SelectionDAG &DAG, MachineFrameInfo *MFI,
const TargetLowering &TLI) {
LDBase = NULL;
- LastLoadedElt = -1;
+ LastLoadedElt = -1U;
for (unsigned i = 0; i < NumElems; ++i) {
if (N->getMaskElt(i) < 0) {
if (!LDBase)
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c733f26..6c0074e 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -822,6 +822,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
+ unsigned ReadyLabelId = 0;
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer is ready.
+ ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
+ }
+
// Skip the callee-saved push instructions.
while (MBBI != MBB.end() &&
(MBBI->getOpcode() == X86::PUSH32r ||
@@ -831,67 +838,61 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
- if (NumBytes) { // Adjust stack pointer: ESP -= numbytes.
- if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
- // Check, whether EAX is livein for this function.
- bool isEAXAlive = false;
- for (MachineRegisterInfo::livein_iterator
+ // Adjust stack pointer: ESP -= numbytes.
+ if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
+ // Check, whether EAX is livein for this function.
+ bool isEAXAlive = false;
+ for (MachineRegisterInfo::livein_iterator
II = MF.getRegInfo().livein_begin(),
EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
- unsigned Reg = II->first;
- isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
- Reg == X86::AH || Reg == X86::AL);
- }
+ unsigned Reg = II->first;
+ isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL);
+ }
- // Function prologue calls _alloca to probe the stack when allocating more
- // than 4k bytes in one go. Touching the stack at 4K increments is
- // necessary to ensure that the guard pages used by the OS virtual memory
- // manager are allocated in correct sequence.
- if (!isEAXAlive) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes);
- BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca");
- } else {
- // Save EAX
- BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
- .addReg(X86::EAX, RegState::Kill);
-
- // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
- // allocated bytes for EAX.
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes-4);
- BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
- .addExternalSymbol("_alloca");
-
- // Restore EAX
- MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
- X86::EAX),
- StackPtr, false, NumBytes-4);
- MBB.insert(MBBI, MI);
- }
+ // Function prologue calls _alloca to probe the stack when allocating more
+ // than 4k bytes in one go. Touching the stack at 4K increments is necessary
+ // to ensure that the guard pages used by the OS virtual memory manager are
+ // allocated in correct sequence.
+ if (!isEAXAlive) {
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("_alloca");
} else {
- // If there is an SUB32ri of ESP immediately before this instruction,
- // merge the two. This can be the case when tail call elimination is
- // enabled and the callee has more arguments then the caller.
- NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+ // Save EAX
+ BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
+ .addReg(X86::EAX, RegState::Kill);
+
+ // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
+ // allocated bytes for EAX.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+ .addImm(NumBytes - 4);
+ BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
+ .addExternalSymbol("_alloca");
+
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MBB.insert(MBBI, MI);
+ }
+ } else if (NumBytes) {
+ // If there is an SUB32ri of ESP immediately before this instruction, merge
+ // the two. This can be the case when tail call elimination is enabled and
+ // the callee has more arguments then the caller.
+ NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
- // If there is an ADD32ri or SUB32ri of ESP immediately after this
- // instruction, merge the two instructions.
- mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
+ // If there is an ADD32ri or SUB32ri of ESP immediately after this
+ // instruction, merge the two instructions.
+ mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
- if (NumBytes)
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
- }
+ if (NumBytes)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
}
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer is ready.
- unsigned ReadyLabelId = 0;
- ReadyLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
+ if (needsFrameMoves)
emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
- }
}
void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 03ce1ae..56983ce 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -350,6 +350,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
, MaxInlineSizeThreshold(128)
, Is64Bit(is64Bit)
, TargetType(isELF) { // Default to ELF unless otherwise specified.
+
+ // default to hard float ABI
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Hard;
// Determine default and user specified characteristics
if (!FS.empty()) {
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 88ab247..dfb055f 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -133,8 +133,7 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
DataLayout(Subtarget.getDataLayout()),
FrameInfo(TargetFrameInfo::StackGrowsDown,
Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
- InstrInfo(*this), JITInfo(*this), TLInfo(*this),
- ELFWriterInfo(Subtarget.is64Bit()) {
+ InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();
// FIXME: Correctly select PIC model for Win64 stuff
if (getRelocationModel() == Reloc::Default) {
OpenPOWER on IntegriCloud