diff options
Diffstat (limited to 'lib/Target')
31 files changed, 854 insertions, 294 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index ac7de91..7edd118 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -98,12 +98,12 @@ FunctionPass *createARMCodePrinterPass(raw_ostream &O, FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM, MachineCodeEmitter &MCE); -FunctionPass *createARMCodeEmitterPass( ARMTargetMachine &TM, - MachineCodeEmitter &MCE); -FunctionPass *createARMJITCodeEmitterPass( ARMTargetMachine &TM, - JITCodeEmitter &JCE); +FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM, + MachineCodeEmitter &MCE); +FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM, + JITCodeEmitter &JCE); -FunctionPass *createARMLoadStoreOptimizationPass(); +FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMConstantIslandPass(); } // end namespace llvm; diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 4ac6857..594811d 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -28,6 +28,8 @@ def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", "ARM v5TE, v5TEj, v5TExp">; def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", "ARM v6">; +def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", + "ARM v6t2">; def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", "ARM v7A">; def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", @@ -92,9 +94,11 @@ def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>; def : Proc<"mpcorenovfp", [ArchV6]>; def : Proc<"mpcore", [ArchV6, FeatureVFP2]>; -def : Proc<"arm1156t2-s", [ArchV6, FeatureThumb2]>; -def : Proc<"arm1156t2f-s", [ArchV6, FeatureThumb2, FeatureVFP2]>; +// V6T2 Processors. +def : Proc<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>; +def : Proc<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>; +// V7 Processors. def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>; def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 6cd786e..f126760 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -17,6 +17,11 @@ class CCIfSubtarget<string F, CCAction A>: class CCIfAlign<string Align, CCAction A>: CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; +/// CCIfFloatABI - Match of the float ABI and the arg. ABIType may be "Hard" or +/// "Soft". +class CCIfFloatABI<string ABIType, CCAction A>: + CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>; + //===----------------------------------------------------------------------===// // ARM APCS Calling Convention //===----------------------------------------------------------------------===// @@ -43,9 +48,10 @@ def RetCC_ARM_APCS : CallingConv<[ ]>; //===----------------------------------------------------------------------===// -// ARM AAPCS (EABI) Calling Convention +// ARM AAPCS (EABI) Calling Convention, common parts //===----------------------------------------------------------------------===// -def CC_ARM_AAPCS : CallingConv<[ + +def CC_ARM_AAPCS_Common : CallingConv<[ CCIfType<[i8, i16], CCPromoteToType<i32>>, @@ -53,23 +59,51 @@ def CC_ARM_AAPCS : CallingConv<[ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register // (and the same is true for f64 if VFP is not enabled) CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, - CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, - - CCIfType<[f32], CCBitConvertToType<i32>>, CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&" "ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, - CCIfType<[i32], CCAssignToStack<4, 4>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[f64], CCAssignToStack<8, 8>> ]>; -def RetCC_ARM_AAPCS : CallingConv<[ +def RetCC_ARM_AAPCS_Common : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM AAPCS (EABI) Calling Convention +//===----------------------------------------------------------------------===// + +def CC_ARM_AAPCS : CallingConv<[ + CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType<i32>>, + CCDelegateTo<CC_ARM_AAPCS_Common> +]>; + +def RetCC_ARM_AAPCS : CallingConv<[ CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, + CCIfType<[f32], CCBitConvertToType<i32>>, + CCDelegateTo<RetCC_ARM_AAPCS_Common> +]>; - CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +//===----------------------------------------------------------------------===// +// ARM AAPCS-VFP (EABI) Calling Convention +//===----------------------------------------------------------------------===// + +def CC_ARM_AAPCS_VFP : CallingConv<[ + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15]>>, + CCDelegateTo<CC_ARM_AAPCS_Common> +]>; + +def RetCC_ARM_AAPCS_VFP : CallingConv<[ + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15]>>, + CCDelegateTo<RetCC_ARM_AAPCS_Common> ]>; //===----------------------------------------------------------------------===// @@ -77,11 +111,19 @@ def RetCC_ARM_AAPCS : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM : CallingConv<[ + CCIfSubtarget<"isAAPCS_ABI()", + CCIfSubtarget<"hasVFP2()", + CCIfFloatABI<"Hard", + CCDelegateTo<CC_ARM_AAPCS_VFP>>>>, CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>, CCDelegateTo<CC_ARM_APCS> ]>; def RetCC_ARM : CallingConv<[ + CCIfSubtarget<"isAAPCS_ABI()", + CCIfSubtarget<"hasVFP2()", + CCIfFloatABI<"Hard", + CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>, CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>, CCDelegateTo<RetCC_ARM_APCS> ]>; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c0fd9dc..ec8bd1f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1101,7 +1101,12 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { else RC = ARM::GPRRegisterClass; - if (RegVT == MVT::f64) { + if (FloatABIType == FloatABI::Hard) { + if (RegVT == MVT::f32) + RC = ARM::SPRRegisterClass; + else if (RegVT == MVT::f64) + RC = ARM::DPRRegisterClass; + } else if (RegVT == MVT::f64) { // f64 is passed in pairs of GPRs and must be combined. RegVT = MVT::i32; } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32))) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 680e772..cc9f1a5 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -451,7 +451,7 @@ multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> { /// the function. The first operand is the ID# for this instruction, the second /// is the index into the MachineConstantPool that this is, the third is the /// size in bytes of this constant pool entry. -let isNotDuplicable = 1 in +let neverHasSideEffects = 1, isNotDuplicable = 1 in def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size), @@ -771,6 +771,7 @@ def STM : AXI4st<(outs), // Move Instructions. // +let neverHasSideEffects = 1 in def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, "mov", " $dst, $src", []>, UnaryDP; def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, @@ -946,6 +947,7 @@ def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; // Extra precision multiplies with low / high results +let neverHasSideEffects = 1 in { def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), "smull", " $ldst, $hdst, $a, $b", []>; @@ -967,6 +969,7 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), "umaal", " $ldst, $hdst, $a, $b", []>, Requires<[IsARM, HasV6]>; +} // neverHasSideEffects // Most significant word multiply def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index ffb83a8..54232f6 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -298,6 +298,7 @@ def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "add $dst, $lhs, $rhs", [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>; +let neverHasSideEffects = 1 in def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs), "add $dst, $rhs @ addhirr", []>; @@ -387,6 +388,7 @@ def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src), // Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy', // which is MOV(3). This also supports high registers. +let neverHasSideEffects = 1 in { def tMOVr : TI<(outs tGPR:$dst), (ins tGPR:$src), "cpy $dst, $src", []>; def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src), @@ -395,6 +397,7 @@ def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src), "cpy $dst, $src\t@ lor2hir", []>; def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src), "cpy $dst, $src\t@ hir2hir", []>; +} // neverHasSideEffects def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "mul $dst, $rhs", diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 0247daf..9104c77 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -192,11 +192,13 @@ def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, let Inst{7-4} = 0b1100; } +let neverHasSideEffects = 1 in { def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), "fcpyd", " $dst, $a", []>; def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), "fcpys", " $dst, $a", []>; +} // neverHasSideEffects def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), "fnegd", " $dst, $a", diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 963ff0d..684ecb4 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -17,24 +17,31 @@ #include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Support/Compiler.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumLDMGened , "Number of ldm instructions generated"); STATISTIC(NumSTMGened , "Number of stm instructions generated"); STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); +STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); + +/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine +/// load / store instructions to form ldm / stm instructions. namespace { struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass { @@ -81,12 +88,6 @@ namespace { char ARMLoadStoreOpt::ID = 0; } -/// createARMLoadStoreOptimizationPass - returns an instance of the load / store -/// optimization pass. -FunctionPass *llvm::createARMLoadStoreOptimizationPass() { - return new ARMLoadStoreOpt(); -} - static int getLoadStoreMultipleOpcode(int Opcode) { switch (Opcode) { case ARM::LDR: @@ -582,6 +583,23 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { RS->forward(prior(Loc)); } +static int getMemoryOpOffset(const MachineInstr *MI) { + int Opcode = MI->getOpcode(); + bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; + unsigned NumOperands = MI->getDesc().getNumOperands(); + unsigned OffField = MI->getOperand(NumOperands-3).getImm(); + int Offset = isAM2 + ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; + if (isAM2) { + if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } else { + if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } + return Offset; +} + /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { @@ -606,22 +624,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { bool isMemOp = isMemoryOp(MBBI); if (isMemOp) { int Opcode = MBBI->getOpcode(); - bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; unsigned Size = getLSMultipleTransferSize(MBBI); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); - unsigned NumOperands = MBBI->getDesc().getNumOperands(); - unsigned OffField = MBBI->getOperand(NumOperands-3).getImm(); - int Offset = isAM2 - ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; - if (isAM2) { - if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } else { - if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } + int Offset = getMemoryOpOffset(MBBI); // Watch out for: // r4 := ldr [r5] // r5 := ldr [r5, #4] @@ -744,6 +751,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { return NumMerges > 0; } +namespace { + struct OffsetCompare { + bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { + int LOffset = getMemoryOpOffset(LHS); + int ROffset = getMemoryOpOffset(RHS); + assert(LHS == RHS || LOffset != ROffset); + return LOffset > ROffset; + } + }; +} + /// MergeReturnIntoLDM - If this is a exit BB, try merging the return op /// (bx lr) into the preceeding stack restore so it directly restore the value /// of LR into pc. @@ -788,3 +806,277 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { delete RS; return Modified; } + + +/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move +/// load / stores from consecutive locations close to make it more +/// likely they will be combined later. + +namespace { + struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ + static char ID; + ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM pre- register allocation load / store optimization pass"; + } + + private: + bool RescheduleOps(MachineBasicBlock *MBB, + SmallVector<MachineInstr*, 4> &Ops, + unsigned Base, bool isLd, + DenseMap<MachineInstr*, unsigned> &MI2LocMap); + bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); + }; + char ARMPreAllocLoadStoreOpt::ID = 0; +} + +bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + TII = Fn.getTarget().getInstrInfo(); + TRI = Fn.getTarget().getRegisterInfo(); + MRI = &Fn.getRegInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) + Modified |= RescheduleLoadStoreInstrs(MFI); + + return Modified; +} + +static bool IsSafeToMove(bool isLd, unsigned Base, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + SmallPtrSet<MachineInstr*, 4> MoveOps, + const TargetRegisterInfo *TRI) { + // Are there stores / loads / calls between them? + // FIXME: This is overly conservative. We should make use of alias information + // some day. + while (++I != E) { + const TargetInstrDesc &TID = I->getDesc(); + if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) + return false; + if (isLd && TID.mayStore()) + return false; + if (!isLd) { + if (TID.mayLoad()) + return false; + // It's not safe to move the first 'str' down. + // str r1, [r0] + // strh r5, [r0] + // str r4, [r0, #+4] + if (TID.mayStore() && !MoveOps.count(&*I)) + return false; + } + for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { + MachineOperand &MO = I->getOperand(j); + if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base)) + return false; + } + } + return true; +} + +bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, + SmallVector<MachineInstr*, 4> &Ops, + unsigned Base, bool isLd, + DenseMap<MachineInstr*, unsigned> &MI2LocMap) { + bool RetVal = false; + + // Sort by offset (in reverse order). + std::sort(Ops.begin(), Ops.end(), OffsetCompare()); + + // The loads / stores of the same base are in order. Scan them from first to + // last and check for the followins: + // 1. Any def of base. + // 2. Any gaps. + while (Ops.size() > 1) { + unsigned FirstLoc = ~0U; + unsigned LastLoc = 0; + MachineInstr *FirstOp = 0; + MachineInstr *LastOp = 0; + int LastOffset = 0; + unsigned LastBytes = 0; + unsigned NumMove = 0; + for (int i = Ops.size() - 1; i >= 0; --i) { + MachineInstr *Op = Ops[i]; + unsigned Loc = MI2LocMap[Op]; + if (Loc <= FirstLoc) { + FirstLoc = Loc; + FirstOp = Op; + } + if (Loc >= LastLoc) { + LastLoc = Loc; + LastOp = Op; + } + + int Offset = getMemoryOpOffset(Op); + unsigned Bytes = getLSMultipleTransferSize(Op); + if (LastBytes) { + if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) + break; + } + LastOffset = Offset; + LastBytes = Bytes; + if (++NumMove == 4) + break; + } + + if (NumMove <= 1) + Ops.pop_back(); + else { + SmallPtrSet<MachineInstr*, 4> MoveOps; + for (int i = NumMove-1; i >= 0; --i) + MoveOps.insert(Ops[i]); + + // Be conservative, if the instructions are too far apart, don't + // move them. We want to limit the increase of register pressure. + bool DoMove = (LastLoc - FirstLoc) < NumMove*4; + if (DoMove) + DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI); + if (!DoMove) { + for (unsigned i = 0; i != NumMove; ++i) + Ops.pop_back(); + } else { + // This is the new location for the loads / stores. + MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; + while (InsertPos != MBB->end() && MoveOps.count(InsertPos)) + ++InsertPos; + for (unsigned i = 0; i != NumMove; ++i) { + MachineInstr *Op = Ops.back(); + Ops.pop_back(); + MBB->splice(InsertPos, MBB, Op); + } + + NumLdStMoved += NumMove; + RetVal = true; + } + } + } + + return RetVal; +} + +bool +ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { + bool RetVal = false; + + DenseMap<MachineInstr*, unsigned> MI2LocMap; + DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; + DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; + SmallVector<unsigned, 4> LdBases; + SmallVector<unsigned, 4> StBases; + + unsigned Loc = 0; + MachineBasicBlock::iterator MBBI = MBB->begin(); + MachineBasicBlock::iterator E = MBB->end(); + while (MBBI != E) { + for (; MBBI != E; ++MBBI) { + MachineInstr *MI = MBBI; + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.isCall() || TID.isTerminator()) { + // Stop at barriers. + ++MBBI; + break; + } + + MI2LocMap[MI] = Loc++; + if (!isMemoryOp(MI)) + continue; + unsigned PredReg = 0; + if (getInstrPredicate(MI, PredReg) != ARMCC::AL) + continue; + + int Opcode = MI->getOpcode(); + bool isLd = Opcode == ARM::LDR || + Opcode == ARM::FLDS || Opcode == ARM::FLDD; + unsigned Base = MI->getOperand(1).getReg(); + int Offset = getMemoryOpOffset(MI); + + bool StopHere = false; + if (isLd) { + DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = + Base2LdsMap.find(Base); + if (BI != Base2LdsMap.end()) { + for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { + if (Offset == getMemoryOpOffset(BI->second[i])) { + StopHere = true; + break; + } + } + if (!StopHere) + BI->second.push_back(MI); + } else { + SmallVector<MachineInstr*, 4> MIs; + MIs.push_back(MI); + Base2LdsMap[Base] = MIs; + LdBases.push_back(Base); + } + } else { + DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = + Base2StsMap.find(Base); + if (BI != Base2StsMap.end()) { + for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { + if (Offset == getMemoryOpOffset(BI->second[i])) { + StopHere = true; + break; + } + } + if (!StopHere) + BI->second.push_back(MI); + } else { + SmallVector<MachineInstr*, 4> MIs; + MIs.push_back(MI); + Base2StsMap[Base] = MIs; + StBases.push_back(Base); + } + } + + if (StopHere) { + // Found a duplicate (a base+offset combination that's seen earlier). Backtrack. + --Loc; + break; + } + } + + // Re-schedule loads. + for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { + unsigned Base = LdBases[i]; + SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base]; + if (Lds.size() > 1) + RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); + } + + // Re-schedule stores. + for (unsigned i = 0, e = StBases.size(); i != e; ++i) { + unsigned Base = StBases[i]; + SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base]; + if (Sts.size() > 1) + RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); + } + + if (MBBI != E) { + Base2LdsMap.clear(); + Base2StsMap.clear(); + LdBases.clear(); + StBases.clear(); + } + } + + return RetVal; +} + + +/// createARMLoadStoreOptimizationPass - returns an instance of the load / store +/// optimization pass. +FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { + if (PreAlloc) + return new ARMPreAllocLoadStoreOpt(); + return new ARMLoadStoreOpt(); +} diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index b95d1f9..ebe7d58 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -219,3 +219,18 @@ def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8, // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; + +//===----------------------------------------------------------------------===// +// Subregister Set Definitions... now that we have all of the pieces, define the +// sub registers for each register. +// + +def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15], + [S0, S2, S4, S6, S8, S10, S12, S14, + S16, S18, S20, S22, S24, S26, S28, S30]>; + +def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15], + [S1, S3, S5, S7, S9, S11, S13, S15, + S17, S19, S21, S23, S25, S27, S29, S31]>; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index ef78cd5..a978380 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -14,6 +14,8 @@ #include "ARMSubtarget.h" #include "ARMGenSubtarget.inc" #include "llvm/Module.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, @@ -28,6 +30,10 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. , TargetABI(ARM_ABI_APCS) { + // default to soft float ABI + if (FloatABIType == FloatABI::Default) + FloatABIType = FloatABI::Soft; + // Determine default and user specified characteristics // Parse features string. diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8b469cf..0704055 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -23,7 +23,7 @@ class Module; class ARMSubtarget : public TargetSubtarget { protected: enum ARMArchEnum { - V4T, V5T, V5TE, V6, V7A + V4T, V5T, V5TE, V6, V6T2, V7A }; enum ARMFPEnum { @@ -92,6 +92,7 @@ protected: bool hasV5TOps() const { return ARMArchVersion >= V5T; } bool hasV5TEOps() const { return ARMArchVersion >= V5TE; } bool hasV6Ops() const { return ARMArchVersion >= V6; } + bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; } bool hasV7Ops() const { return ARMArchVersion >= V7A; } bool hasVFP2() const { return ARMFPUType >= VFPv2; } @@ -105,6 +106,7 @@ protected: bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } bool isThumb() const { return IsThumb; } + bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); } bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } bool useThumbBacktraces() const { return UseThumbBacktraces; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 1dc7d19..7033907 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -23,6 +23,9 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +static cl::opt<bool> +EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden, + cl::desc("Enable pre-regalloc load store optimization pass")); static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden, cl::desc("Disable load store optimization pass")); static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden, @@ -144,6 +147,16 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM, return false; } +bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + if (!EnablePreLdStOpti) + return false; + // FIXME: temporarily disabling load / store optimization pass for Thumb mode. + if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb()) + PM.add(createARMLoadStoreOptimizationPass(true)); + return true; +} + bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // FIXME: temporarily disabling load / store optimization pass for Thumb mode. diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 916a8aa..7192c1b 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -71,6 +71,7 @@ public: // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 1cf0a91..7cffd0e 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMTarget Target.cpp TargetAsmInfo.cpp TargetData.cpp + TargetELFWriterInfo.cpp TargetFrameInfo.cpp TargetInstrInfo.cpp TargetMachOWriterInfo.cpp @@ -14,4 +15,4 @@ add_llvm_library(LLVMTarget TargetSubtarget.cpp ) -# TODO: Support other targets besides X86. See Makefile.
\ No newline at end of file +# TODO: Support other targets besides X86. See Makefile. diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp index b42ee45..f9a8801 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp @@ -33,8 +33,9 @@ bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) { return true; } -/// runOnMachineFunction - This uses the printInstruction() -/// method to print assembly for each instruction. +/// runOnMachineFunction - This emits the frame section, autos section and +/// assembly for each instruction. Also takes care of function begin debug +/// directive and file begin debug directive (if required) for the function. /// bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; @@ -47,20 +48,38 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { const Function *F = MF.getFunction(); CurrentFnName = Mang->getValueName(F); - DbgInfo.EmitFileDirective(F); - // Emit the function variables. + // Iterate over the first basic block instructions to find if it has a + // DebugLoc. If so emit .file directive. Instructions such as movlw do not + // have valid DebugLoc, so need to iterate over instructions. + MachineFunction::const_iterator I = MF.begin(); + for (MachineBasicBlock::const_iterator MBBI = I->begin(), E = I->end(); + MBBI != E; MBBI++) { + const DebugLoc DLoc = MBBI->getDebugLoc(); + if (!DLoc.isUnknown()) { + GlobalVariable *CU = MF.getDebugLocTuple(DLoc).CompileUnit; + unsigned line = MF.getDebugLocTuple(DLoc).Line; + DbgInfo.EmitFileDirective(CU); + DbgInfo.SetFunctBeginLine(line); + break; + } + } + + // Emit the function frame (args and temps). EmitFunctionFrame(MF); - // Emit function begin debug directives + // Emit function begin debug directive. DbgInfo.EmitFunctBeginDI(F); + // Emit the autos section of function. EmitAutos(CurrentFnName); + + // Now emit the instructions of function in its code section. const char *codeSection = PAN::getCodeSectionName(CurrentFnName).c_str(); const Section *fCodeSection = TAI->getNamedSection(codeSection, SectionFlags::Code); - O << "\n"; // Start the Code Section. + O << "\n"; SwitchToSection (fCodeSection); // Emit the frame address of the function at the beginning of code. @@ -77,14 +96,17 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print out code for the function. for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + // Print a label for the basic block. if (I != MF.begin()) { printBasicBlockLabel(I, true); O << '\n'; } + // Print a basic block. for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { + // Emit the line directive if source line changed. const DebugLoc DL = II->getDebugLoc(); if (!DL.isUnknown()) { @@ -102,6 +124,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit function end debug directives. DbgInfo.EmitFunctEndDI(F, CurLine); + return false; // we didn't modify anything. } @@ -158,11 +181,16 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { } } +/// printCCOperand - Print the cond code operand. +/// void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { int CC = (int)MI->getOperand(opNum).getImm(); O << PIC16CondCodeToString((PIC16CC::CondCodes)CC); } +/// printLibcallDecls - print the extern declarations for compiler +/// intrinsics. +/// void PIC16AsmPrinter::printLibcallDecls(void) { // If no libcalls used, return. if (LibcallDecls.empty()) return; @@ -180,6 +208,10 @@ void PIC16AsmPrinter::printLibcallDecls(void) { O << TAI->getCommentString() << "External decls for libcalls - END." <<"\n"; } +/// doInitialization - Perfrom Module level initializations here. +/// One task that we do here is to sectionize all global variables. +/// The MemSelOptimizer pass depends on the sectionizing. +/// bool PIC16AsmPrinter::doInitialization (Module &M) { bool Result = AsmPrinter::doInitialization(M); @@ -194,23 +226,23 @@ bool PIC16AsmPrinter::doInitialization (Module &M) { I->setSection(TAI->SectionForGlobal(I)->getName()); } - DbgInfo.EmitFileDirective(M); + DbgInfo.Init(M); EmitFunctionDecls(M); EmitUndefinedVars(M); EmitDefinedVars(M); EmitIData(M); EmitUData(M); EmitRomData(M); - DbgInfo.PopulateFunctsDI(M); return Result; } -// Emit extern decls for functions imported from other modules, and emit -// global declarations for function defined in this module and which are -// available to other modules. +/// Emit extern decls for functions imported from other modules, and emit +/// global declarations for function defined in this module and which are +/// available to other modules. +/// void PIC16AsmPrinter::EmitFunctionDecls (Module &M) { // Emit declarations for external functions. - O << TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n"; + O <<"\n"<<TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n"; for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) { std::string Name = Mang->getValueName(I); if (Name.compare("@abort") == 0) @@ -280,6 +312,7 @@ void PIC16AsmPrinter::EmitRomData (Module &M) bool PIC16AsmPrinter::doFinalization(Module &M) { printLibcallDecls(); + EmitRemainingAutos(); DbgInfo.EmitVarDebugInfo(M); DbgInfo.EmitEOF(); O << "\n\t" << "END\n"; @@ -383,6 +416,8 @@ void PIC16AsmPrinter::EmitAutos (std::string FunctName) for (unsigned i = 0; i < AutosSections.size(); i++) { O << "\n"; if (AutosSections[i]->S_->getName() == SectionName) { + // Set the printing status to true + AutosSections[i]->setPrintedStatus(true); SwitchToSection(AutosSections[i]->S_); std::vector<const GlobalVariable*> Items = AutosSections[i]->Items; for (unsigned j = 0; j < Items.size(); j++) { @@ -398,3 +433,34 @@ void PIC16AsmPrinter::EmitAutos (std::string FunctName) } } +// Print autos that were not printed during the code printing of functions. +// As the functions might themselves would have got deleted by the optimizer. +void PIC16AsmPrinter::EmitRemainingAutos() +{ + const TargetData *TD = TM.getTargetData(); + + // Now print Autos section for this function. + std::vector <PIC16Section *>AutosSections = PTAI->AutosSections; + for (unsigned i = 0; i < AutosSections.size(); i++) { + + // if the section is already printed then don't print again + if (AutosSections[i]->isPrinted()) + continue; + + // Set status as printed + AutosSections[i]->setPrintedStatus(true); + + O << "\n"; + SwitchToSection(AutosSections[i]->S_); + std::vector<const GlobalVariable*> Items = AutosSections[i]->Items; + for (unsigned j = 0; j < Items.size(); j++) { + std::string VarName = Mang->getValueName(Items[j]); + Constant *C = Items[j]->getInitializer(); + const Type *Ty = C->getType(); + unsigned Size = TD->getTypeAllocSize(Ty); + // Emit memory reserve directive. + O << VarName << " RES " << Size << "\n"; + } + } +} + diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h index 2545dfd..8bdcf72 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/PIC16AsmPrinter.h @@ -52,6 +52,7 @@ namespace llvm { void EmitIData (Module &M); void EmitUData (Module &M); void EmitAutos (std::string FunctName); + void EmitRemainingAutos (); void EmitRomData (Module &M); void EmitFunctionFrame(MachineFunction &MF); void printLibcallDecls(void); diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index faf4590..d7ebea7 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -18,13 +18,6 @@ using namespace llvm; -PIC16DbgInfo::~PIC16DbgInfo() { - for(std::map<std::string, DISubprogram *>::iterator i = FunctNameMap.begin(); - i!=FunctNameMap.end(); i++) - delete i->second; - FunctNameMap.clear(); -} - void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TypeName) { @@ -70,7 +63,7 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, } HasAux = true; // In auxillary entry for array, 7th and 8th byte represent array size. - Aux[6] = size; + Aux[6] = size & 0xff; Aux[7] = size >> 8; DIType BaseType = CTy.getTypeDerivedFrom(); PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName); @@ -86,10 +79,14 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, else TypeNo = TypeNo | PIC16Dbg::T_UNION; CTy.getName(TypeName); - unsigned size = CTy.getSizeInBits()/8; + // UniqueSuffix is .number where number is obtained from + // llvm.dbg.composite<number>. + std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18); + TypeName += UniqueSuffix; + unsigned short size = CTy.getSizeInBits()/8; // 7th and 8th byte represent size. HasAux = true; - Aux[6] = size; + Aux[6] = size & 0xff; Aux[7] = size >> 8; break; } @@ -145,37 +142,84 @@ short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) { return ClassNo; } -void PIC16DbgInfo::PopulateFunctsDI(Module &M) { - GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.subprograms"); - if (!Root) - return; - Constant *RootC = cast<Constant>(*Root->use_begin()); - - for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end(); - UI != UE; ++UI) - for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end(); - UUI != UUE; ++UUI) { - GlobalVariable *GVSP = cast<GlobalVariable>(*UUI); - DISubprogram *SP = new DISubprogram(GVSP); - std::string Name; - SP->getLinkageName(Name); - FunctNameMap[Name] = SP; - } - return; +void PIC16DbgInfo::Init(Module &M) { + // Do all debug related initializations here. + EmitFileDirective(M); + EmitCompositeTypeDecls(M); } -DISubprogram* PIC16DbgInfo::getFunctDI(std::string FunctName) { - return FunctNameMap[FunctName]; +void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { + for(iplist<GlobalVariable>::iterator I = M.getGlobalList().begin(), + E = M.getGlobalList().end(); I != E; I++) { + // Structures and union declaration's debug info has llvm.dbg.composite + // in its name. + if(I->getName().find("llvm.dbg.composite") != std::string::npos) { + GlobalVariable *GV = cast<GlobalVariable >(I); + DICompositeType CTy(GV); + if (CTy.getTag() == dwarf::DW_TAG_union_type || + CTy.getTag() == dwarf::DW_TAG_structure_type ) { + std::string name; + CTy.getName(name); + std::string DIVar = I->getName(); + // Get the number after llvm.dbg.composite and make UniqueSuffix from + // it. + std::string UniqueSuffix = "." + DIVar.substr(18); + std::string MangledCTyName = name + UniqueSuffix; + unsigned short size = CTy.getSizeInBits()/8; + int Aux[PIC16Dbg::AuxSize] = {0}; + // 7th and 8th byte represent size of structure/union. + Aux[6] = size & 0xff; + Aux[7] = size >> 8; + // Emit .def for structure/union tag. + if( CTy.getTag() == dwarf::DW_TAG_union_type) + EmitSymbol(MangledCTyName, PIC16Dbg::C_UNTAG); + else if (CTy.getTag() == dwarf::DW_TAG_structure_type) + EmitSymbol(MangledCTyName, PIC16Dbg::C_STRTAG); + + // Emit auxiliary debug information for structure/union tag. + EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize); + unsigned long Value = 0; + DIArray Elements = CTy.getTypeArray(); + for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) { + DIDescriptor Element = Elements.getElement(i); + unsigned short TypeNo = 0; + bool HasAux = false; + int ElementAux[PIC16Dbg::AuxSize] = { 0 }; + std::string TypeName = ""; + std::string ElementName; + GlobalVariable *GV = Element.getGV(); + DIDerivedType DITy(GV); + DITy.getName(ElementName); + unsigned short ElementSize = DITy.getSizeInBits()/8; + // Get mangleddd name for this structure/union element. + std::string MangMemName = ElementName + UniqueSuffix; + PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TypeName); + short Class; + if( CTy.getTag() == dwarf::DW_TAG_union_type) + Class = PIC16Dbg::C_MOU; + else if (CTy.getTag() == dwarf::DW_TAG_structure_type) + Class = PIC16Dbg::C_MOS; + EmitSymbol(MangMemName, Class, TypeNo, Value); + if (CTy.getTag() == dwarf::DW_TAG_structure_type) + Value += ElementSize; + if (HasAux) + EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TypeName); + } + // Emit mangled Symbol for end of structure/union. + std::string EOSSymbol = ".eos" + UniqueSuffix; + EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS); + EmitAuxEntry(EOSSymbol, Aux, PIC16Dbg::AuxSize, MangledCTyName); + } + } + } } void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) { std::string FunctName = F->getName(); - DISubprogram *SP = getFunctDI(FunctName); - if (SP) { + if (EmitDebugDirectives) { std::string FunctBeginSym = ".bf." + FunctName; std::string BlockBeginSym = ".bb." + FunctName; - int FunctBeginLine = SP->getLineNumber(); int BFAux[PIC16Dbg::AuxSize] = {0}; BFAux[4] = FunctBeginLine; BFAux[5] = FunctBeginLine >> 8; @@ -189,8 +233,7 @@ void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) { void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { std::string FunctName = F->getName(); - DISubprogram *SP = getFunctDI(FunctName); - if (SP) { + if (EmitDebugDirectives) { std::string FunctEndSym = ".ef." + FunctName; std::string BlockEndSym = ".eb." + FunctName; @@ -208,14 +251,21 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { /// EmitAuxEntry - Emit Auxiliary debug information. /// -void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num) { +void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num, + std::string tag) { O << "\n\t.dim " << VarName << ", 1" ; + if (tag != "") + O << ", " << tag; for (int i = 0; i<num; i++) O << "," << Aux[i]; } -void PIC16DbgInfo::EmitSymbol(std::string Name, int Class) { - O << "\n\t" << ".def "<< Name << ", debug, class = " << Class; +void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short + Type, unsigned long Value) { + O << "\n\t" << ".def "<< Name << ", type = " << Type << ", class = " + << Class; + if (Value > 0) + O << ", value = " << Value; } void PIC16DbgInfo::EmitVarDebugInfo(Module &M) { @@ -241,18 +291,8 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) { O << "\n\t.type " << VarName << ", " << TypeNo; short ClassNo = getClass(DIGV); O << "\n\t.class " << VarName << ", " << ClassNo; - if (HasAux) { - if (TypeName != "") { - // Emit debug info for structure and union objects after - // .dim directive supports structure/union tag name in aux entry. - /* O << "\n\t.dim " << VarName << ", 1," << TypeName; - for (int i = 0; i<PIC16Dbg::AuxSize; i++) - O << "," << Aux[i];*/ - } - else { - EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize); - } - } + if (HasAux) + EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TypeName); } } } @@ -262,26 +302,20 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) { void PIC16DbgInfo::EmitFileDirective(Module &M) { GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit"); if (CU) { - DICompileUnit DIUnit(CU); - std::string Dir, FN; - std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN); - O << "\n\t.file\t\"" << File << "\"\n" ; - CurFile = File; + EmitDebugDirectives = true; + EmitFileDirective(CU, false); } } -void PIC16DbgInfo::EmitFileDirective(const Function *F) { - std::string FunctName = F->getName(); - DISubprogram *SP = getFunctDI(FunctName); - if (SP) { - std::string Dir, FN; - DICompileUnit CU = SP->getCompileUnit(); - std::string File = CU.getDirectory(Dir) + "/" + CU.getFilename(FN); - if ( File != CurFile) { +void PIC16DbgInfo::EmitFileDirective(GlobalVariable *CU, bool EmitEof) { + std::string Dir, FN; + DICompileUnit DIUnit(CU); + std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN); + if ( File != CurFile ) { + if (EmitEof) EmitEOF(); - O << "\n\t.file\t\"" << File << "\"\n" ; - CurFile = File; - } + O << "\n\t.file\t\"" << File << "\"\n" ; + CurFile = File; } } @@ -290,3 +324,6 @@ void PIC16DbgInfo::EmitEOF() { O << "\n\t.EOF"; } +void PIC16DbgInfo::SetFunctBeginLine(unsigned line) { + FunctBeginLine = line; +} diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h index be39393..9d50380 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.h +++ b/lib/Target/PIC16/PIC16DebugInfo.h @@ -91,29 +91,36 @@ namespace llvm { class raw_ostream; class PIC16DbgInfo { - std::map <std::string, DISubprogram *> FunctNameMap; raw_ostream &O; const TargetAsmInfo *TAI; std::string CurFile; + // EmitDebugDirectives is set if debug information is available. Default + // value for it is false. + bool EmitDebugDirectives; + unsigned FunctBeginLine; public: PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) { - CurFile = ""; + CurFile = ""; + EmitDebugDirectives = false; } - ~PIC16DbgInfo(); void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TypeName); unsigned GetTypeDebugNumber(std::string &type); short getClass(DIGlobalVariable DIGV); - void PopulateFunctsDI(Module &M); - DISubprogram *getFunctDI(std::string FunctName); void EmitFunctBeginDI(const Function *F); + void Init(Module &M); + void EmitCompositeTypeDecls(Module &M); void EmitFunctEndDI(const Function *F, unsigned Line); - void EmitAuxEntry(const std::string VarName, int Aux[], int num); - inline void EmitSymbol(std::string Name, int Class); + void EmitAuxEntry(const std::string VarName, int Aux[], + int num = PIC16Dbg::AuxSize, std::string tag = ""); + inline void EmitSymbol(std::string Name, short Class, + unsigned short Type = PIC16Dbg::T_NULL, + unsigned long Value = 0); void EmitVarDebugInfo(Module &M); void EmitFileDirective(Module &M); - void EmitFileDirective(const Function *F); + void EmitFileDirective(GlobalVariable *CU, bool EmitEof = true); void EmitEOF(); + void SetFunctBeginLine(unsigned line); }; } // end namespace llvm; #endif diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index ac9a143..ba465f3 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -56,6 +56,17 @@ static const char *getIntrinsicName(unsigned opcode) { case RTLIB::SREM_I32: Basename = "srem.i32"; break; case RTLIB::UREM_I16: Basename = "urem.i16"; break; case RTLIB::UREM_I32: Basename = "urem.i32"; break; + + case RTLIB::FPTOSINT_F32_I32: + Basename = "f32_to_si32"; break; + case RTLIB::SINTTOFP_I32_F32: + Basename = "si32_to_f32"; break; + + case RTLIB::ADD_F32: Basename = "add.f32"; break; + case RTLIB::SUB_F32: Basename = "sub.f32"; break; + case RTLIB::MUL_F32: Basename = "mul.f32"; break; + case RTLIB::DIV_F32: Basename = "div.f32"; break; + } std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL); @@ -113,7 +124,17 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) // Unsigned remainder lib call names setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16)); setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32)); - + + // Floating point operations + setLibcallName(RTLIB::FPTOSINT_F32_I32, + getIntrinsicName(RTLIB::FPTOSINT_F32_I32)); + setLibcallName(RTLIB::SINTTOFP_I32_F32, + getIntrinsicName(RTLIB::SINTTOFP_I32_F32)); + setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32)); + setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32)); + setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32)); + setLibcallName(RTLIB::DIV_F32, getIntrinsicName(RTLIB::DIV_F32)); + setOperationAction(ISD::GlobalAddress, MVT::i16, Custom); setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom); diff --git a/lib/Target/PIC16/PIC16TargetAsmInfo.h b/lib/Target/PIC16/PIC16TargetAsmInfo.h index e464e36..b7292b8 100644 --- a/lib/Target/PIC16/PIC16TargetAsmInfo.h +++ b/lib/Target/PIC16/PIC16TargetAsmInfo.h @@ -33,9 +33,13 @@ namespace llvm { struct PIC16Section { const Section *S_; // Connection to actual Section. unsigned Size; // Total size of the objects contained. + bool SectionPrinted; std::vector<const GlobalVariable*> Items; - PIC16Section (const Section *s) { S_ = s; Size = 0; } + PIC16Section (const Section *s) { S_ = s; Size = 0; + SectionPrinted = false;} + bool isPrinted() { return SectionPrinted ; } + void setPrintedStatus(bool status) { SectionPrinted = status ;} }; struct PIC16TargetAsmInfo : public TargetAsmInfo { diff --git a/lib/Target/TargetELFWriterInfo.cpp b/lib/Target/TargetELFWriterInfo.cpp new file mode 100644 index 0000000..9651e65 --- /dev/null +++ b/lib/Target/TargetELFWriterInfo.cpp @@ -0,0 +1,36 @@ +//===-- lib/Target/TargetELFWriterInfo.cpp - ELF Writer Info --0-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetELFWriterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/Target/TargetELFWriterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) { + is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + isLittleEndian = TM.getTargetData()->isLittleEndian(); +} + +TargetELFWriterInfo::~TargetELFWriterInfo() {} + +/// getFunctionAlignment - Returns the alignment for function 'F', targets +/// with different alignment constraints should overload this method +unsigned TargetELFWriterInfo::getFunctionAlignment(const Function *F) const { + const TargetData *TD = TM.getTargetData(); + unsigned FnAlign = F->getAlignment(); + unsigned TDAlign = TD->getPointerABIAlignment(); + unsigned Align = std::max(FnAlign, TDAlign); + assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); + return Align; +} diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index dea293b..c487cb8 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -30,6 +30,7 @@ namespace llvm { bool FiniteOnlyFPMathOption; bool HonorSignDependentRoundingFPMathOption; bool UseSoftFloat; + FloatABI::ABIType FloatABIType; bool NoImplicitFloat; bool NoZerosInBSS; bool ExceptionHandling; @@ -84,6 +85,19 @@ GenerateSoftFloatCalls("soft-float", cl::desc("Generate software floating point library calls"), cl::location(UseSoftFloat), cl::init(false)); +static cl::opt<llvm::FloatABI::ABIType, true> +FloatABIForCalls("float-abi", + cl::desc("Choose float ABI type"), + cl::location(FloatABIType), + cl::init(FloatABI::Default), + cl::values( + clEnumValN(FloatABI::Default, "default", + "Target default float ABI type"), + clEnumValN(FloatABI::Soft, "soft", + "Soft float ABI (implied by -soft-float)"), + clEnumValN(FloatABI::Hard, "hard", + "Hard float ABI (uses FP registers)"), + clEnumValEnd)); static cl::opt<bool, true> DontPlaceZerosInBSS("nozero-initialized-in-bss", cl::desc("Don't place zero-initialized symbols into bss section"), @@ -162,6 +176,14 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", // TargetMachine Class // +TargetMachine::TargetMachine() + : AsmInfo(0) { + // Typically it will be subtargets that will adjust FloatABIType from Default + // to Soft or Hard. + if (UseSoftFloat) + FloatABIType = FloatABI::Soft; +} + TargetMachine::~TargetMachine() { delete AsmInfo; } diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 710bd03..3796aac 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -482,35 +482,6 @@ _usesbb: //===---------------------------------------------------------------------===// -Currently we don't have elimination of redundant stack manipulations. Consider -the code: - -int %main() { -entry: - call fastcc void %test1( ) - call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) ) - ret int 0 -} - -declare fastcc void %test1() - -declare fastcc void %test2(sbyte*) - - -This currently compiles to: - - subl $16, %esp - call _test5 - addl $12, %esp - subl $16, %esp - movl $_test5, (%esp) - call _test6 - addl $12, %esp - -The add\sub pair is really unneeded here. - -//===---------------------------------------------------------------------===// - Consider the expansion of: define i32 @test3(i32 %X) { @@ -902,34 +873,6 @@ condition register is dead. xor reg reg is shorter than mov reg, #0. //===---------------------------------------------------------------------===// -We aren't matching RMW instructions aggressively -enough. Here's a reduced testcase (more in PR1160): - -define void @test(i32* %huge_ptr, i32* %target_ptr) { - %A = load i32* %huge_ptr ; <i32> [#uses=1] - %B = load i32* %target_ptr ; <i32> [#uses=1] - %C = or i32 %A, %B ; <i32> [#uses=1] - store i32 %C, i32* %target_ptr - ret void -} - -$ llvm-as < t.ll | llc -march=x86-64 - -_test: - movl (%rdi), %eax - orl (%rsi), %eax - movl %eax, (%rsi) - ret - -That should be something like: - -_test: - movl (%rdi), %eax - orl %eax, (%rsi) - ret - -//===---------------------------------------------------------------------===// - The following code: bb114.preheader: ; preds = %cond_next94 @@ -1897,3 +1840,60 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" //===---------------------------------------------------------------------===// + +Testcase: +int a(int x) { return (x & 127) > 31; } + +Current output: + movl 4(%esp), %eax + andl $127, %eax + cmpl $31, %eax + seta %al + movzbl %al, %eax + ret + +Ideal output: + xorl %eax, %eax + testl $96, 4(%esp) + setne %al + ret + +We could do this transformation in instcombine, but it's only clearly +beneficial on platforms with a test instruction. + +//===---------------------------------------------------------------------===// +Testcase: +int x(int a) { return (a&0xf0)>>4; } + +Current output: + movl 4(%esp), %eax + shrl $4, %eax + andl $15, %eax + ret + +Ideal output: + movzbl 4(%esp), %eax + shrl $4, %eax + ret + +//===---------------------------------------------------------------------===// + +Testcase: +int x(int a) { return (a & 0x80) ? 0x100 : 0; } + +Current output: + testl $128, 4(%esp) + setne %al + movzbl %al, %eax + shll $8, %eax + ret + +Ideal output: + movl 4(%esp), %eax + addl %eax, %eax + andl $256, %eax + ret + +We generally want to fold shifted tests of a single bit into a shift+and on x86. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 7f99203..e9fcbd5 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -215,50 +215,6 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>> ]>; -// Tail call convention (fast): One register is reserved for target address, -// namely R9 -def CC_X86_64_TailCall : CallingConv<[ - // Handles byval parameters. - CCIfByVal<CCPassByVal<8, 8>>, - - // Promote i8/i16 arguments to i32. - CCIfType<[i8, i16], CCPromoteToType<i32>>, - - // The 'nest' parameter, if any, is passed in R10. - CCIfNest<CCAssignToReg<[R10]>>, - - // The first 6 integer arguments are passed in integer registers. - CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>, - CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>, - - // The first 8 FP/Vector arguments are passed in XMM registers. - CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfSubtarget<"hasSSE1()", - CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - - // The first 8 MMX (except for v1i64) vector arguments are passed in XMM - // registers on Darwin. - CCIfType<[v8i8, v4i16, v2i32, v2f32], - CCIfSubtarget<"isTargetDarwin()", - CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - - // The first 8 v1i64 vector arguments are passed in GPRs on Darwin. - CCIfType<[v1i64], - CCIfSubtarget<"isTargetDarwin()", - CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>, - - // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 8-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - - // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, - - // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> -]>; - - //===----------------------------------------------------------------------===// // X86 C Calling Convention //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 2604741..d84034b 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -12,8 +12,27 @@ //===----------------------------------------------------------------------===// #include "X86ELFWriterInfo.h" +#include "llvm/Function.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; -X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) : - TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {} +X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM) + : TargetELFWriterInfo(TM) { + bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + EMachine = is64Bit ? EM_X86_64 : EM_386; + } + X86ELFWriterInfo::~X86ELFWriterInfo() {} + +unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const { + unsigned FnAlign = 4; + + if (F->hasFnAttr(Attribute::OptimizeForSize)) + FnAlign = 1; + + if (F->getAlignment()) + FnAlign = Log2_32(F->getAlignment()); + + return (1 << FnAlign); +} diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index acfa501..e9c5bc4 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -20,8 +20,10 @@ namespace llvm { class X86ELFWriterInfo : public TargetELFWriterInfo { public: - X86ELFWriterInfo(bool is64Bit); + X86ELFWriterInfo(TargetMachine &TM); virtual ~X86ELFWriterInfo(); + + virtual unsigned getFunctionAlignment(const Function *F) const; }; } // end llvm namespace diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 33332e4..2bcfd76 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -171,8 +171,6 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; - else if (CC == CallingConv::Fast && isTaillCall) - return CC_X86_64_TailCall; else return CC_X86_64_C; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9e15a54..36e3ab2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -944,7 +944,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { SDValue StackAdjustment = TailCall.getOperand(2); assert(((TargetAddress.getOpcode() == ISD::Register && (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX || - cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) || + cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) || TargetAddress.getOpcode() == ISD::TargetExternalSymbol || TargetAddress.getOpcode() == ISD::TargetGlobalAddress) && "Expecting an global address, external symbol, or register"); @@ -1171,8 +1171,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; - else if (CC == CallingConv::Fast && PerformTailCallOpt) - return CC_X86_64_TailCall; else return CC_X86_64_C; } @@ -1799,7 +1797,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); } else if (IsTailCall) { - unsigned Opc = Is64Bit ? X86::R9 : X86::EAX; + unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Opc, getPointerTy()), @@ -7696,7 +7694,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, SelectionDAG &DAG, MachineFrameInfo *MFI, const TargetLowering &TLI) { LDBase = NULL; - LastLoadedElt = -1; + LastLoadedElt = -1U; for (unsigned i = 0; i < NumElems; ++i) { if (N->getMaskElt(i) < 0) { if (!LDBase) diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index c733f26..6c0074e 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -822,6 +822,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } + unsigned ReadyLabelId = 0; + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer is ready. + ReadyLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); + } + // Skip the callee-saved push instructions. while (MBBI != MBB.end() && (MBBI->getOpcode() == X86::PUSH32r || @@ -831,67 +838,61 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); - if (NumBytes) { // Adjust stack pointer: ESP -= numbytes. - if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { - // Check, whether EAX is livein for this function. - bool isEAXAlive = false; - for (MachineRegisterInfo::livein_iterator + // Adjust stack pointer: ESP -= numbytes. + if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { + // Check, whether EAX is livein for this function. + bool isEAXAlive = false; + for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) { - unsigned Reg = II->first; - isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || - Reg == X86::AH || Reg == X86::AL); - } + unsigned Reg = II->first; + isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || + Reg == X86::AH || Reg == X86::AL); + } - // Function prologue calls _alloca to probe the stack when allocating more - // than 4k bytes in one go. Touching the stack at 4K increments is - // necessary to ensure that the guard pages used by the OS virtual memory - // manager are allocated in correct sequence. - if (!isEAXAlive) { - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes); - BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); - } else { - // Save EAX - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(X86::EAX, RegState::Kill); - - // Allocate NumBytes-4 bytes on stack. We'll also use 4 already - // allocated bytes for EAX. - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes-4); - BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); - - // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes-4); - MBB.insert(MBBI, MI); - } + // Function prologue calls _alloca to probe the stack when allocating more + // than 4k bytes in one go. Touching the stack at 4K increments is necessary + // to ensure that the guard pages used by the OS virtual memory manager are + // allocated in correct sequence. + if (!isEAXAlive) { + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes); + BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("_alloca"); } else { - // If there is an SUB32ri of ESP immediately before this instruction, - // merge the two. This can be the case when tail call elimination is - // enabled and the callee has more arguments then the caller. - NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); + // Save EAX + BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) + .addReg(X86::EAX, RegState::Kill); + + // Allocate NumBytes-4 bytes on stack. We'll also use 4 already + // allocated bytes for EAX. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes - 4); + BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("_alloca"); + + // Restore EAX + MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), + X86::EAX), + StackPtr, false, NumBytes - 4); + MBB.insert(MBBI, MI); + } + } else if (NumBytes) { + // If there is an SUB32ri of ESP immediately before this instruction, merge + // the two. This can be the case when tail call elimination is enabled and + // the callee has more arguments then the caller. + NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); - // If there is an ADD32ri or SUB32ri of ESP immediately after this - // instruction, merge the two instructions. - mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); + // If there is an ADD32ri or SUB32ri of ESP immediately after this + // instruction, merge the two instructions. + mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); - if (NumBytes) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); - } + if (NumBytes) + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); } - if (needsFrameMoves) { - // Mark effective beginning of when frame pointer is ready. - unsigned ReadyLabelId = 0; - ReadyLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); + if (needsFrameMoves) emitFrameMoves(MF, FrameLabelId, ReadyLabelId); - } } void X86RegisterInfo::emitEpilogue(MachineFunction &MF, diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 03ce1ae..56983ce 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -350,6 +350,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) , MaxInlineSizeThreshold(128) , Is64Bit(is64Bit) , TargetType(isELF) { // Default to ELF unless otherwise specified. + + // default to hard float ABI + if (FloatABIType == FloatABI::Default) + FloatABIType = FloatABI::Hard; // Determine default and user specified characteristics if (!FS.empty()) { diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 88ab247..dfb055f 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -133,8 +133,7 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS, DataLayout(Subtarget.getDataLayout()), FrameInfo(TargetFrameInfo::StackGrowsDown, Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4), - InstrInfo(*this), JITInfo(*this), TLInfo(*this), - ELFWriterInfo(Subtarget.is64Bit()) { + InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) { DefRelocModel = getRelocationModel(); // FIXME: Correctly select PIC model for Win64 stuff if (getRelocationModel() == Reloc::Default) { |