Diffstat (limited to 'lib/Target')
27 files changed, 331 insertions, 207 deletions
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index 3a038c9..a75ed3b 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -17,7 +17,6 @@
 #include "llvm/Type.h"
 #include "llvm/Support/Streams.h"
 #include "llvm/Support/raw_ostream.h"
-#include <ostream>
 using namespace llvm;
 
 ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id,
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 09b8ce0..963ff0d 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -64,11 +64,15 @@ namespace {
     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
     typedef MemOpQueue::iterator MemOpQueueIter;
 
-    SmallVector<MachineBasicBlock::iterator, 4>
-    MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
-                 int Opcode, unsigned Size,
-                 ARMCC::CondCodes Pred, unsigned PredReg,
-                 unsigned Scratch, MemOpQueue &MemOps);
+    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                  int Offset, unsigned Base, bool BaseKill, int Opcode,
+                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
+                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
+    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
+                      int Opcode, unsigned Size,
+                      ARMCC::CondCodes Pred, unsigned PredReg,
+                      unsigned Scratch, MemOpQueue &MemOps,
+                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);
 
     void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
@@ -108,16 +112,16 @@ static int getLoadStoreMultipleOpcode(int Opcode) {
   return 0;
 }
 
-/// mergeOps - Create and insert a LDM or STM with Base as base register and
+/// MergeOps - Create and insert a LDM or STM with Base as base register and
 /// registers in Regs as the register operands that would be loaded / stored.
 /// It returns true if the transformation is done.
-static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
-                     int Offset, unsigned Base, bool BaseKill, int Opcode,
-                     ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
-                     SmallVector<std::pair<unsigned, bool>, 8> &Regs,
-                     const TargetInstrInfo *TII) {
-  // FIXME would it be better to take a DL from one of the loads arbitrarily?
-  DebugLoc dl = DebugLoc::getUnknownLoc();
+bool
+ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI,
+                          int Offset, unsigned Base, bool BaseKill,
+                          int Opcode, ARMCC::CondCodes Pred,
+                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
+                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
   // Only a single register to load / store. Don't bother.
   unsigned NumRegs = Regs.size();
   if (NumRegs <= 1)
@@ -185,20 +189,21 @@ static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
 
 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
 /// load / store multiple instructions.
-SmallVector<MachineBasicBlock::iterator, 4>
+void
 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
-                              unsigned Base, int Opcode, unsigned Size,
-                              ARMCC::CondCodes Pred, unsigned PredReg,
-                              unsigned Scratch, MemOpQueue &MemOps) {
-  SmallVector<MachineBasicBlock::iterator, 4> Merges;
+                          unsigned Base, int Opcode, unsigned Size,
+                          ARMCC::CondCodes Pred, unsigned PredReg,
+                          unsigned Scratch, MemOpQueue &MemOps,
+                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
   bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR;
   int Offset = MemOps[SIndex].Offset;
   int SOffset = Offset;
   unsigned Pos = MemOps[SIndex].Position;
   MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
-  unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg();
+  DebugLoc dl = Loc->getDebugLoc();
+  unsigned PReg = Loc->getOperand(0).getReg();
   unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg);
-  bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill();
+  bool isKill = Loc->getOperand(0).isKill();
 
   SmallVector<std::pair<unsigned,bool>, 8> Regs;
   Regs.push_back(std::make_pair(PReg, isKill));
@@ -216,18 +221,17 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
       PRegNum = RegNum;
     } else {
       // Can't merge this in. Try merge the earlier ones first.
-      if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
-                   Scratch, Regs, TII)) {
+      if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg,
+                   Scratch, dl, Regs)) {
         Merges.push_back(prior(Loc));
         for (unsigned j = SIndex; j < i; ++j) {
           MBB.erase(MemOps[j].MBBI);
           MemOps[j].Merged = true;
         }
       }
-      SmallVector<MachineBasicBlock::iterator, 4> Merges2 =
-        MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps);
-      Merges.append(Merges2.begin(), Merges2.end());
-      return Merges;
+      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
+                   MemOps, Merges);
+      return;
     }
 
     if (MemOps[i].Position > Pos) {
@@ -237,8 +241,8 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
   }
 
   bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
-  if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
-               Scratch, Regs, TII)) {
+  if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg,
+               Scratch, dl, Regs)) {
     Merges.push_back(prior(Loc));
     for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) {
       MBB.erase(MemOps[i].MBBI);
@@ -246,7 +250,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
     }
   }
 
-  return Merges;
+  return;
 }
 
 /// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -530,7 +534,7 @@ static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
     if (isAM2)
       // STR_PRE, STR_POST;
       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
-        .addReg(MO.getReg(), getKillRegState(BaseKill))
+        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
         .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
     else
       // FSTMS, FSTMD
@@ -590,6 +594,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   ARMCC::CondCodes CurrPred = ARMCC::AL;
   unsigned CurrPredReg = 0;
   unsigned Position = 0;
+  SmallVector<MachineBasicBlock::iterator,4> Merges;
 
   RS->enterBasicBlock(&MBB);
   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
@@ -689,16 +694,16 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
           RS->forward(prior(MBBI));
 
         // Merge ops.
-        SmallVector<MachineBasicBlock::iterator,4> MBBII =
-          MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
-                       CurrPred, CurrPredReg, Scratch, MemOps);
+        Merges.clear();
+        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);
 
         // Try folding preceeding/trailing base inc/dec into the generated
         // LDM/STM ops.
-        for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
-          if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
+        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
+          if (mergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
             ++NumMerges;
-        NumMerges += MBBII.size();
+        NumMerges += Merges.size();
 
         // Try folding preceeding/trailing base inc/dec into those load/store
         // that were not merged to form LDM/STM ops.
@@ -709,6 +714,13 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
 
         // RS may be pointing to an instruction that's deleted.
         RS->skipTo(prior(MBBI));
+      } else if (NumMemOps == 1) {
+        // Try folding preceeding/trailing base inc/dec into the single
+        // load/store.
+        if (mergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
+          ++NumMerges;
+          RS->forward(prior(MBBI));
+        }
       }
 
       CurrBase = 0;
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 6662be1..0b0e289 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -100,7 +100,7 @@ public:
     GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
     JumpTableUId(0), ConstPoolEntryUId(0) {}
 
-  ARMFunctionInfo(MachineFunction &MF) :
+  explicit ARMFunctionInfo(MachineFunction &MF) :
     isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
     Align(isThumb ? 1U : 2U),
     VarArgsRegSaveSize(0), HasStackFrame(false),
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index e8daf74..b95d1f9 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -103,28 +103,28 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
   let MethodBodies = [{
     // FP is R11, R9 is available.
     static const unsigned ARM_GPR_AO_1[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6, ARM::R7,
       ARM::R8, ARM::R9, ARM::R10,
       ARM::R11 };
     // FP is R11, R9 is not available.
     static const unsigned ARM_GPR_AO_2[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6, ARM::R7,
       ARM::R8, ARM::R10,
       ARM::R11 };
     // FP is R7, R9 is available.
     static const unsigned ARM_GPR_AO_3[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6,
       ARM::R8, ARM::R9, ARM::R10,ARM::R11,
       ARM::R7 };
     // FP is R7, R9 is not available.
     static const unsigned ARM_GPR_AO_4[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6,
       ARM::R8, ARM::R10,ARM::R11,
@@ -186,7 +186,7 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
   // scavenging.
   let MethodBodies = [{
     static const unsigned THUMB_tGPR_AO[] = {
-      ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2,
       ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
 
     // FP is R7, only low registers available.
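The ARMLoadStoreOptimizer hunks above change MergeLDR_STR from returning a SmallVector of merge points by value to filling a caller-owned Merges vector passed by reference, so the recursion appends in place instead of copying partial results at each level (note the removed Merges2 temporary and its append call). A minimal standalone sketch of that refactor, using std::vector and hypothetical names rather than the pass itself:

```cpp
#include <iostream>
#include <vector>

// Before: every recursion level materializes its own vector and the
// caller copies it into place -- one temporary container per level.
std::vector<int> collectByValue(int From, int To) {
  std::vector<int> Merges;
  if (From >= To)
    return Merges;
  Merges.push_back(From);
  std::vector<int> Rest = collectByValue(From + 1, To);
  Merges.insert(Merges.end(), Rest.begin(), Rest.end());
  return Merges;
}

// After: one container, owned by the outermost caller, filled in place.
void collectByRef(int From, int To, std::vector<int> &Merges) {
  if (From >= To)
    return;
  Merges.push_back(From);
  collectByRef(From + 1, To, Merges);
}

int main() {
  std::vector<int> Merges;
  collectByRef(0, 4, Merges);
  for (int M : Merges)
    std::cout << M << ' ';  // prints: 0 1 2 3
  std::cout << '\n';
}
```

The by-reference form also lets LoadStoreMultipleOpti reuse a single vector across groups with one clear() per iteration, which is exactly what the hunk in that function does.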
diff --git a/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/lib/Target/Alpha/AlphaMachineFunctionInfo.h
index 47de5df..8221fc7 100644
--- a/lib/Target/Alpha/AlphaMachineFunctionInfo.h
+++ b/lib/Target/Alpha/AlphaMachineFunctionInfo.h
@@ -33,8 +33,8 @@ class AlphaMachineFunctionInfo : public MachineFunctionInfo {
 public:
   AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0) {}
 
-  AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
-                                                  GlobalRetAddr(0) {}
+  explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0),
+                                                           GlobalRetAddr(0) {}
 
   unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
   void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 4d7b545..5814d27 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -1000,8 +1000,11 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
     Out << ')';
     return;
   case Instruction::Add:
+  case Instruction::FAdd:
   case Instruction::Sub:
+  case Instruction::FSub:
   case Instruction::Mul:
+  case Instruction::FMul:
   case Instruction::SDiv:
   case Instruction::UDiv:
   case Instruction::FDiv:
@@ -1020,9 +1023,12 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
     bool NeedsClosingParens = printConstExprCast(CE, Static);
     printConstantWithCast(CE->getOperand(0), CE->getOpcode());
     switch (CE->getOpcode()) {
-    case Instruction::Add: Out << " + "; break;
-    case Instruction::Sub: Out << " - "; break;
-    case Instruction::Mul: Out << " * "; break;
+    case Instruction::Add:
+    case Instruction::FAdd: Out << " + "; break;
+    case Instruction::Sub:
+    case Instruction::FSub: Out << " - "; break;
+    case Instruction::Mul:
+    case Instruction::FMul: Out << " * "; break;
     case Instruction::URem:
     case Instruction::SRem:
     case Instruction::FRem: Out << " % "; break;
@@ -1322,8 +1328,6 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) {
     case Instruction::Mul:
       // We need to cast integer arithmetic so that it is always performed
       // as unsigned, to avoid undefined behavior on overflow.
-      if (!Ty->isIntOrIntVector()) break;
-      // FALL THROUGH
     case Instruction::LShr:
     case Instruction::URem:
     case Instruction::UDiv: NeedsExplicitCast = true; break;
@@ -1387,8 +1391,6 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) {
     case Instruction::Mul:
       // We need to cast integer arithmetic so that it is always performed
       // as unsigned, to avoid undefined behavior on overflow.
-      if (!OpTy->isIntOrIntVector()) break;
-      // FALL THROUGH
     case Instruction::LShr:
     case Instruction::UDiv:
     case Instruction::URem:
@@ -1505,8 +1507,6 @@ bool CWriter::writeInstructionCast(const Instruction &I) {
   case Instruction::Mul:
     // We need to cast integer arithmetic so that it is always performed
     // as unsigned, to avoid undefined behavior on overflow.
-    if (!Ty->isIntOrIntVector()) break;
-    // FALL THROUGH
   case Instruction::LShr:
   case Instruction::URem:
   case Instruction::UDiv:
@@ -1552,8 +1552,6 @@ void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) {
     case Instruction::Mul:
       // We need to cast integer arithmetic so that it is always performed
       // as unsigned, to avoid undefined behavior on overflow.
-      if (!OpTy->isIntOrIntVector()) break;
-      // FALL THROUGH
     case Instruction::LShr:
     case Instruction::UDiv:
     case Instruction::URem: // Cast to unsigned first
@@ -2606,6 +2604,10 @@ void CWriter::visitBinaryOperator(Instruction &I) {
     Out << "-(";
     writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I)));
     Out << ")";
+  } else if (BinaryOperator::isFNeg(&I)) {
+    Out << "-(";
+    writeOperand(BinaryOperator::getFNegArgument(cast<BinaryOperator>(&I)));
+    Out << ")";
   } else if (I.getOpcode() == Instruction::FRem) {
     // Output a call to fmod/fmodf instead of emitting a%b
     if (I.getType() == Type::FloatTy)
@@ -2630,9 +2632,12 @@ void CWriter::visitBinaryOperator(Instruction &I) {
     writeOperandWithCast(I.getOperand(0), I.getOpcode());
 
     switch (I.getOpcode()) {
-    case Instruction::Add: Out << " + "; break;
-    case Instruction::Sub: Out << " - "; break;
-    case Instruction::Mul: Out << " * "; break;
+    case Instruction::Add:
+    case Instruction::FAdd: Out << " + "; break;
+    case Instruction::Sub:
+    case Instruction::FSub: Out << " - "; break;
+    case Instruction::Mul:
+    case Instruction::FMul: Out << " * "; break;
     case Instruction::URem:
     case Instruction::SRem:
     case Instruction::FRem: Out << " % "; break;
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 4082989..04a6829 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -865,8 +865,11 @@ namespace {
       Out << "Constant* " << constName << " = ConstantExpr::";
       switch (CE->getOpcode()) {
       case Instruction::Add: Out << "getAdd("; break;
+      case Instruction::FAdd: Out << "getFAdd("; break;
       case Instruction::Sub: Out << "getSub("; break;
+      case Instruction::FSub: Out << "getFSub("; break;
       case Instruction::Mul: Out << "getMul("; break;
+      case Instruction::FMul: Out << "getFMul("; break;
       case Instruction::UDiv: Out << "getUDiv("; break;
       case Instruction::SDiv: Out << "getSDiv("; break;
       case Instruction::FDiv: Out << "getFDiv("; break;
@@ -1159,8 +1162,11 @@ namespace {
       break;
     }
     case Instruction::Add:
+    case Instruction::FAdd:
     case Instruction::Sub:
+    case Instruction::FSub:
     case Instruction::Mul:
+    case Instruction::FMul:
     case Instruction::UDiv:
     case Instruction::SDiv:
     case Instruction::FDiv:
@@ -1176,8 +1182,11 @@ namespace {
       Out << "BinaryOperator* " << iName << " = BinaryOperator::Create(";
       switch (I->getOpcode()) {
       case Instruction::Add: Out << "Instruction::Add"; break;
+      case Instruction::FAdd: Out << "Instruction::FAdd"; break;
       case Instruction::Sub: Out << "Instruction::Sub"; break;
+      case Instruction::FSub: Out << "Instruction::FSub"; break;
       case Instruction::Mul: Out << "Instruction::Mul"; break;
+      case Instruction::FMul: Out << "Instruction::FMul"; break;
       case Instruction::UDiv:Out << "Instruction::UDiv"; break;
       case Instruction::SDiv:Out << "Instruction::SDiv"; break;
       case Instruction::FDiv:Out << "Instruction::FDiv"; break;
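The CBackend and CPPBackend hunks above (and the MSILWriter hunks that follow) track the IR-level split of Add, Sub, and Mul into separate integer and floating-point opcodes: each emitter switch now lists both variants, usually falling through to the same output. A standalone sketch of that switch pattern with a stand-in opcode enum, not LLVM's Instruction class:

```cpp
#include <stdexcept>
#include <string>

// Stand-ins for the relevant opcodes; the real list lives in
// llvm/Instruction.def.
enum Opcode { Add, FAdd, Sub, FSub, Mul, FMul };

// Both the integer and the floating-point variant of each pair print the
// same C operator, so the new cases simply fall through to a shared label.
std::string operatorFor(Opcode Opc) {
  switch (Opc) {
  case Add:
  case FAdd: return " + ";
  case Sub:
  case FSub: return " - ";
  case Mul:
  case FMul: return " * ";
  }
  throw std::logic_error("unhandled opcode");
}
```

The same split is why the "cast to unsigned to avoid overflow UB" paths above could drop their isIntOrIntVector checks: Mul, Add, and Sub are now integer-only opcodes, so the guard became dead.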
diff --git a/lib/Target/IA64/IA64MachineFunctionInfo.h b/lib/Target/IA64/IA64MachineFunctionInfo.h
index fb93056..e6254d6 100644
--- a/lib/Target/IA64/IA64MachineFunctionInfo.h
+++ b/lib/Target/IA64/IA64MachineFunctionInfo.h
@@ -24,7 +24,7 @@ public:
                   // by this machinefunction? (used to compute the appropriate
                   // entry in the 'alloc' instruction at the top of the
                   // machinefunction)
-  IA64FunctionInfo(MachineFunction& MF) { outRegsUsed=0; };
+  explicit IA64FunctionInfo(MachineFunction& MF) { outRegsUsed=0; };
 
 };
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index ada851d..37e5b1e 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -1060,12 +1060,15 @@ void MSILWriter::printInstruction(const Instruction* Inst) {
     break;
   // Binary
   case Instruction::Add:
+  case Instruction::FAdd:
     printBinaryInstruction("add",Left,Right);
     break;
   case Instruction::Sub:
+  case Instruction::FSub:
     printBinaryInstruction("sub",Left,Right);
     break;
-  case Instruction::Mul:
+  case Instruction::Mul:
+  case Instruction::FMul:
     printBinaryInstruction("mul",Left,Right);
     break;
   case Instruction::UDiv:
@@ -1322,12 +1325,15 @@ void MSILWriter::printConstantExpr(const ConstantExpr* CE) {
    printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2));
     break;
   case Instruction::Add:
+  case Instruction::FAdd:
     printBinaryInstruction("add",left,right);
     break;
   case Instruction::Sub:
+  case Instruction::FSub:
     printBinaryInstruction("sub",left,right);
     break;
   case Instruction::Mul:
+  case Instruction::FMul:
     printBinaryInstruction("mul",left,right);
     break;
   case Instruction::UDiv:
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index b94d7e4..1d26ae3 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -28,7 +28,8 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
 public:
   MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
 
-  MSP430MachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {}
+  explicit MSP430MachineFunctionInfo(MachineFunction &MF)
+    : CalleeSavedFrameSize(0) {}
 
   unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
   void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 0f83fd2..ac9a143 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -46,6 +46,16 @@ static const char *getIntrinsicName(unsigned opcode) {
     case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
     case RTLIB::MUL_I16: Basename = "mul.i16"; break;
     case RTLIB::MUL_I32: Basename = "mul.i32"; break;
+
+    case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break;
+    case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break;
+    case RTLIB::UDIV_I16: Basename = "udiv.i16"; break;
+    case RTLIB::UDIV_I32: Basename = "udiv.i32"; break;
+
+    case RTLIB::SREM_I16: Basename = "srem.i16"; break;
+    case RTLIB::SREM_I32: Basename = "srem.i32"; break;
+    case RTLIB::UREM_I16: Basename = "urem.i16"; break;
+    case RTLIB::UREM_I32: Basename = "urem.i32"; break;
   }
 
   std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@@ -90,6 +100,20 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
   setLibcallName(RTLIB::MUL_I16, getIntrinsicName(RTLIB::MUL_I16));
   setLibcallName(RTLIB::MUL_I32, getIntrinsicName(RTLIB::MUL_I32));
 
+  // Signed division lib call names
+  setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16));
+  setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32));
+  // Unsigned division lib call names
+  setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16));
+  setLibcallName(RTLIB::UDIV_I32, getIntrinsicName(RTLIB::UDIV_I32));
+
+  // Signed remainder lib call names
+  setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16));
+  setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32));
+  // Unsigned remainder lib call names
+  setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16));
+  setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32));
+
   setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
   setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
@@ -105,6 +129,7 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
   setOperationAction(ISD::ADDC, MVT::i8, Custom);
   setOperationAction(ISD::SUBE, MVT::i8, Custom);
   setOperationAction(ISD::SUBC, MVT::i8, Custom);
+  setOperationAction(ISD::SUB, MVT::i8, Custom);
   setOperationAction(ISD::ADD, MVT::i8, Custom);
   setOperationAction(ISD::ADD, MVT::i16, Custom);
@@ -354,21 +379,11 @@ SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) {
   FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(SDValue(N,0));
   // FIXME there isn't really debug info here
   DebugLoc dl = FR->getDebugLoc();
-  // FIXME: Not used.
-  // int Index = FR->getIndex();
 
   // Expand FrameIndex like GlobalAddress and ExternalSymbol
   // Also use Offset field for lo and hi parts. The default
   // offset is zero.
-  /*
-  SDValue Offset = DAG.getConstant(0, MVT::i8);
-  SDValue FI = DAG.getTargetFrameIndex(Index, MVT::i8);
-  SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, FI, Offset);
-  SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, FI, Offset);
-  return DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0), Lo, Hi);
-  */
-
   SDValue ES;
   int FrameOffset;
   SDValue FI = SDValue(N,0);
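The PIC16 hunks above register runtime-library names for the new division and remainder libcalls: getIntrinsicName maps each RTLIB entry to a basename, then prepends the target's symbol prefix before setLibcallName records it. A standalone sketch of that registration shape, assuming a placeholder "@" prefix (the real one comes from PAN::getTagName(PAN::PREFIX_SYMBOL)):

```cpp
#include <cassert>
#include <map>
#include <string>

// Stand-ins for the RTLIB entries used above.
enum Libcall { SDIV_I16, SDIV_I32, UREM_I32 };

static std::map<Libcall, std::string> LibcallNames;

// Prefix + basename; "@" is an assumed placeholder for the PIC16 prefix.
std::string intrinsicName(const std::string &Basename) {
  return "@" + Basename;
}

void setLibcallNames() {
  LibcallNames[SDIV_I16] = intrinsicName("sdiv.i16");
  LibcallNames[SDIV_I32] = intrinsicName("sdiv.i32");
  LibcallNames[UREM_I32] = intrinsicName("urem.i32");
}

int main() {
  setLibcallNames();
  assert(LibcallNames[SDIV_I16] == "@sdiv.i16");
}
```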
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index a7744b8..87f8fb0b4 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -227,15 +227,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
     setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
     setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
-    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+    // This is just the low 32 bits of a (signed) fp->i64 conversion.
+    // We cannot do this with Promote because i64 is not a legal type.
+    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
 
     // FIXME: disable this lowered code.  This generates 64-bit register values,
     // and we don't model the fact that the top part is clobbered by calls.  We
     // need to flag these together so that the value isn't live across a call.
     //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
-
-    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
-    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
   } else {
     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
     setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
@@ -2858,7 +2857,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
 }
 
 // FIXME: Split this code up when LegalizeDAGTypes lands.
-SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                            DebugLoc dl) {
   assert(Op.getOperand(0).getValueType().isFloatingPoint());
   SDValue Src = Op.getOperand(0);
@@ -2867,9 +2866,11 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
 
   SDValue Tmp;
   switch (Op.getValueType().getSimpleVT()) {
-  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
+  default: assert(0 && "Unhandled FP_TO_INT type in custom expander!");
   case MVT::i32:
-    Tmp = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Src);
+    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
+                                                        PPCISD::FCTIDZ,
+                      dl, MVT::f64, Src);
     break;
   case MVT::i64:
     Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
@@ -3740,7 +3741,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
     return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
 
   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
-  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG,
+  case ISD::FP_TO_UINT:
+  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
                                                        Op.getDebugLoc());
   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
@@ -3834,7 +3836,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::FP_TO_SINT:
-    Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG, dl));
+    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
     return;
   }
 }
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 7946474..b6d046f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -377,7 +377,7 @@ namespace llvm {
     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
                                     const PPCSubtarget &Subtarget);
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
-    SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
+    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
     SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 42883d7..b359dd3 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -59,7 +59,7 @@ private:
   bool HasFastCall;
 
 public:
-  PPCFunctionInfo(MachineFunction &MF)
+  explicit PPCFunctionInfo(MachineFunction &MF)
     : FramePointerSaveIndex(0),
       ReturnAddrSaveIndex(0),
       SpillsCR(false),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 5d5beeb..cb31506 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -908,6 +908,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
   // If we are a leaf function, and use up to 224 bytes of stack space,
   // don't have a frame pointer, calls, or dynamic alloca then we do not need
   // to adjust the stack pointer (we fit in the Red Zone).
+  bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
   if (!DisableRedZone &&
       FrameSize <= 224 &&           // Fits in red zone.
       !MFI->hasVarSizedObjects() && // No dynamic alloca.
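In the PPC hunks above, LowerFP_TO_SINT becomes LowerFP_TO_INT and now serves FP_TO_UINT as well: per the added comment, an unsigned fp-to-i32 is just the low 32 bits of a signed fp-to-i64, so it selects fctidz where the signed i32 path keeps fctiwz. A standalone sketch of that opcode choice with enum stand-ins rather than the SelectionDAG types:

```cpp
// Mirrors the switch in LowerFP_TO_INT above: i64 results always use the
// doubleword truncate; i32 results pick by signedness.
enum ISDOpcode { FP_TO_SINT, FP_TO_UINT };
enum PPCOpcode { FCTIWZ, FCTIDZ };

PPCOpcode selectTruncOp(ISDOpcode Opc, unsigned ResultBits) {
  if (ResultBits == 64)
    return FCTIDZ;                   // i64: fctidz in both cases
  return Opc == FP_TO_SINT ? FCTIWZ  // signed i32: fctiwz
                           : FCTIDZ; // unsigned i32: low half of an i64
}
```

This is also why the constructor hunk flips i32 FP_TO_UINT from Promote to Custom: Promote would need i64 to be a legal type, which it is not on 32-bit PowerPC.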
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 1b042dd..dea293b 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -41,7 +41,6 @@ namespace llvm {
   bool RealignStack;
   bool DisableJumpTables;
   bool StrongPHIElim;
-  bool DisableRedZone;
   bool AsmVerbosityDefault(false);
 }
@@ -86,11 +85,6 @@ GenerateSoftFloatCalls("soft-float",
   cl::location(UseSoftFloat),
   cl::init(false));
 static cl::opt<bool, true>
-GenerateNoImplicitFloats("no-implicit-float",
-  cl::desc("Don't generate implicit floating point instructions (x86-only)"),
-  cl::location(NoImplicitFloat),
-  cl::init(false));
-static cl::opt<bool, true>
 DontPlaceZerosInBSS("nozero-initialized-in-bss",
   cl::desc("Don't place zero-initialized symbols into bss section"),
   cl::location(NoZerosInBSS),
@@ -163,11 +157,6 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim",
   cl::desc("Use strong PHI elimination."),
   cl::location(StrongPHIElim),
   cl::init(false));
-static cl::opt<bool, true>
-DisableRedZoneOption("disable-red-zone",
-  cl::desc("Do not emit code that uses the red zone."),
-  cl::location(DisableRedZone),
-  cl::init(false));
 
 //---------------------------------------------------------------------------
 // TargetMachine Class
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 4c3cc82..2604741 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -14,5 +14,6 @@
 #include "X86ELFWriterInfo.h"
 using namespace llvm;
 
-X86ELFWriterInfo::X86ELFWriterInfo() : TargetELFWriterInfo(EM_386) {}
+X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) :
+  TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {}
 X86ELFWriterInfo::~X86ELFWriterInfo() {}
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 06e051a..acfa501 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -20,7 +20,7 @@ namespace llvm {
 
   class X86ELFWriterInfo : public TargetELFWriterInfo {
   public:
-    X86ELFWriterInfo();
+    X86ELFWriterInfo(bool is64Bit);
     virtual ~X86ELFWriterInfo();
   };
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1f507c3..ef60ff5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -126,7 +126,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
   setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
 
-  if (!UseSoftFloat && !NoImplicitFloat) {
+  if (!UseSoftFloat) {
     // SSE has no i16 to fp conversion, only i32
     if (X86ScalarSSEf32) {
       setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
@@ -550,6 +550,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
   }
 
   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -734,6 +738,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
     setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
 
+    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+    if (!DisableMMX && Subtarget->hasMMX()) {
+      setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+      setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
+    }
   }
 
   if (Subtarget->hasSSE41()) {
@@ -868,11 +878,14 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
 /// determining it.
 MVT
 X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
-                                       bool isSrcConst, bool isSrcStr) const {
+                                       bool isSrcConst, bool isSrcStr,
+                                       SelectionDAG &DAG) const {
   // FIXME: This turns off use of xmm stores for memset/memcpy on targets like
   // linux.  This is because the stack realignment code can't handle certain
   // cases like PR2962.  This should be removed when PR2962 is fixed.
-  if (!NoImplicitFloat && Subtarget->getStackAlignment() >= 16) {
+  const Function *F = DAG.getMachineFunction().getFunction();
+  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+  if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
     if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
       return MVT::v4i32;
    if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
@@ -1404,11 +1417,12 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
       unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs,
                                                        TotalNumXMMRegs);
 
+      bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat);
       assert(!(NumXMMRegs && !Subtarget->hasSSE1()) &&
              "SSE register cannot be used when SSE is disabled!");
-      assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloat) &&
+      assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) &&
             "SSE register cannot be used when SSE is disabled!");
-      if (UseSoftFloat || NoImplicitFloat || !Subtarget->hasSSE1())
+      if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1())
         // Kernel mode asks for SSE to be disabled, so don't push them
         // on the stack.
         TotalNumXMMRegs = 0;
@@ -2414,9 +2428,10 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) {
 /// specifies a shuffle of elements that is suitable for input to MOVSS,
 /// MOVSD, and MOVD, i.e. setting the lowest element.
 static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) {
-  int NumElts = VT.getVectorNumElements();
-  if (NumElts != 2 && NumElts != 4)
+  if (VT.getVectorElementType().getSizeInBits() < 32)
     return false;
+
+  int NumElts = VT.getVectorNumElements();
   if (!isUndefOrEqual(Mask[0], NumElts))
     return false;
@@ -3068,7 +3083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   }
 
   // Special case for single non-zero, non-undef, element.
-  if (NumNonZero == 1 && NumElems <= 4) {
+  if (NumNonZero == 1) {
     unsigned Idx = CountTrailingZeros_32(NonZeros);
     SDValue Item = Op.getOperand(Idx);
@@ -3109,15 +3124,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
     // If we have a constant or non-constant insertion into the low element of
     // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
     // the rest of the elements.  This will be matched as movd/movq/movss/movsd
-    // depending on what the source datatype is.  Because we can only get here
-    // when NumElems <= 4, this only needs to handle i32/f32/i64/f64.
-    if (Idx == 0 &&
-        // Don't do this for i64 values on x86-32.
-        (EVT != MVT::i64 || Subtarget->is64Bit())) {
-      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
-      // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
-      return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0,
-                                         Subtarget->hasSSE2(), DAG);
+    // depending on what the source datatype is.
+    if (Idx == 0) {
+      if (NumZero == 0) {
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+      } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 ||
+          (EVT == MVT::i64 && Subtarget->is64Bit())) {
+        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
+        // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
+        return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(),
+                                           DAG);
+      } else if (EVT == MVT::i16 || EVT == MVT::i8) {
+        Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
+        MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32;
+        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item);
+        Item = getShuffleVectorZeroOrUndef(Item, 0, true,
+                                           Subtarget->hasSSE2(), DAG);
+        return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item);
+      }
     }
 
     // Is it a vector logical left shift?
@@ -4248,7 +4272,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
   SDValue N1 = Op.getOperand(1);
   SDValue N2 = Op.getOperand(2);
 
-  if (EVT.getSizeInBits() == 16) {
+  if (EVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) {
     // Transform it so it match pinsrw which expects a 16-bit value in a GR32
     // as its second argument.
     if (N1.getValueType() != MVT::i32)
@@ -4554,6 +4578,14 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
 
 SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   MVT SrcVT = Op.getOperand(0).getValueType();
+
+  if (SrcVT.isVector()) {
+    if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) {
+      return Op;
+    }
+    return SDValue();
+  }
+
   assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 &&
          "Unknown SINT_TO_FP to lower!");
@@ -4845,6 +4877,14 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
 }
 
 SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) {
+  if (Op.getValueType().isVector()) {
+    if (Op.getValueType() == MVT::v2i32 &&
+        Op.getOperand(0).getValueType() == MVT::v2f64) {
+      return Op;
+    }
+    return SDValue();
+  }
+
   std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true);
   SDValue FIST = Vals.first, StackSlot = Vals.second;
   // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
@@ -7675,8 +7715,9 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
     if (Elt.getOpcode() == ISD::UNDEF)
       continue;
 
-    if (!TLI.isConsecutiveLoad(Elt.getNode(), Base,
-                               EVT.getSizeInBits()/8, i, MFI))
+    LoadSDNode *LD = cast<LoadSDNode>(Elt);
+    LoadSDNode *LDBase = cast<LoadSDNode>(Base);
+    if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI))
       return false;
   }
   return true;
@@ -7751,44 +7792,82 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG,
   MVT VT = N->getValueType(0);
   MVT EVT = VT.getVectorElementType();
-  if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
-    // We are looking for load i64 and zero extend. We want to transform
-    // it before legalizer has a chance to expand it. Also look for i64
-    // BUILD_PAIR bit casted to f64.
-    return SDValue();
 
-  // This must be an insertion into a zero vector.
-  SDValue HighElt = N->getOperand(1);
-  if (!isZeroNode(HighElt))
-    return SDValue();
+  // Before or during type legalization, we want to try and convert a
+  // build_vector of an i64 load and a zero value into vzext_movl before the
+  // legalizer can break it up.
+  // FIXME: does the case below remove the need to do this?
+  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) {
+    if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit())
+      return SDValue();
+
+    // This must be an insertion into a zero vector.
+    SDValue HighElt = N->getOperand(1);
+    if (!isZeroNode(HighElt))
+      return SDValue();
+
+    // Value must be a load.
+    SDNode *Base = N->getOperand(0).getNode();
+    if (!isa<LoadSDNode>(Base)) {
+      if (Base->getOpcode() != ISD::BIT_CONVERT)
+        return SDValue();
+      Base = Base->getOperand(0).getNode();
+      if (!isa<LoadSDNode>(Base))
+        return SDValue();
+    }
+
+    // Transform it into VZEXT_LOAD addr.
+    LoadSDNode *LD = cast<LoadSDNode>(Base);
+
+    // Load must not be an extload.
+    if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+      return SDValue();
+
+    // Load type should legal type so we don't have to legalize it.
+    if (!TLI.isTypeLegal(VT))
+      return SDValue();
+
+    SDVTList Tys = DAG.getVTList(VT, MVT::Other);
+    SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
+    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+    TargetLowering::TargetLoweringOpt TLO(DAG);
+    TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
+    DCI.CommitTargetLoweringOpt(TLO);
+    return ResNode;
+  }
+
+  // The type legalizer will have broken apart v2i64 build_vector created during
+  // widening before the code which handles that case is run. Look for build
+  // vector (load, load + 4, 0/undef, 0/undef)
+  if (VT == MVT::v4i32 || VT == MVT::v4f32) {
+    LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0));
+    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1));
+    if (!LD0 || !LD1)
+      return SDValue();
+    if (LD0->getExtensionType() != ISD::NON_EXTLOAD ||
+        LD1->getExtensionType() != ISD::NON_EXTLOAD)
+      return SDValue();
+    // Make sure the second elt is a consecutive load.
+    if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1,
+                               DAG.getMachineFunction().getFrameInfo()))
+      return SDValue();
 
-  // Value must be a load.
-  SDNode *Base = N->getOperand(0).getNode();
-  if (!isa<LoadSDNode>(Base)) {
-    if (Base->getOpcode() != ISD::BIT_CONVERT)
+    SDValue N2 = N->getOperand(2);
+    SDValue N3 = N->getOperand(3);
+    if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF)
       return SDValue();
-    Base = Base->getOperand(0).getNode();
-    if (!isa<LoadSDNode>(Base))
+    if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF)
       return SDValue();
+
+    SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+    SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() };
+    SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+    TargetLowering::TargetLoweringOpt TLO(DAG);
+    TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1));
+    DCI.CommitTargetLoweringOpt(TLO);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
  }
-
-  // Transform it into VZEXT_LOAD addr.
-  LoadSDNode *LD = cast<LoadSDNode>(Base);
-
-  // Load must not be an extload.
-  if (LD->getExtensionType() != ISD::NON_EXTLOAD)
-    return SDValue();
-
-  // Load type should legal type so we don't have to legalize it.
-  if (!TLI.isTypeLegal(VT))
-    return SDValue();
-
-  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
-  SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
-  SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
-  TargetLowering::TargetLoweringOpt TLO(DAG);
-  TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1));
-  DCI.CommitTargetLoweringOpt(TLO);
-  return ResNode;
+  return SDValue();
 }
 
 /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
@@ -8242,7 +8321,10 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG,
   if (VT.getSizeInBits() != 64)
     return SDValue();
 
-  bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2();
+  const Function *F = DAG.getMachineFunction().getFunction();
+  bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
+  bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps
+    && Subtarget->hasSSE2();
   if ((VT.isVector() ||
        (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) &&
       isa<LoadSDNode>(St->getValue()) &&
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 550f8bd..fb4eb68 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -378,7 +378,8 @@ namespace llvm {
     /// determining it.
     virtual
    MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
-                            bool isSrcConst, bool isSrcStr) const;
+                            bool isSrcConst, bool isSrcStr,
+                            SelectionDAG &DAG) const;
 
     /// LowerOperation - Provide custom lowering hooks for some operations.
     ///
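The TargetMachine.cpp hunk above deletes the global -no-implicit-float and -disable-red-zone cl::opts, and the X86ISelLowering.cpp hunks re-derive the same information per function from Attribute::NoImplicitFloat at each use site. A standalone sketch of that flag-to-attribute move, with simplified stand-in types rather than LLVM's Function and Attribute:

```cpp
#include <cstdint>

// Per-function attribute bits replace the old process-wide booleans.
struct Function {
  std::uint32_t Attrs = 0;
  bool hasFnAttr(std::uint32_t A) const { return (Attrs & A) != 0; }
};

const std::uint32_t NoImplicitFloat = 1u << 0;
const std::uint32_t NoRedZone       = 1u << 1;

// Mirrors getOptimalMemOpType above: vector stores for memset/memcpy are
// only considered when this function is not marked NoImplicitFloat and
// the stack alignment can hold a 16-byte store.
bool mayUseVectorMemOps(const Function &F, unsigned StackAlign) {
  return !F.hasFnAttr(NoImplicitFloat) && StackAlign >= 16;
}
```

The practical gain is per-function control: one kernel-mode function can be compiled without implicit SSE while the rest of the module keeps it, which a single command-line flag could not express.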
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 2cd3733..8a9b7c9 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2009,16 +2009,24 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   MachineFunction &MF = *MBB.getParent();
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
-  X86FI->setCalleeSavedFrameSize(CSI.size() * SlotSize);
+  unsigned CalleeFrameSize = 0;
 
   unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
+    const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
-    BuildMI(MBB, MI, DL, get(Opc))
-      .addReg(Reg, RegState::Kill);
+    if (RegClass != &X86::VR128RegClass) {
+      CalleeFrameSize += SlotSize;
+      BuildMI(MBB, MI, DL, get(Opc))
+        .addReg(Reg, RegState::Kill);
+    } else {
+      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass);
+    }
   }
+
+  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
   return true;
 }
@@ -2036,7 +2044,12 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r;
   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
     unsigned Reg = CSI[i].getReg();
-    BuildMI(MBB, MI, DL, get(Opc), Reg);
+    const TargetRegisterClass *RegClass = CSI[i].getRegClass();
+    if (RegClass != &X86::VR128RegClass) {
+      BuildMI(MBB, MI, DL, get(Opc), Reg);
+    } else {
+      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass);
+    }
   }
   return true;
 }
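spillCalleeSavedRegisters above stops assuming every callee-saved register costs one push: GPRs are still pushed and counted into the callee-saved frame size, while VR128 (XMM) registers go through ordinary stack-slot stores, which a push cannot express. A standalone sketch of that policy with stand-in types (the printf calls stand in for BuildMI and storeRegToStackSlot):

```cpp
#include <cstdio>
#include <vector>

enum RegClass { GPRClass, VR128Class };
struct CalleeSavedReg { unsigned Reg; RegClass RC; int FrameIdx; };

// Spill in reverse order, as the loop above does. Only pushes move the
// stack pointer, so only they count toward the callee-saved frame size.
unsigned emitSpills(const std::vector<CalleeSavedReg> &CSI,
                    unsigned SlotSize) {
  unsigned CalleeFrameSize = 0;
  for (auto I = CSI.rbegin(), E = CSI.rend(); I != E; ++I) {
    if (I->RC != VR128Class) {
      CalleeFrameSize += SlotSize;
      std::printf("push r%u\n", I->Reg);
    } else {
      std::printf("store xmm%u to slot %d\n", I->Reg, I->FrameIdx);
    }
  }
  return CalleeFrameSize;  // what setCalleeSavedFrameSize receives
}

int main() {
  std::vector<CalleeSavedReg> CSI = {
      {3, GPRClass, -1}, {6, VR128Class, 2}, {12, GPRClass, -1}};
  std::printf("frame size: %u\n", emitSpills(CSI, 8));  // 16, not 24
}
```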
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 8f287e1..43fadc2 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -577,41 +577,17 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))),
 def : Pat<(f64 (bitconvert (v8i8 VR64:$src))),
           (MMX_MOVQ2FR64rr VR64:$src)>;
 
-// Move scalar to MMX zero-extended
-// movd to MMX register zero-extends
-let AddedComplexity = 15 in {
-  def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))),
-            (MMX_MOVZDI2PDIrr GR32:$src)>;
-  def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))),
-            (MMX_MOVZDI2PDIrr GR32:$src)>;
-}
-
 let AddedComplexity = 20 in {
-  def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>;
-  def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))),
-            (MMX_MOVZDI2PDIrm addr:$src)>;
   def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))),
             (MMX_MOVZDI2PDIrm addr:$src)>;
 }
 
 // Clear top half.
 let AddedComplexity = 15 in {
-  def : Pat<(v8i8 (X86vzmovl VR64:$src)),
-            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
-  def : Pat<(v4i16 (X86vzmovl VR64:$src)),
-            (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
   def : Pat<(v2i32 (X86vzmovl VR64:$src)),
             (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>;
 }
 
-// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower
-// 8 or 16-bits matter.
-def : Pat<(bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))),
-          (MMX_MOVD64rr GR32:$src)>;
-def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))),
-          (MMX_MOVD64rr GR32:$src)>;
-
 // Patterns to perform canonical versions of vector shuffling.
 let AddedComplexity = 10 in {
   def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 1fafa46..b44c7a6 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3447,7 +3447,7 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
 }
 
 defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>;
-defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovsxbq", int_x86_sse41_pmovzxbq>;
+defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>;
 
 // Common patterns involving scalar load
 def : Pat<(int_x86_sse41_pmovsxbq
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 8a5ac2c..fafcf7e 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -73,14 +73,15 @@ public:
       SRetReturnReg(0),
       GlobalBaseReg(0) {}
 
-  X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false),
-                                                CalleeSavedFrameSize(0),
-                                                BytesToPopOnReturn(0),
-                                                DecorationStyle(None),
-                                                ReturnAddrIndex(0),
-                                                TailCallReturnAddrDelta(0),
-                                                SRetReturnReg(0),
-                                                GlobalBaseReg(0) {}
+  explicit X86MachineFunctionInfo(MachineFunction &MF)
+    : ForceFramePointer(false),
+      CalleeSavedFrameSize(0),
+      BytesToPopOnReturn(0),
+      DecorationStyle(None),
+      ReturnAddrIndex(0),
+      TailCallReturnAddrDelta(0),
+      SRetReturnReg(0),
+      GlobalBaseReg(0) {}
 
   bool getForceFramePointer() const { return ForceFramePointer;}
   void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 5af1fb1..c733f26 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -751,10 +751,12 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   // function, and use up to 128 bytes of stack space, don't have a frame
   // pointer, calls, or dynamic alloca then we do not need to adjust the
   // stack pointer (we fit in the Red Zone).
+  bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone);
   if (Is64Bit && !DisableRedZone &&
       !needsStackRealignment(MF) &&
       !MFI->hasVarSizedObjects() &&   // No dynamic alloca.
-      !MFI->hasCalls()) {             // No calls.
+      !MFI->hasCalls() &&             // No calls.
+      !Subtarget->isTargetWin64()) {  // Win64 has no Red Zone
     uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
     if (hasFP(MF)) MinSize += SlotSize;
     StackSize = std::max(MinSize,
@@ -820,13 +822,6 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
     NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
   }
 
-  unsigned ReadyLabelId = 0;
-  if (needsFrameMoves) {
-    // Mark effective beginning of when frame pointer is ready.
-    ReadyLabelId = MMI->NextLabelID();
-    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
-  }
-
   // Skip the callee-saved push instructions.
   while (MBBI != MBB.end() &&
          (MBBI->getOpcode() == X86::PUSH32r ||
@@ -836,20 +831,20 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
   if (MBBI != MBB.end())
     DL = MBBI->getDebugLoc();
 
-  if (NumBytes) {   // adjust stack pointer: ESP -= numbytes
+  if (NumBytes) {   // Adjust stack pointer: ESP -= numbytes.
     if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
-      // Check, whether EAX is livein for this function
+      // Check, whether EAX is livein for this function.
       bool isEAXAlive = false;
       for (MachineRegisterInfo::livein_iterator
            II = MF.getRegInfo().livein_begin(),
           EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
         unsigned Reg = II->first;
         isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
-                       Reg == X86::AH || Reg == X86::AL);
+                      Reg == X86::AH || Reg == X86::AL);
       }
 
-      // Function prologue calls _alloca to probe the stack when allocating
-      // more than 4k bytes in one go. Touching the stack at 4K increments is
+      // Function prologue calls _alloca to probe the stack when allocating more
+      // than 4k bytes in one go. Touching the stack at 4K increments is
       // necessary to ensure that the guard pages used by the OS virtual memory
       // manager are allocated in correct sequence.
       if (!isEAXAlive) {
@@ -861,12 +856,14 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
         // Save EAX
         BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
           .addReg(X86::EAX, RegState::Kill);
+
         // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
         // allocated bytes for EAX.
-        BuildMI(MBB, MBBI, DL,
-                TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4);
+        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
+          .addImm(NumBytes-4);
         BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
           .addExternalSymbol("_alloca");
+
         // Restore EAX
         MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                                 X86::EAX),
@@ -878,6 +875,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
       // merge the two. This can be the case when tail call elimination is
       // enabled and the callee has more arguments then the caller.
       NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);
+
       // If there is an ADD32ri or SUB32ri of ESP immediately after this
      // instruction, merge the two instructions.
       mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);
@@ -887,8 +885,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
     }
   }
 
-  if (needsFrameMoves)
+  if (needsFrameMoves) {
+    // Mark effective beginning of when frame pointer is ready.
+    unsigned ReadyLabelId = 0;
+    ReadyLabelId = MMI->NextLabelID();
+    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
     emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+  }
 }
 
 void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 8264462..88ab247 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -133,7 +133,8 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS,
     DataLayout(Subtarget.getDataLayout()),
     FrameInfo(TargetFrameInfo::StackGrowsDown,
               Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
-    InstrInfo(*this), JITInfo(*this), TLInfo(*this) {
+    InstrInfo(*this), JITInfo(*this), TLInfo(*this),
+    ELFWriterInfo(Subtarget.is64Bit()) {
   DefRelocModel = getRelocationModel();
   // FIXME: Correctly select PIC model for Win64 stuff
   if (getRelocationModel() == Reloc::Default) {
@@ -213,6 +214,13 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
                                           CodeGenOpt::Level OptLevel,
                                           bool Verbose,
                                           raw_ostream &Out) {
+  // FIXME: Move this somewhere else!
+  // On Darwin, override 64-bit static relocation to pic_ since the
+  // assembler doesn't support it.
+  if (DefRelocModel == Reloc::Static &&
+      Subtarget.isTargetDarwin() && Subtarget.is64Bit())
+    setRelocationModel(Reloc::PIC_);
+
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
     PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h
index 43adb0f..124a011 100644
--- a/lib/Target/XCore/XCoreMachineFunctionInfo.h
+++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h
@@ -40,7 +40,7 @@ public:
     FPSpillSlot(0),
     VarArgsFrameIndex(0) {}
 
-  XCoreFunctionInfo(MachineFunction &MF) :
+  explicit XCoreFunctionInfo(MachineFunction &MF) :
     UsesLR(false),
     LRSpillSlot(0),
     FPSpillSlot(0),
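The X86RegisterInfo.cpp prologue hunks above gate the red-zone shortcut on the new per-function NoRedZone attribute and additionally exclude Win64, which defines no red zone. A standalone sketch collecting those conditions into one predicate (stand-in types; the 128-byte bound is the x86-64 red zone size the surrounding comment refers to):

```cpp
// All conditions must hold before emitPrologue may skip the stack
// adjustment and let a leaf function live entirely in the red zone.
struct FrameFacts {
  bool Is64Bit;
  bool HasNoRedZoneAttr;    // the new per-function Attribute::NoRedZone
  bool NeedsRealignment;
  bool HasVarSizedObjects;  // dynamic alloca
  bool HasCalls;
  bool IsTargetWin64;       // Win64 has no red zone
};

bool fitsInRedZone(const FrameFacts &F, unsigned long long StackSize) {
  return F.Is64Bit && !F.HasNoRedZoneAttr && !F.NeedsRealignment &&
         !F.HasVarSizedObjects && !F.HasCalls && !F.IsTargetWin64 &&
         StackSize <= 128;
}
```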