diff options
author | rdivacky <rdivacky@FreeBSD.org> | 2009-11-19 08:59:28 +0000 |
---|---|---|
committer | rdivacky <rdivacky@FreeBSD.org> | 2009-11-19 08:59:28 +0000 |
commit | 02fb33730c346d6a785d935d7aba9af93ba51da6 (patch) | |
tree | bc7d702d3d7f66d1e9083201cc8327bc0d06a450 /lib | |
parent | d2e985fd323c167e20f77b045a1d99ad166e65db (diff) | |
download | FreeBSD-src-02fb33730c346d6a785d935d7aba9af93ba51da6.zip FreeBSD-src-02fb33730c346d6a785d935d7aba9af93ba51da6.tar.gz |
Update LLVM to r89337.
Diffstat (limited to 'lib')
35 files changed, 829 insertions, 494 deletions
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index cf52320..efe40e4 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/CommandLine.h" #include <algorithm> using namespace llvm; @@ -31,6 +32,10 @@ char IVUsers::ID = 0; static RegisterPass<IVUsers> X("iv-users", "Induction Variable Users", false, true); +static cl::opt<bool> +SimplifyIVUsers("simplify-iv-users", cl::Hidden, cl::init(false), + cl::desc("Restrict IV Users to loop-invariant strides")); + Pass *llvm::createIVUsersPass() { return new IVUsers(); } @@ -208,6 +213,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) { if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT)) return false; // Non-reducible symbolic expression, bail out. + // Keep things simple. Don't touch loop-variant strides. + if (SimplifyIVUsers && !Stride->isLoopInvariant(L) + && L->contains(I->getParent())) + return false; + SmallPtrSet<Instruction *, 4> UniqueUsers; for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E; ++UI) { diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 1c8b8f4..fcdcfd3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -727,7 +727,8 @@ void DwarfException::EmitExceptionTable() { // somewhere. This predicate should be moved to a shared location that is // in target-independent code. // - if (LSDASection->getKind().isWriteable() || + if ((LSDASection->getKind().isWriteable() && + !LSDASection->getKind().isReadOnlyWithRel()) || Asm->TM.getRelocationModel() == Reloc::Static) TTypeFormat = dwarf::DW_EH_PE_absptr; else @@ -917,14 +918,36 @@ void DwarfException::EmitExceptionTable() { } // Emit the Catch TypeInfos. + const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); + unsigned Index = 1; + for (std::vector<GlobalVariable *>::const_reverse_iterator I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { - const GlobalVariable *GV = *I; - PrintRelDirective(); - - if (GV) { - O << Asm->Mang->getMangledName(GV); + const GlobalVariable *TI = *I; + + if (TI) { + if (!LSDASection->getKind().isReadOnlyWithRel() && + (TTypeFormat == dwarf::DW_EH_PE_absptr || + TI->getLinkage() == GlobalValue::InternalLinkage)) { + // Print out the unadorned name of the type info. + PrintRelDirective(); + O << Asm->Mang->getMangledName(TI); + } else { + bool IsTypeInfoIndirect = false, IsTypeInfoPCRel = false; + const MCExpr *TypeInfoRef = + TLOF.getSymbolForDwarfGlobalReference(TI, Asm->Mang, Asm->MMI, + IsTypeInfoIndirect, + IsTypeInfoPCRel); + + if (!IsTypeInfoPCRel) + TypeInfoRef = CreateLabelDiff(TypeInfoRef, "typeinforef_addr", + Index++); + + O << MAI->getData32bitsDirective(); + TypeInfoRef->print(O, MAI); + } } else { + PrintRelDirective(); O << "0x0"; } diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 94bfb72..f807e8f 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -41,8 +41,12 @@ using namespace llvm; STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); STATISTIC(NumBranchOpts, "Number of branches optimized"); STATISTIC(NumTailMerge , "Number of block tails merged"); +STATISTIC(NumTailDups , "Number of tail duplicated blocks"); +STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); + static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge", cl::init(cl::BOU_UNSET), cl::Hidden); + // Throttle for huge numbers of predecessors (compile speed problems) static cl::opt<unsigned> TailMergeThreshold("tail-merge-threshold", @@ -193,7 +197,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MadeChange |= OptimizeImpDefsBlock(MBB); } - bool MadeChangeThisIteration = true; while (MadeChangeThisIteration) { MadeChangeThisIteration = false; @@ -202,10 +205,15 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MadeChange |= MadeChangeThisIteration; } - // Do tail duplication once after tail merging is done. Otherwise it is + // Do tail duplication after tail merging is done. Otherwise it is // tough to avoid situations where tail duplication and tail merging undo // each other's transformations ad infinitum. - MadeChange |= TailDuplicateBlocks(MF); + MadeChangeThisIteration = true; + while (MadeChangeThisIteration) { + MadeChangeThisIteration = false; + MadeChangeThisIteration |= TailDuplicateBlocks(MF); + MadeChange |= MadeChangeThisIteration; + } // See if any jump tables have become mergable or dead as the code generator // did its thing. @@ -1003,9 +1011,6 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) { bool MadeChange = false; - // Make sure blocks are numbered in order - MF.RenumberBlocks(); - for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { MachineBasicBlock *MBB = I++; @@ -1017,6 +1022,7 @@ bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) { // If it is dead, remove it. if (MBB->pred_empty()) { + NumInstrDups -= MBB->size(); RemoveDeadBlock(MBB); MadeChange = true; ++NumDeadBlocks; @@ -1097,6 +1103,7 @@ bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB, MachineInstr *NewMI = MF.CloneMachineInstr(I); PredBB->insert(PredBB->end(), NewMI); } + NumInstrDups += TailBB->size() - 1; // subtract one for removed branch // Update the CFG. PredBB->removeSuccessor(PredBB->succ_begin()); @@ -1107,6 +1114,7 @@ bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB, PredBB->addSuccessor(*I); Changed = true; + ++NumTailDups; } // If TailBB was duplicated into all its predecessors except for the prior diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index a60d34f..bbfc82b 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -53,16 +53,9 @@ static cl::opt<bool> DisableReMat("disable-rematerialization", static cl::opt<bool> EnableFastSpilling("fast-spill", cl::init(false), cl::Hidden); -static cl::opt<bool> EarlyCoalescing("early-coalescing", - cl::init(false), cl::Hidden); - -static cl::opt<int> CoalescingLimit("early-coalescing-limit", - cl::init(-1), cl::Hidden); - STATISTIC(numIntervals , "Number of original intervals"); STATISTIC(numFolds , "Number of loads/stores folded into instructions"); STATISTIC(numSplits , "Number of intervals split"); -STATISTIC(numCoalescing, "Number of early coalescing performed"); char LiveIntervals::ID = 0; static RegisterPass<LiveIntervals> X("liveintervals", "Live Interval Analysis"); @@ -96,7 +89,6 @@ void LiveIntervals::releaseMemory() { delete I->second; r2iMap_.clear(); - phiJoinCopies.clear(); // Release VNInfo memroy regions after all VNInfo objects are dtor'd. VNInfoAllocator.Reset(); @@ -121,7 +113,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { allocatableRegs_ = tri_->getAllocatableSet(fn); computeIntervals(); - performEarlyCoalescing(); numIntervals += getNumIntervals(); @@ -409,7 +400,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // Remove the old range that we now know has an incorrect number. VNInfo *VNI = interval.getValNumInfo(0); MachineInstr *Killer = vi.Kills[0]; - phiJoinCopies.push_back(Killer); SlotIndex Start = getMBBStartIdx(Killer->getParent()); SlotIndex End = getInstructionIndex(Killer).getDefIndex(); DEBUG({ @@ -653,133 +643,6 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, DEBUG(errs() << " +" << LR << '\n'); } -bool LiveIntervals:: -isSafeAndProfitableToCoalesce(LiveInterval &DstInt, - LiveInterval &SrcInt, - SmallVector<MachineInstr*,16> &IdentCopies, - SmallVector<MachineInstr*,16> &OtherCopies) { - unsigned NumIdent = 0; - for (MachineRegisterInfo::def_iterator ri = mri_->def_begin(SrcInt.reg), - re = mri_->def_end(); ri != re; ++ri) { - MachineInstr *MI = &*ri; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) - return false; - if (SrcReg != DstInt.reg) { - // Non-identity copy - we cannot handle overlapping intervals - if (DstInt.liveAt(getInstructionIndex(MI))) - return false; - OtherCopies.push_back(MI); - } else { - IdentCopies.push_back(MI); - ++NumIdent; - } - } - - return IdentCopies.size() > OtherCopies.size(); -} - -void LiveIntervals::performEarlyCoalescing() { - if (!EarlyCoalescing) - return; - - /// Perform early coalescing: eliminate copies which feed into phi joins - /// and whose sources are defined by the phi joins. - for (unsigned i = 0, e = phiJoinCopies.size(); i != e; ++i) { - MachineInstr *Join = phiJoinCopies[i]; - if (CoalescingLimit != -1 && (int)numCoalescing == CoalescingLimit) - break; - - unsigned PHISrc, PHIDst, SrcSubReg, DstSubReg; - bool isMove= tii_->isMoveInstr(*Join, PHISrc, PHIDst, SrcSubReg, DstSubReg); -#ifndef NDEBUG - assert(isMove && "PHI join instruction must be a move!"); -#else - isMove = isMove; -#endif - - LiveInterval &DstInt = getInterval(PHIDst); - LiveInterval &SrcInt = getInterval(PHISrc); - SmallVector<MachineInstr*, 16> IdentCopies; - SmallVector<MachineInstr*, 16> OtherCopies; - if (!isSafeAndProfitableToCoalesce(DstInt, SrcInt, - IdentCopies, OtherCopies)) - continue; - - DEBUG(errs() << "PHI Join: " << *Join); - assert(DstInt.containsOneValue() && "PHI join should have just one val#!"); - assert(std::distance(mri_->use_begin(PHISrc), mri_->use_end()) == 1 && - "PHI join src should not be used elsewhere"); - VNInfo *VNI = DstInt.getValNumInfo(0); - - // Change the non-identity copies to directly target the phi destination. - for (unsigned i = 0, e = OtherCopies.size(); i != e; ++i) { - MachineInstr *PHICopy = OtherCopies[i]; - SlotIndex MIIndex = getInstructionIndex(PHICopy); - DEBUG(errs() << "Moving: " << MIIndex << ' ' << *PHICopy); - SlotIndex DefIndex = MIIndex.getDefIndex(); - LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex); - SlotIndex StartIndex = SLR->start; - SlotIndex EndIndex = SLR->end; - - // Delete val# defined by the now identity copy and add the range from - // beginning of the mbb to the end of the range. - SrcInt.removeValNo(SLR->valno); - DEBUG(errs() << " added range [" << StartIndex << ',' - << EndIndex << "] to reg" << DstInt.reg << '\n'); - assert (!DstInt.liveAt(StartIndex) && "Cannot coalesce when dst live!"); - VNInfo *NewVNI = DstInt.getNextValue(DefIndex, PHICopy, true, - VNInfoAllocator); - NewVNI->setHasPHIKill(true); - DstInt.addRange(LiveRange(StartIndex, EndIndex, NewVNI)); - for (unsigned j = 0, ee = PHICopy->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = PHICopy->getOperand(j); - if (!MO.isReg() || MO.getReg() != PHISrc) - continue; - MO.setReg(PHIDst); - } - } - - // Now let's eliminate all the would-be identity copies. - for (unsigned i = 0, e = IdentCopies.size(); i != e; ++i) { - MachineInstr *PHICopy = IdentCopies[i]; - DEBUG(errs() << "Coalescing: " << *PHICopy); - - SlotIndex MIIndex = getInstructionIndex(PHICopy); - SlotIndex DefIndex = MIIndex.getDefIndex(); - LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex); - SlotIndex StartIndex = SLR->start; - SlotIndex EndIndex = SLR->end; - - // Delete val# defined by the now identity copy and add the range from - // beginning of the mbb to the end of the range. - SrcInt.removeValNo(SLR->valno); - RemoveMachineInstrFromMaps(PHICopy); - PHICopy->eraseFromParent(); - DEBUG(errs() << " added range [" << StartIndex << ',' - << EndIndex << "] to reg" << DstInt.reg << '\n'); - DstInt.addRange(LiveRange(StartIndex, EndIndex, VNI)); - } - - // Remove the phi join and update the phi block liveness. - SlotIndex MIIndex = getInstructionIndex(Join); - SlotIndex UseIndex = MIIndex.getUseIndex(); - SlotIndex DefIndex = MIIndex.getDefIndex(); - LiveRange *SLR = SrcInt.getLiveRangeContaining(UseIndex); - LiveRange *DLR = DstInt.getLiveRangeContaining(DefIndex); - DLR->valno->setCopy(0); - DLR->valno->setIsDefAccurate(false); - DstInt.addRange(LiveRange(SLR->start, SLR->end, DLR->valno)); - SrcInt.removeRange(SLR->start, SLR->end); - assert(SrcInt.empty()); - removeInterval(PHISrc); - RemoveMachineInstrFromMaps(Join); - Join->eraseFromParent(); - - ++numCoalescing; - } -} - /// computeIntervals - computes the live intervals for virtual /// registers. for some ordering of the machine instructions [1,N] a /// live interval is an interval [i, j) where 1 <= i <= j < N for diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index be9f68f..a1c74c0 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -42,23 +42,18 @@ using namespace llvm; namespace { - struct MachineVerifier : public MachineFunctionPass { - static char ID; // Pass ID, replacement for typeid + struct MachineVerifier { - MachineVerifier(bool allowDoubleDefs = false) : - MachineFunctionPass(&ID), + MachineVerifier(Pass *pass, bool allowDoubleDefs) : + PASS(pass), allowVirtDoubleDefs(allowDoubleDefs), allowPhysDoubleDefs(allowDoubleDefs), OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS")) - {} - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } + {} bool runOnMachineFunction(MachineFunction &MF); + Pass *const PASS; const bool allowVirtDoubleDefs; const bool allowPhysDoubleDefs; @@ -112,6 +107,10 @@ namespace { // regsKilled and regsLiveOut. RegSet vregsPassed; + // Vregs that must pass through MBB because they are needed by a successor + // block. This set is disjoint from regsLiveOut. + RegSet vregsRequired; + BBInfo() : reachable(false) {} // Add register to vregsPassed if it belongs there. Return true if @@ -133,6 +132,34 @@ namespace { return changed; } + // Add register to vregsRequired if it belongs there. Return true if + // anything changed. + bool addRequired(unsigned Reg) { + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + return false; + if (regsLiveOut.count(Reg)) + return false; + return vregsRequired.insert(Reg).second; + } + + // Same for a full set. + bool addRequired(const RegSet &RS) { + bool changed = false; + for (RegSet::const_iterator I = RS.begin(), E = RS.end(); I != E; ++I) + if (addRequired(*I)) + changed = true; + return changed; + } + + // Same for a full map. + bool addRequired(const RegMap &RM) { + bool changed = false; + for (RegMap::const_iterator I = RM.begin(), E = RM.end(); I != E; ++I) + if (addRequired(I->first)) + changed = true; + return changed; + } + // Live-out registers are either in regsLiveOut or vregsPassed. bool isLiveOut(unsigned Reg) const { return regsLiveOut.count(Reg) || vregsPassed.count(Reg); @@ -146,6 +173,9 @@ namespace { return Reg < regsReserved.size() && regsReserved.test(Reg); } + // Analysis information if available + LiveVariables *LiveVars; + void visitMachineFunctionBefore(); void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB); void visitMachineInstrBefore(const MachineInstr *MI); @@ -163,20 +193,44 @@ namespace { void calcMaxRegsPassed(); void calcMinRegsPassed(); void checkPHIOps(const MachineBasicBlock *MBB); + + void calcRegsRequired(); + void verifyLiveVariables(); + }; + + struct MachineVerifierPass : public MachineFunctionPass { + static char ID; // Pass ID, replacement for typeid + bool AllowDoubleDefs; + + explicit MachineVerifierPass(bool allowDoubleDefs = false) + : MachineFunctionPass(&ID), + AllowDoubleDefs(allowDoubleDefs) {} + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) { + MF.verify(this, AllowDoubleDefs); + return false; + } }; + } -char MachineVerifier::ID = 0; -static RegisterPass<MachineVerifier> +char MachineVerifierPass::ID = 0; +static RegisterPass<MachineVerifierPass> MachineVer("machineverifier", "Verify generated machine code"); static const PassInfo *const MachineVerifyID = &MachineVer; FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) { - return new MachineVerifier(allowPhysDoubleDefs); + return new MachineVerifierPass(allowPhysDoubleDefs); } -void MachineFunction::verify() const { - MachineVerifier().runOnMachineFunction(const_cast<MachineFunction&>(*this)); +void MachineFunction::verify(Pass *p, bool allowDoubleDefs) const { + MachineVerifier(p, allowDoubleDefs) + .runOnMachineFunction(const_cast<MachineFunction&>(*this)); } bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { @@ -202,6 +256,12 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { TRI = TM->getRegisterInfo(); MRI = &MF.getRegInfo(); + if (PASS) { + LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>(); + } else { + LiveVars = NULL; + } + visitMachineFunctionBefore(); for (MachineFunction::const_iterator MFI = MF.begin(), MFE = MF.end(); MFI!=MFE; ++MFI) { @@ -518,8 +578,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } else if (MO->isUse()) { regsLiveInButUnused.erase(Reg); + bool isKill = false; if (MO->isKill()) { - addRegWithSubRegs(regsKilled, Reg); + isKill = true; // Tied operands on two-address instuctions MUST NOT have a <kill> flag. if (MI->isRegTiedToDefOperand(MONum)) report("Illegal kill flag on two-address instruction operand", @@ -529,8 +590,20 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { unsigned defIdx; if (MI->isRegTiedToDefOperand(MONum, &defIdx) && MI->getOperand(defIdx).getReg() == Reg) - addRegWithSubRegs(regsKilled, Reg); + isKill = true; + } + if (isKill) { + addRegWithSubRegs(regsKilled, Reg); + + // Check that LiveVars knows this kill + if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg)) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + if (std::find(VI.Kills.begin(), + VI.Kills.end(), MI) == VI.Kills.end()) + report("Kill missing from LiveVariables", MO, MONum); + } } + // Use of a dead register. if (!regsLive.count(Reg)) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -734,6 +807,41 @@ void MachineVerifier::calcMinRegsPassed() { } } +// Calculate the set of virtual registers that must be passed through each basic +// block in order to satisfy the requirements of successor blocks. This is very +// similar to calcMaxRegsPassed, only backwards. +void MachineVerifier::calcRegsRequired() { + // First push live-in regs to predecessors' vregsRequired. + DenseSet<const MachineBasicBlock*> todo; + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + const MachineBasicBlock &MBB(*MFI); + BBInfo &MInfo = MBBInfoMap[&MBB]; + for (MachineBasicBlock::const_pred_iterator PrI = MBB.pred_begin(), + PrE = MBB.pred_end(); PrI != PrE; ++PrI) { + BBInfo &PInfo = MBBInfoMap[*PrI]; + if (PInfo.addRequired(MInfo.vregsLiveIn)) + todo.insert(*PrI); + } + } + + // Iteratively push vregsRequired to predecessors. This will converge to the + // same final state regardless of DenseSet iteration order. + while (!todo.empty()) { + const MachineBasicBlock *MBB = *todo.begin(); + todo.erase(MBB); + BBInfo &MInfo = MBBInfoMap[MBB]; + for (MachineBasicBlock::const_pred_iterator PrI = MBB->pred_begin(), + PrE = MBB->pred_end(); PrI != PrE; ++PrI) { + if (*PrI == MBB) + continue; + BBInfo &SInfo = MBBInfoMap[*PrI]; + if (SInfo.addRequired(MInfo.vregsRequired)) + todo.insert(*PrI); + } + } +} + // Check PHI instructions at the beginning of MBB. It is assumed that // calcMinRegsPassed has been run so BBInfo::isLiveOut is valid. void MachineVerifier::checkPHIOps(const MachineBasicBlock *MBB) { @@ -849,4 +957,39 @@ void MachineVerifier::visitMachineFunctionAfter() { } } } + + // Now check LiveVariables info if available + if (LiveVars) { + calcRegsRequired(); + verifyLiveVariables(); + } } + +void MachineVerifier::verifyLiveVariables() { + assert(LiveVars && "Don't call verifyLiveVariables without LiveVars"); + for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister, + RegE = MRI->getLastVirtReg()-1; Reg != RegE; ++Reg) { + LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg); + for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); + MFI != MFE; ++MFI) { + BBInfo &MInfo = MBBInfoMap[MFI]; + + // Our vregsRequired should be identical to LiveVariables' AliveBlocks + if (MInfo.vregsRequired.count(Reg)) { + if (!VI.AliveBlocks.test(MFI->getNumber())) { + report("LiveVariables: Block missing from AliveBlocks", MFI); + *OS << "Virtual register %reg" << Reg + << " must be live through the block.\n"; + } + } else { + if (VI.AliveBlocks.test(MFI->getNumber())) { + report("LiveVariables: Block should not be in AliveBlocks", MFI); + *OS << "Virtual register %reg" << Reg + << " is not needed live through the block.\n"; + } + } + } + } +} + + diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index cd38dd1..b3802ed 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -21,7 +21,6 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/Function.h" #include "llvm/Target/TargetMachine.h" #include "llvm/ADT/SmallPtrSet.h" @@ -37,37 +36,17 @@ using namespace llvm; STATISTIC(NumAtomic, "Number of atomic phis lowered"); STATISTIC(NumSplits, "Number of critical edges split on demand"); -static cl::opt<bool> -SplitEdges("split-phi-edges", - cl::desc("Split critical edges during phi elimination"), - cl::init(false), cl::Hidden); - char PHIElimination::ID = 0; static RegisterPass<PHIElimination> X("phi-node-elimination", "Eliminate PHI nodes for register allocation"); const PassInfo *const llvm::PHIEliminationID = &X; -namespace llvm { FunctionPass *createLocalRegisterAllocator(); } - -// Should we run edge splitting? -static bool shouldSplitEdges() { - // Edge splitting breaks the local register allocator. It cannot tolerate - // LiveVariables being run. - if (RegisterRegAlloc::getDefault() == createLocalRegisterAllocator) - return false; - return SplitEdges; -} - void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved<LiveVariables>(); AU.addPreserved<MachineDominatorTree>(); - if (shouldSplitEdges()) { - AU.addRequired<LiveVariables>(); - } else { - AU.setPreservesCFG(); - AU.addPreservedID(MachineLoopInfoID); - } + // rdar://7401784 This would be nice: + // AU.addPreservedID(MachineLoopInfoID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -79,9 +58,9 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) { bool Changed = false; // Split critical edges to help the coalescer - if (shouldSplitEdges()) + if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>()) for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) - Changed |= SplitPHIEdges(Fn, *I); + Changed |= SplitPHIEdges(Fn, *I, *LV); // Populate VRegPHIUseCount analyzePHINodes(Fn); @@ -361,10 +340,11 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) { } bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, - MachineBasicBlock &MBB) { + MachineBasicBlock &MBB, + LiveVariables &LV) { if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI) return false; // Quick exit for basic blocks without PHIs. - LiveVariables &LV = getAnalysis<LiveVariables>(); + for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end(); BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) { for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) { diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h index 94716ee..f8c9fe7 100644 --- a/lib/CodeGen/PHIElimination.h +++ b/lib/CodeGen/PHIElimination.h @@ -90,7 +90,8 @@ namespace llvm { void analyzePHINodes(const MachineFunction& Fn); /// Split critical edges where necessary for good coalescer performance. - bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB); + bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, + LiveVariables &LV); /// isLiveOut - Determine if Reg is live out from MBB, when not /// considering PHI nodes. This means that Reg is either killed by diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 6930abf..fff50da 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -59,11 +59,6 @@ PreSplitIntervals("pre-alloc-split", cl::desc("Pre-register allocation live interval splitting"), cl::init(false), cl::Hidden); -static cl::opt<bool> -NewSpillFramework("new-spill-framework", - cl::desc("New spilling framework"), - cl::init(false), cl::Hidden); - static RegisterRegAlloc linearscanRegAlloc("linearscan", "linear scan register allocator", createLinearScanRegisterAllocator); @@ -441,9 +436,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) { vrm_ = &getAnalysis<VirtRegMap>(); if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); - if (NewSpillFramework) { - spiller_.reset(createSpiller(mf_, li_, ls_, vrm_)); - } + spiller_.reset(createSpiller(mf_, li_, ls_, loopInfo, vrm_)); initIntervalSets(); @@ -1157,11 +1150,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { SmallVector<LiveInterval*, 8> spillIs; std::vector<LiveInterval*> added; - if (!NewSpillFramework) { - added = li_->addIntervalsForSpills(*cur, spillIs, loopInfo, *vrm_); - } else { - added = spiller_->spill(cur); - } + added = spiller_->spill(cur, spillIs); std::sort(added.begin(), added.end(), LISorter()); addStackInterval(cur, ls_, li_, mri_, *vrm_); @@ -1241,11 +1230,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { earliestStartInterval : sli; std::vector<LiveInterval*> newIs; - if (!NewSpillFramework) { - newIs = li_->addIntervalsForSpills(*sli, spillIs, loopInfo, *vrm_); - } else { - newIs = spiller_->spill(sli); - } + newIs = spiller_->spill(sli, spillIs); addStackInterval(sli, ls_, li_, mri_, *vrm_); std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); spilled.insert(sli->reg); diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 9107325..20c4a28 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -18,11 +18,25 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +namespace { + enum SpillerName { trivial, standard }; +} + +static cl::opt<SpillerName> +spillerOpt("spiller", + cl::desc("Spiller to use: (default: standard)"), + cl::Prefix, + cl::values(clEnumVal(trivial, "trivial spiller"), + clEnumVal(standard, "default spiller"), + clEnumValEnd), + cl::init(standard)); + Spiller::~Spiller() {} namespace { @@ -49,153 +63,9 @@ protected: tii = mf->getTarget().getInstrInfo(); } - /// Ensures there is space before the given machine instruction, returns the - /// instruction's new number. - SlotIndex makeSpaceBefore(MachineInstr *mi) { - //if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) { - // FIXME: Should be updated to use rewrite-in-place methods when they're - // introduced. Currently broken. - //lis->scaleNumbering(2); - //ls->scaleNumbering(2); - //} - - SlotIndex miIdx = lis->getInstructionIndex(mi); - - //assert(lis->hasGapBeforeInstr(miIdx)); - - return miIdx; - } - - /// Ensure there is space after the given machine instruction, returns the - /// instruction's new number. - SlotIndex makeSpaceAfter(MachineInstr *mi) { - //if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) { - // FIXME: Should be updated to use rewrite-in-place methods when they're - // introduced. Currently broken. - // lis->scaleNumbering(2); - // ls->scaleNumbering(2); - //} - - SlotIndex miIdx = lis->getInstructionIndex(mi); - - //assert(lis->hasGapAfterInstr(miIdx)); - - return miIdx; - } - - /// Insert a store of the given vreg to the given stack slot immediately - /// after the given instruction. Returns the base index of the inserted - /// instruction. The caller is responsible for adding an appropriate - /// LiveInterval to the LiveIntervals analysis. - SlotIndex insertStoreAfter(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - - MachineBasicBlock::iterator nextInstItr(next(mi)); - - SlotIndex miIdx = makeSpaceAfter(mi); - - tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg, - true, ss, trc); - MachineBasicBlock::iterator storeInstItr(next(mi)); - MachineInstr *storeInst = &*storeInstItr; - - return lis->InsertMachineInstrInMaps(storeInst); - } - - /// Insert a store of the given vreg to the given stack slot immediately - /// before the given instructnion. Returns the base index of the inserted - /// Instruction. - SlotIndex insertStoreBefore(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - SlotIndex miIdx = makeSpaceBefore(mi); - - tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc); - MachineBasicBlock::iterator storeInstItr(prior(mi)); - MachineInstr *storeInst = &*storeInstItr; - - return lis->InsertMachineInstrInMaps(storeInst); - } - - void insertStoreAfterInstOnInterval(LiveInterval *li, - MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - - SlotIndex storeInstIdx = insertStoreAfter(mi, ss, vreg, trc); - SlotIndex start = lis->getInstructionIndex(mi).getDefIndex(), - end = storeInstIdx.getUseIndex(); - - VNInfo *vni = - li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator()); - vni->addKill(storeInstIdx); - DEBUG(errs() << " Inserting store range: [" << start - << ", " << end << ")\n"); - LiveRange lr(start, end, vni); - - li->addRange(lr); - } - - /// Insert a load of the given vreg from the given stack slot immediately - /// after the given instruction. Returns the base index of the inserted - /// instruction. The caller is responsibel for adding/removing an appropriate - /// range vreg's LiveInterval. - SlotIndex insertLoadAfter(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - - MachineBasicBlock::iterator nextInstItr(next(mi)); - - SlotIndex miIdx = makeSpaceAfter(mi); - - tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc); - MachineBasicBlock::iterator loadInstItr(next(mi)); - MachineInstr *loadInst = &*loadInstItr; - - return lis->InsertMachineInstrInMaps(loadInst); - } - - /// Insert a load of the given vreg from the given stack slot immediately - /// before the given instruction. Returns the base index of the inserted - /// instruction. The caller is responsible for adding an appropriate - /// LiveInterval to the LiveIntervals analysis. - SlotIndex insertLoadBefore(MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - SlotIndex miIdx = makeSpaceBefore(mi); - - tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc); - MachineBasicBlock::iterator loadInstItr(prior(mi)); - MachineInstr *loadInst = &*loadInstItr; - - return lis->InsertMachineInstrInMaps(loadInst); - } - - void insertLoadBeforeInstOnInterval(LiveInterval *li, - MachineInstr *mi, unsigned ss, - unsigned vreg, - const TargetRegisterClass *trc) { - - SlotIndex loadInstIdx = insertLoadBefore(mi, ss, vreg, trc); - SlotIndex start = loadInstIdx.getDefIndex(), - end = lis->getInstructionIndex(mi).getUseIndex(); - - VNInfo *vni = - li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator()); - vni->addKill(lis->getInstructionIndex(mi)); - DEBUG(errs() << " Intserting load range: [" << start - << ", " << end << ")\n"); - LiveRange lr(start, end, vni); - - li->addRange(lr); - } - - - /// Add spill ranges for every use/def of the live interval, inserting loads - /// immediately before each use, and stores after each def. No folding is - /// attempted. + /// immediately before each use, and stores after each def. No folding or + /// remat is attempted. std::vector<LiveInterval*> trivialSpillEverywhere(LiveInterval *li) { DEBUG(errs() << "Spilling everywhere " << *li << "\n"); @@ -212,56 +82,77 @@ protected: const TargetRegisterClass *trc = mri->getRegClass(li->reg); unsigned ss = vrm->assignVirt2StackSlot(li->reg); + // Iterate over reg uses/defs. for (MachineRegisterInfo::reg_iterator regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) { + // Grab the use/def instr. MachineInstr *mi = &*regItr; DEBUG(errs() << " Processing " << *mi); + // Step regItr to the next use/def instr. do { ++regItr; } while (regItr != mri->reg_end() && (&*regItr == mi)); + // Collect uses & defs for this instr. SmallVector<unsigned, 2> indices; bool hasUse = false; bool hasDef = false; - for (unsigned i = 0; i != mi->getNumOperands(); ++i) { MachineOperand &op = mi->getOperand(i); - if (!op.isReg() || op.getReg() != li->reg) continue; - hasUse |= mi->getOperand(i).isUse(); hasDef |= mi->getOperand(i).isDef(); - indices.push_back(i); } + // Create a new vreg & interval for this instr. unsigned newVReg = mri->createVirtualRegister(trc); vrm->grow(); vrm->assignVirt2StackSlot(newVReg, ss); - LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); newLI->weight = HUGE_VALF; + // Update the reg operands & kill flags. for (unsigned i = 0; i < indices.size(); ++i) { - mi->getOperand(indices[i]).setReg(newVReg); - - if (mi->getOperand(indices[i]).isUse()) { - mi->getOperand(indices[i]).setIsKill(true); + unsigned mopIdx = indices[i]; + MachineOperand &mop = mi->getOperand(mopIdx); + mop.setReg(newVReg); + if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) { + mop.setIsKill(true); } } - assert(hasUse || hasDef); + // Insert reload if necessary. + MachineBasicBlock::iterator miItr(mi); if (hasUse) { - insertLoadBeforeInstOnInterval(newLI, mi, ss, newVReg, trc); + tii->loadRegFromStackSlot(*mi->getParent(), miItr, newVReg, ss, trc); + MachineInstr *loadInstr(prior(miItr)); + SlotIndex loadIndex = + lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); + SlotIndex endIndex = loadIndex.getNextIndex(); + VNInfo *loadVNI = + newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator()); + loadVNI->addKill(endIndex); + newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); } + // Insert store if necessary. if (hasDef) { - insertStoreAfterInstOnInterval(newLI, mi, ss, newVReg, trc); + tii->storeRegToStackSlot(*mi->getParent(), next(miItr), newVReg, true, + ss, trc); + MachineInstr *storeInstr(next(miItr)); + SlotIndex storeIndex = + lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); + SlotIndex beginIndex = storeIndex.getPrevIndex(); + VNInfo *storeVNI = + newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator()); + storeVNI->addKill(storeIndex); + newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); } added.push_back(newLI); @@ -279,60 +170,32 @@ class TrivialSpiller : public SpillerBase { public: TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, - VirtRegMap *vrm) : - SpillerBase(mf, lis, ls, vrm) {} + VirtRegMap *vrm) + : SpillerBase(mf, lis, ls, vrm) {} - std::vector<LiveInterval*> spill(LiveInterval *li) { + std::vector<LiveInterval*> spill(LiveInterval *li, + SmallVectorImpl<LiveInterval*> &spillIs) { + // Ignore spillIs - we don't use it. return trivialSpillEverywhere(li); } - std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li, VNInfo *valno) { - std::vector<LiveInterval*> spillIntervals; - - if (!valno->isDefAccurate() && !valno->isPHIDef()) { - // Early out for values which have no well defined def point. - return spillIntervals; - } - - // Ok.. we should be able to proceed... - const TargetRegisterClass *trc = mri->getRegClass(li->reg); - unsigned ss = vrm->assignVirt2StackSlot(li->reg); - vrm->grow(); - vrm->assignVirt2StackSlot(li->reg, ss); - - MachineInstr *mi = 0; - SlotIndex storeIdx = SlotIndex(); - - if (valno->isDefAccurate()) { - // If we have an accurate def we can just grab an iterator to the instr - // after the def. - mi = lis->getInstructionFromIndex(valno->def); - storeIdx = insertStoreAfter(mi, ss, li->reg, trc).getDefIndex(); - } else { - // if we get here we have a PHI def. - mi = &lis->getMBBFromIndex(valno->def)->front(); - storeIdx = insertStoreBefore(mi, ss, li->reg, trc).getDefIndex(); - } - - MachineBasicBlock *defBlock = mi->getParent(); - SlotIndex loadIdx = SlotIndex(); - - // Now we need to find the load... - MachineBasicBlock::iterator useItr(mi); - for (; !useItr->readsRegister(li->reg); ++useItr) {} - - if (useItr != defBlock->end()) { - MachineInstr *loadInst = useItr; - loadIdx = insertLoadBefore(loadInst, ss, li->reg, trc).getUseIndex(); - } - else { - MachineInstr *loadInst = &defBlock->back(); - loadIdx = insertLoadAfter(loadInst, ss, li->reg, trc).getUseIndex(); - } - - li->removeRange(storeIdx, loadIdx, true); +}; - return spillIntervals; +/// Falls back on LiveIntervals::addIntervalsForSpills. +class StandardSpiller : public Spiller { +private: + LiveIntervals *lis; + const MachineLoopInfo *loopInfo; + VirtRegMap *vrm; +public: + StandardSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, + const MachineLoopInfo *loopInfo, VirtRegMap *vrm) + : lis(lis), loopInfo(loopInfo), vrm(vrm) {} + + /// Falls back on LiveIntervals::addIntervalsForSpills. + std::vector<LiveInterval*> spill(LiveInterval *li, + SmallVectorImpl<LiveInterval*> &spillIs) { + return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); } }; @@ -340,6 +203,12 @@ public: } llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, - LiveStacks *ls, VirtRegMap *vrm) { - return new TrivialSpiller(mf, lis, ls, vrm); + LiveStacks *ls, + const MachineLoopInfo *loopInfo, + VirtRegMap *vrm) { + switch (spillerOpt) { + case trivial: return new TrivialSpiller(mf, lis, ls, vrm); break; + case standard: return new StandardSpiller(mf, lis, ls, loopInfo, vrm); break; + default: llvm_unreachable("Unreachable!"); break; + } } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index 9c3900d..7ec8e6d 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -10,6 +10,7 @@ #ifndef LLVM_CODEGEN_SPILLER_H #define LLVM_CODEGEN_SPILLER_H +#include "llvm/ADT/SmallVector.h" #include <vector> namespace llvm { @@ -19,6 +20,7 @@ namespace llvm { class LiveStacks; class MachineFunction; class MachineInstr; + class MachineLoopInfo; class VirtRegMap; class VNInfo; @@ -32,17 +34,15 @@ namespace llvm { /// Spill the given live range. The method used will depend on the Spiller /// implementation selected. - virtual std::vector<LiveInterval*> spill(LiveInterval *li) = 0; - - /// Intra-block split. - virtual std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li, - VNInfo *valno) = 0; + virtual std::vector<LiveInterval*> spill(LiveInterval *li, + SmallVectorImpl<LiveInterval*> &spillIs) = 0; }; /// Create and return a spiller object, as specified on the command line. Spiller* createSpiller(MachineFunction *mf, LiveIntervals *li, - LiveStacks *ls, VirtRegMap *vrm); + LiveStacks *ls, const MachineLoopInfo *loopInfo, + VirtRegMap *vrm); } #endif diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 84467ed..5fa690b 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -112,8 +112,7 @@ namespace { MachineBasicBlock *MBB, unsigned Dist); bool DeleteUnusedInstr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - MachineFunction::iterator &mbbi, - unsigned regB, unsigned regBIdx, unsigned Dist); + MachineFunction::iterator &mbbi, unsigned Dist); bool TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, @@ -730,7 +729,7 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI, /// isSafeToDelete - If the specified instruction does not produce any side /// effects and all of its defs are dead, then it's safe to delete. -static bool isSafeToDelete(MachineInstr *MI, unsigned Reg, +static bool isSafeToDelete(MachineInstr *MI, const TargetInstrInfo *TII, SmallVector<unsigned, 4> &Kills) { const TargetInstrDesc &TID = MI->getDesc(); @@ -745,10 +744,9 @@ static bool isSafeToDelete(MachineInstr *MI, unsigned Reg, continue; if (MO.isDef() && !MO.isDead()) return false; - if (MO.isUse() && MO.getReg() != Reg && MO.isKill()) + if (MO.isUse() && MO.isKill()) Kills.push_back(MO.getReg()); } - return true; } @@ -783,11 +781,10 @@ bool TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, - unsigned regB, unsigned regBIdx, unsigned Dist) { // Check if the instruction has no side effects and if all its defs are dead. SmallVector<unsigned, 4> Kills; - if (!isSafeToDelete(mi, regB, TII, Kills)) + if (!isSafeToDelete(mi, TII, Kills)) return false; // If this instruction kills some virtual registers, we need to @@ -810,10 +807,6 @@ TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, LV->addVirtualRegisterKilled(Kill, NewKill); } } - - // If regB was marked as a kill, update its Kills list. - if (mi->getOperand(regBIdx).isKill()) - LV->removeVirtualRegisterKilled(regB, mi); } mbbi->erase(mi); // Nuke the old inst. @@ -842,7 +835,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // it so it doesn't clobber regB. bool regBKilled = isKilled(*mi, regB, MRI, TII); if (!regBKilled && mi->getOperand(DstIdx).isDead() && - DeleteUnusedInstr(mi, nmi, mbbi, regB, SrcIdx, Dist)) { + DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { ++NumDeletes; return true; // Done with this instruction. } diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index ec0abd1..c836286 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -77,27 +77,38 @@ struct TrivialRewriter : public VirtRegRewriter { DEBUG(MF.dump()); MachineRegisterInfo *mri = &MF.getRegInfo(); + const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo(); bool changed = false; for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end(); liItr != liEnd; ++liItr) { - if (TargetRegisterInfo::isVirtualRegister(liItr->first)) { - if (VRM.hasPhys(liItr->first)) { - unsigned preg = VRM.getPhys(liItr->first); - mri->replaceRegWith(liItr->first, preg); - mri->setPhysRegUsed(preg); - changed = true; - } + const LiveInterval *li = liItr->second; + unsigned reg = li->reg; + + if (TargetRegisterInfo::isPhysicalRegister(reg)) { + if (!li->empty()) + mri->setPhysRegUsed(reg); } else { - if (!liItr->second->empty()) { - mri->setPhysRegUsed(liItr->first); + if (!VRM.hasPhys(reg)) + continue; + unsigned pReg = VRM.getPhys(reg); + mri->setPhysRegUsed(pReg); + for (MachineRegisterInfo::reg_iterator regItr = mri->reg_begin(reg), + regEnd = mri->reg_end(); regItr != regEnd;) { + MachineOperand &mop = regItr.getOperand(); + assert(mop.isReg() && mop.getReg() == reg && "reg_iterator broken?"); + ++regItr; + unsigned subRegIdx = mop.getSubReg(); + unsigned pRegOp = subRegIdx ? tri->getSubReg(pReg, subRegIdx) : pReg; + mop.setReg(pRegOp); + mop.setSubReg(0); + changed = true; } } } - DEBUG(errs() << "**** Post Machine Instrs ****\n"); DEBUG(MF.dump()); diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index cb9bd6a..7033861 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -89,16 +89,18 @@ def : ProcNoItin<"xscale", [ArchV5TE]>; def : ProcNoItin<"iwmmxt", [ArchV5TE]>; // V6 Processors. -def : ProcNoItin<"arm1136j-s", [ArchV6]>; -def : ProcNoItin<"arm1136jf-s", [ArchV6, FeatureVFP2]>; -def : ProcNoItin<"arm1176jz-s", [ArchV6]>; -def : ProcNoItin<"arm1176jzf-s", [ArchV6, FeatureVFP2]>; -def : ProcNoItin<"mpcorenovfp", [ArchV6]>; -def : ProcNoItin<"mpcore", [ArchV6, FeatureVFP2]>; +def : Processor<"arm1136j-s", ARMV6Itineraries, [ArchV6]>; +def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; +def : Processor<"arm1176jz-s", ARMV6Itineraries, [ArchV6]>; +def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; +def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>; +def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2]>; // V6T2 Processors. -def : ProcNoItin<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>; -def : ProcNoItin<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>; +def : Processor<"arm1156t2-s", ARMV6Itineraries, + [ArchV6T2, FeatureThumb2]>; +def : Processor<"arm1156t2f-s", ARMV6Itineraries, + [ArchV6T2, FeatureThumb2, FeatureVFP2]>; // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 9be7454..696a8e1 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1545,7 +1545,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { InFlag = SDValue(ResNode, 1); ReplaceUses(SDValue(Op.getNode(), 1), InFlag); } - ReplaceUses(SDValue(Op.getNode(), 0), SDValue(Chain.getNode(), Chain.getResNo())); + ReplaceUses(SDValue(Op.getNode(), 0), + SDValue(Chain.getNode(), Chain.getResNo())); return NULL; } case ARMISD::CMOV: { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 3fe634e..79bde29 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -608,11 +608,11 @@ def PICSTR : AXI2stw<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), [(store GPR:$src, addrmodepc:$addr)]>; def PICSTRH : AXI3sth<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}h\t$src, $addr", + Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrh${p}\t$src, $addr", [(truncstorei16 GPR:$src, addrmodepc:$addr)]>; def PICSTRB : AXI2stb<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p), - Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstr${p}b\t$src, $addr", + Pseudo, IIC_iStorer, "\n${addr:label}:\n\tstrb${p}\t$src, $addr", [(truncstorei8 GPR:$src, addrmodepc:$addr)]>; } } // isNotDuplicable = 1 diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 2796364..d1831d1 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -66,6 +66,11 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(V, MVT::i32); }]>; +// Scaled 4 immediate. +def t_imm_s4 : Operand<i32> { + let PrintMethod = "printThumbS4ImmOperand"; +} + // Define Thumb specific addressing modes. // t_addrmode_rr := reg + reg @@ -134,20 +139,20 @@ def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr, [(set GPR:$dst, (ARMpic_add GPR:$lhs, imm:$cp))]>; // PC relative add. -def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALUi, - "add\t$dst, pc, $rhs * 4", []>; +def tADDrPCi : T1I<(outs tGPR:$dst), (ins t_imm_s4:$rhs), IIC_iALUi, + "add\t$dst, pc, $rhs", []>; // ADD rd, sp, #imm8 -def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALUi, - "add\t$dst, $sp, $rhs * 4", []>; +def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, t_imm_s4:$rhs), IIC_iALUi, + "add\t$dst, $sp, $rhs", []>; // ADD sp, sp, #imm7 -def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi, - "add\t$dst, $rhs * 4", []>; +def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, + "add\t$dst, $rhs", []>; // SUB sp, sp, #imm7 -def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALUi, - "sub\t$dst, $rhs * 4", []>; +def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), IIC_iALUi, + "sub\t$dst, $rhs", []>; // ADD rm, sp def tADDrSP : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, @@ -159,8 +164,8 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr, // Pseudo instruction that will expand into a tSUBspi + a copy. let usesCustomInserter = 1 in { // Expanded after instruction selection. -def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), - NoItinerary, "@ sub\t$dst, $rhs * 4", []>; +def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t_imm_s4:$rhs), + NoItinerary, "@ sub\t$dst, $rhs", []>; def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), NoItinerary, "@ add\t$dst, $rhs", []>; diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td index 1ace718..0fef466 100644 --- a/lib/Target/ARM/ARMScheduleV6.td +++ b/lib/Target/ARM/ARMScheduleV6.td @@ -11,4 +11,190 @@ // //===----------------------------------------------------------------------===// -// TODO: Add model for an ARM11 +// Model based on ARM1176 +// +// Scheduling information derived from "ARM1176JZF-S Technical Reference Manual". +// +def ARMV6Itineraries : ProcessorItineraries<[ + // + // No operand cycles + InstrItinData<IIC_iALUx , [InstrStage<1, [FU_Pipe0]>]>, + // + // Binary Instructions that produce a result + InstrItinData<IIC_iALUi , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_iALUr , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + InstrItinData<IIC_iALUsi , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, + InstrItinData<IIC_iALUsr , [InstrStage<2, [FU_Pipe0]>], [3, 3, 2, 1]>, + // + // Unary Instructions that produce a result + InstrItinData<IIC_iUNAr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_iUNAsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + InstrItinData<IIC_iUNAsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + // + // Compare instructions + InstrItinData<IIC_iCMPi , [InstrStage<1, [FU_Pipe0]>], [2]>, + InstrItinData<IIC_iCMPr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_iCMPsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + InstrItinData<IIC_iCMPsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + // + // Move instructions, unconditional + InstrItinData<IIC_iMOVi , [InstrStage<1, [FU_Pipe0]>], [2]>, + InstrItinData<IIC_iMOVr , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + InstrItinData<IIC_iMOVsi , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + InstrItinData<IIC_iMOVsr , [InstrStage<2, [FU_Pipe0]>], [3, 2, 1]>, + // + // Move instructions, conditional + InstrItinData<IIC_iCMOVi , [InstrStage<1, [FU_Pipe0]>], [3]>, + InstrItinData<IIC_iCMOVr , [InstrStage<1, [FU_Pipe0]>], [3, 2]>, + InstrItinData<IIC_iCMOVsi , [InstrStage<1, [FU_Pipe0]>], [3, 1]>, + InstrItinData<IIC_iCMOVsr , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + + // Integer multiply pipeline + // + InstrItinData<IIC_iMUL16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, + InstrItinData<IIC_iMAC16 , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1, 2]>, + InstrItinData<IIC_iMUL32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1]>, + InstrItinData<IIC_iMAC32 , [InstrStage<2, [FU_Pipe0]>], [5, 1, 1, 2]>, + InstrItinData<IIC_iMUL64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1]>, + InstrItinData<IIC_iMAC64 , [InstrStage<3, [FU_Pipe0]>], [6, 1, 1, 2]>, + + // Integer load pipeline + // + // Immediate offset + InstrItinData<IIC_iLoadi , [InstrStage<1, [FU_Pipe0]>], [4, 1]>, + // + // Register offset + InstrItinData<IIC_iLoadr , [InstrStage<1, [FU_Pipe0]>], [4, 1, 1]>, + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iLoadsi , [InstrStage<2, [FU_Pipe0]>], [5, 2, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iLoadiu , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1]>, + // + // Register offset with update + InstrItinData<IIC_iLoadru , [InstrStage<1, [FU_Pipe0]>], [4, 2, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iLoadsiu , [InstrStage<2, [FU_Pipe0]>], [5, 2, 2, 1]>, + + // + // Load multiple + InstrItinData<IIC_iLoadm , [InstrStage<3, [FU_Pipe0]>]>, + + // Integer store pipeline + // + // Immediate offset + InstrItinData<IIC_iStorei , [InstrStage<1, [FU_Pipe0]>], [2, 1]>, + // + // Register offset + InstrItinData<IIC_iStorer , [InstrStage<1, [FU_Pipe0]>], [2, 1, 1]>, + + // + // Scaled register offset, issues over 2 cycles + InstrItinData<IIC_iStoresi , [InstrStage<2, [FU_Pipe0]>], [2, 2, 1]>, + // + // Immediate offset with update + InstrItinData<IIC_iStoreiu , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1]>, + // + // Register offset with update + InstrItinData<IIC_iStoreru , [InstrStage<1, [FU_Pipe0]>], [2, 2, 1, 1]>, + // + // Scaled register offset with update, issues over 2 cycles + InstrItinData<IIC_iStoresiu, [InstrStage<2, [FU_Pipe0]>], [2, 2, 2, 1]>, + // + // Store multiple + InstrItinData<IIC_iStorem , [InstrStage<3, [FU_Pipe0]>]>, + + // Branch + // + // no delay slots, so the latency of a branch is unimportant + InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>, + + // VFP + // Issue through integer pipeline, and execute in NEON unit. We assume + // RunFast mode so that NFP pipeline is used for single-precision when + // possible. + // + // FP Special Register to Integer Register File Move + InstrItinData<IIC_fpSTAT , [InstrStage<1, [FU_Pipe0]>], [3]>, + // + // Single-precision FP Unary + InstrItinData<IIC_fpUNA32 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + // + // Double-precision FP Unary + InstrItinData<IIC_fpUNA64 , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + // + // Single-precision FP Compare + InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + // + // Double-precision FP Compare + InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0]>], [2, 2]>, + // + // Single to Double FP Convert + InstrItinData<IIC_fpCVTSD , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + // + // Double to Single FP Convert + InstrItinData<IIC_fpCVTDS , [InstrStage<1, [FU_Pipe0]>], [5, 2]>, + // + // Single-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTSI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + // + // Double-Precision FP to Integer Convert + InstrItinData<IIC_fpCVTDI , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + // + // Integer to Single-Precision FP Convert + InstrItinData<IIC_fpCVTIS , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + // + // Integer to Double-Precision FP Convert + InstrItinData<IIC_fpCVTID , [InstrStage<1, [FU_Pipe0]>], [9, 2]>, + // + // Single-precision FP ALU + InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + // + // Double-precision FP ALU + InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + // + // Single-precision FP Multiply + InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2]>, + // + // Double-precision FP Multiply + InstrItinData<IIC_fpMUL64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2]>, + // + // Single-precision FP MAC + InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0]>], [9, 2, 2, 2]>, + // + // Double-precision FP MAC + InstrItinData<IIC_fpMAC64 , [InstrStage<2, [FU_Pipe0]>], [9, 2, 2, 2]>, + // + // Single-precision FP DIV + InstrItinData<IIC_fpDIV32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + // + // Double-precision FP DIV + InstrItinData<IIC_fpDIV64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + // + // Single-precision FP SQRT + InstrItinData<IIC_fpSQRT32 , [InstrStage<15, [FU_Pipe0]>], [20, 2, 2]>, + // + // Double-precision FP SQRT + InstrItinData<IIC_fpSQRT64 , [InstrStage<29, [FU_Pipe0]>], [34, 2, 2]>, + // + // Single-precision FP Load + InstrItinData<IIC_fpLoad32 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + // + // Double-precision FP Load + InstrItinData<IIC_fpLoad64 , [InstrStage<1, [FU_Pipe0]>], [5, 2, 2]>, + // + // FP Load Multiple + InstrItinData<IIC_fpLoadm , [InstrStage<3, [FU_Pipe0]>]>, + // + // Single-precision FP Store + InstrItinData<IIC_fpStore32 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + // + // Double-precision FP Store + // use FU_Issue to enforce the 1 load/store per cycle limit + InstrItinData<IIC_fpStore64 , [InstrStage<1, [FU_Pipe0]>], [2, 2, 2]>, + // + // FP Store Multiple + InstrItinData<IIC_fpStorem , [InstrStage<3, [FU_Pipe0]>]> +]>; diff --git a/lib/Target/ARM/ARMScheduleV7.td b/lib/Target/ARM/ARMScheduleV7.td index e565813..427645c 100644 --- a/lib/Target/ARM/ARMScheduleV7.td +++ b/lib/Target/ARM/ARMScheduleV7.td @@ -184,7 +184,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // // Single-precision FP Compare InstrItinData<IIC_fpCMP32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrStage<1, [FU_NPipe]>], [1, 1]>, // // Double-precision FP Compare InstrItinData<IIC_fpCMP64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -221,7 +221,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // // Single-precision FP ALU InstrItinData<IIC_fpALU32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, // // Double-precision FP ALU InstrItinData<IIC_fpALU64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -230,7 +230,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // // Single-precision FP Multiply InstrItinData<IIC_fpMUL32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrStage<1, [FU_NPipe]>], [7, 1, 1]>, // // Double-precision FP Multiply InstrItinData<IIC_fpMUL64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, @@ -239,7 +239,7 @@ def CortexA8Itineraries : ProcessorItineraries<[ // // Single-precision FP MAC InstrItinData<IIC_fpMAC32 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, - InstrStage<1, [FU_NPipe]>], [7, 1]>, + InstrStage<1, [FU_NPipe]>], [7, 2, 1, 1]>, // // Double-precision FP MAC InstrItinData<IIC_fpMAC64 , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 0352503..dd4a240 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -110,6 +110,7 @@ namespace { const char *Modifier = 0); void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum); + void printThumbS4ImmOperand(const MachineInstr *MI, int OpNum); void printThumbITMask(const MachineInstr *MI, int OpNum); void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNum); void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNum, @@ -674,6 +675,10 @@ ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) { //===--------------------------------------------------------------------===// +void ARMAsmPrinter::printThumbS4ImmOperand(const MachineInstr *MI, int Op) { + O << "#" << MI->getOperand(Op).getImm() * 4; +} + void ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) { // (3 - the number of trailing zeros) is the number of then / else. @@ -713,7 +718,7 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, if (MO3.getReg()) O << ", " << getRegisterName(MO3.getReg()); else if (unsigned ImmOffs = MO2.getImm()) - O << ", #" << ImmOffs * Scale; + O << ", #+" << ImmOffs * Scale; O << "]"; } @@ -735,7 +740,7 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { const MachineOperand &MO2 = MI->getOperand(Op+1); O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = MO2.getImm()) - O << ", #" << ImmOffs << " * 4"; + O << ", #+" << ImmOffs*4; O << "]"; } @@ -801,9 +806,9 @@ void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI, int32_t OffImm = (int32_t)MO2.getImm() / 4; // Don't print +0. if (OffImm < 0) - O << ", #-" << -OffImm << " * 4"; + O << ", #-" << -OffImm * 4; else if (OffImm > 0) - O << ", #+" << OffImm << " * 4"; + O << ", #+" << OffImm * 4; O << "]"; } diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index 0047925..9fc57e0 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -351,3 +351,8 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) { // FIXME: remove this. abort(); } + +void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) { + // FIXME: remove this. + abort(); +} diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 9e7f8d5..23a7f05 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -52,7 +52,8 @@ public: const char *Modifier = 0); void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum); - + + void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum); void printThumbITMask(const MCInst *MI, unsigned OpNum) {} void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {} void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 9ce30aa..ad1739c 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -105,7 +105,7 @@ namespace { // FIXME: Clean this up after splitting each Thumb load / store opcode // into multiple ones. - { ARM::t2LDRi12,ARM::tLDR, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2LDRi12,ARM::tLDR, ARM::tLDRspi, 5, 8, 1, 0, 0,0, 1 }, { ARM::t2LDRs, ARM::tLDR, 0, 0, 0, 1, 0, 0,0, 1 }, { ARM::t2LDRBi12,ARM::tLDRB, 0, 5, 0, 1, 0, 0,0, 1 }, { ARM::t2LDRBs, ARM::tLDRB, 0, 0, 0, 1, 0, 0,0, 1 }, @@ -113,7 +113,7 @@ namespace { { ARM::t2LDRHs, ARM::tLDRH, 0, 0, 0, 1, 0, 0,0, 1 }, { ARM::t2LDRSBs,ARM::tLDRSB, 0, 0, 0, 1, 0, 0,0, 1 }, { ARM::t2LDRSHs,ARM::tLDRSH, 0, 0, 0, 1, 0, 0,0, 1 }, - { ARM::t2STRi12,ARM::tSTR, 0, 5, 0, 1, 0, 0,0, 1 }, + { ARM::t2STRi12,ARM::tSTR, ARM::tSTRspi, 5, 8, 1, 0, 0,0, 1 }, { ARM::t2STRs, ARM::tSTR, 0, 0, 0, 1, 0, 0,0, 1 }, { ARM::t2STRBi12,ARM::tSTRB, 0, 5, 0, 1, 0, 0,0, 1 }, { ARM::t2STRBs, ARM::tSTRB, 0, 0, 0, 1, 0, 0,0, 1 }, @@ -244,8 +244,13 @@ static bool VerifyLowRegs(MachineInstr *MI) { continue; if (isLROk && Reg == ARM::LR) continue; - if (isSPOk && Reg == ARM::SP) - continue; + if (Reg == ARM::SP) { + if (isSPOk) + continue; + if (i == 1 && (Opc == ARM::t2LDRi12 || Opc == ARM::t2STRi12)) + // Special case for these ldr / str with sp as base register. + continue; + } if (!isARMLowRegister(Reg)) return false; } @@ -261,17 +266,26 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, unsigned Scale = 1; bool HasImmOffset = false; bool HasShift = false; + bool HasOffReg = true; bool isLdStMul = false; unsigned Opc = Entry.NarrowOpc1; unsigned OpNum = 3; // First 'rest' of operands. + uint8_t ImmLimit = Entry.Imm1Limit; switch (Entry.WideOpc) { default: llvm_unreachable("Unexpected Thumb2 load / store opcode!"); case ARM::t2LDRi12: - case ARM::t2STRi12: + case ARM::t2STRi12: { + unsigned BaseReg = MI->getOperand(1).getReg(); + if (BaseReg == ARM::SP) { + Opc = Entry.NarrowOpc2; + ImmLimit = Entry.Imm2Limit; + HasOffReg = false; + } Scale = 4; HasImmOffset = true; break; + } case ARM::t2LDRBi12: case ARM::t2STRBi12: HasImmOffset = true; @@ -325,7 +339,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, unsigned OffsetImm = 0; if (HasImmOffset) { OffsetImm = MI->getOperand(2).getImm(); - unsigned MaxOffset = ((1 << Entry.Imm1Limit) - 1) * Scale; + unsigned MaxOffset = ((1 << ImmLimit) - 1) * Scale; if ((OffsetImm & (Scale-1)) || OffsetImm > MaxOffset) // Make sure the immediate field fits. return false; @@ -337,7 +351,7 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, TII->get(Opc)); if (!isLdStMul) { MIB.addOperand(MI->getOperand(0)).addOperand(MI->getOperand(1)); - if (Entry.NarrowOpc1 != ARM::tLDRSB && Entry.NarrowOpc1 != ARM::tLDRSH) { + if (Opc != ARM::tLDRSB && Opc != ARM::tLDRSH) { // tLDRSB and tLDRSH do not have an immediate offset field. On the other // hand, it must have an offset register. // FIXME: Remove this special case. @@ -345,13 +359,17 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, } assert((!HasShift || OffsetReg) && "Invalid so_reg load / store address!"); - MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); + if (HasOffReg) + MIB.addReg(OffsetReg, getKillRegState(OffsetKill)); } // Transfer the rest of operands. for (unsigned e = MI->getNumOperands(); OpNum != e; ++OpNum) MIB.addOperand(MI->getOperand(OpNum)); + // Transfer memoperands. + (*MIB).setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); + DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " << *MIB); MBB.erase(MI); diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 4898fae..efd3fb7 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -365,6 +365,8 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { case MachineOperand::MO_ConstantPoolIndex: O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" << MO.getIndex(); + if (MO.getOffset()) + O << "+" << MO.getOffset(); break; default: diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index cbcedb8..2990ba9 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -90,6 +90,8 @@ private: bool SelectAddr(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset); + SDNode *SelectLoadFp64(SDValue N); + SDNode *SelectStoreFp64(SDValue N); // getI32Imm - Return a target constant with the specified // value, of type i32. @@ -198,6 +200,121 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) return true; } +SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDValue N) { + MVT::SimpleValueType NVT = + N.getNode()->getValueType(0).getSimpleVT().SimpleTy; + + if (!Subtarget.isMips1() || NVT != MVT::f64) + return NULL; + + if (!Predicate_unindexedload(N.getNode()) || + !Predicate_load(N.getNode())) + return NULL; + + SDValue Chain = N.getOperand(0); + SDValue N1 = N.getOperand(1); + SDValue Offset0, Offset1, Base; + + if (!SelectAddr(N, N1, Offset0, Base) || + N1.getValueType() != MVT::i32) + return NULL; + + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); + DebugLoc dl = N.getDebugLoc(); + + // The second load should start after for 4 bytes. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0)) + Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32); + else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Offset0)) + Offset1 = CurDAG->getTargetConstantPool(CP->getConstVal(), + MVT::i32, + CP->getAlignment(), + CP->getOffset()+4, + CP->getTargetFlags()); + else + return NULL; + + // Instead of: + // ldc $f0, X($3) + // Generate: + // lwc $f0, X($3) + // lwc $f1, X+4($3) + SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, + MVT::Other, Offset0, Base, Chain); + SDValue Undef = SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, + dl, NVT), 0); + SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPEVEN, dl, + MVT::f64, Undef, SDValue(LD0, 0)); + + SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32, + MVT::Other, Offset1, Base, SDValue(LD0, 1)); + SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::SUBREG_FPODD, dl, + MVT::f64, I0, SDValue(LD1, 0)); + + ReplaceUses(N, I1); + ReplaceUses(N.getValue(1), Chain); + cast<MachineSDNode>(LD0)->setMemRefs(MemRefs0, MemRefs0 + 1); + cast<MachineSDNode>(LD1)->setMemRefs(MemRefs0, MemRefs0 + 1); + return I1.getNode(); +} + +SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDValue N) { + + if (!Subtarget.isMips1() || + N.getOperand(1).getValueType() != MVT::f64) + return NULL; + + SDValue Chain = N.getOperand(0); + + if (!Predicate_unindexedstore(N.getNode()) || + !Predicate_store(N.getNode())) + return NULL; + + SDValue N1 = N.getOperand(1); + SDValue N2 = N.getOperand(2); + SDValue Offset0, Offset1, Base; + + if (!SelectAddr(N, N2, Offset0, Base) || + N1.getValueType() != MVT::f64 || + N2.getValueType() != MVT::i32) + return NULL; + + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); + DebugLoc dl = N.getDebugLoc(); + + // Get the even and odd part from the f64 register + SDValue FPOdd = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPODD, + dl, MVT::f32, N1); + SDValue FPEven = CurDAG->getTargetExtractSubreg(Mips::SUBREG_FPEVEN, + dl, MVT::f32, N1); + + // The second store should start after for 4 bytes. + if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Offset0)) + Offset1 = CurDAG->getTargetConstant(C->getSExtValue()+4, MVT::i32); + else + return NULL; + + // Instead of: + // sdc $f0, X($3) + // Generate: + // swc $f0, X($3) + // swc $f1, X+4($3) + SDValue Ops0[] = { FPEven, Offset0, Base, Chain }; + Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl, + MVT::Other, Ops0, 4), 0); + cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1); + + SDValue Ops1[] = { FPOdd, Offset1, Base, Chain }; + Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl, + MVT::Other, Ops1, 4), 0); + cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1); + + ReplaceUses(N.getValue(0), Chain); + return Chain.getNode(); +} + /// Select instructions not customized! Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDValue N) { @@ -345,6 +462,18 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) { break; } + case ISD::LOAD: + if (SDNode *ResNode = SelectLoadFp64(N)) + return ResNode; + // Other cases are autogenerated. + break; + + case ISD::STORE: + if (SDNode *ResNode = SelectStoreFp64(N)) + return ResNode; + // Other cases are autogenerated. + break; + /// Handle direct and indirect calls when using PIC. On PIC, when /// GOT is smaller than about 64k (small code) the GA target is /// loaded with only one instruction. Otherwise GA's target must diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 5b45921..6a3ec00 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -23,6 +23,15 @@ class MipsSubtarget; class TargetInstrInfo; class Type; +namespace Mips { + /// SubregIndex - The index of various sized subregister classes. Note that + /// these indices must be kept in sync with the class indices in the + /// MipsRegisterInfo.td file. + enum SubregIndex { + SUBREG_FPEVEN = 1, SUBREG_FPODD = 2 + }; +} + struct MipsRegisterInfo : public MipsGenRegisterInfo { const MipsSubtarget &Subtarget; const TargetInstrInfo &TII; diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index bbb275c..00e7723 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -17,6 +17,12 @@ class MipsReg<string n> : Register<n> { let Namespace = "Mips"; } +class MipsRegWithSubRegs<string n, list<Register> subregs> + : RegisterWithSubRegs<n, subregs> { + field bits<5> Num; + let Namespace = "Mips"; +} + // Mips CPU Registers class MipsGPRReg<bits<5> num, string n> : MipsReg<n> { let Num = num; @@ -28,9 +34,9 @@ class FPR<bits<5> num, string n> : MipsReg<n> { } // Mips 64-bit (aliased) FPU Registers -class AFPR<bits<5> num, string n, list<Register> aliases> : MipsReg<n> { +class AFPR<bits<5> num, string n, list<Register> subregs> + : MipsRegWithSubRegs<n, subregs> { let Num = num; - let Aliases = aliases; } //===----------------------------------------------------------------------===// @@ -135,6 +141,23 @@ let Namespace = "Mips" in { } //===----------------------------------------------------------------------===// +// Subregister Set Definitions +//===----------------------------------------------------------------------===// + +def mips_subreg_fpeven : PatLeaf<(i32 1)>; +def mips_subreg_fpodd : PatLeaf<(i32 2)>; + +def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15], + [F0, F2, F4, F6, F8, F10, F12, F14, + F16, F18, F20, F22, F24, F26, F28, F30]>; + +def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15], + [F1, F3, F5, F7, F9, F11, F13, F15, + F17, F19, F21, F23, F25, F27, F29, F31]>; + +//===----------------------------------------------------------------------===// // Register Classes //===----------------------------------------------------------------------===// @@ -232,6 +255,7 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, // Reserved D15]> { + let SubRegClassList = [FGR32, FGR32]; let MethodProtos = [{ iterator allocation_order_end(const MachineFunction &MF) const; }]; diff --git a/lib/Target/Mips/MipsTargetObjectFile.cpp b/lib/Target/Mips/MipsTargetObjectFile.cpp index 85e9d65..0fb423d 100644 --- a/lib/Target/Mips/MipsTargetObjectFile.cpp +++ b/lib/Target/Mips/MipsTargetObjectFile.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "MipsTargetObjectFile.h" +#include "MipsSubtarget.h" #include "llvm/DerivedTypes.h" #include "llvm/GlobalVariable.h" #include "llvm/MC/MCSectionELF.h" @@ -56,6 +57,12 @@ bool MipsTargetObjectFile::IsGlobalInSmallSection(const GlobalValue *GV, bool MipsTargetObjectFile:: IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM, SectionKind Kind) const { + + // Only use small section for non linux targets. + const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>(); + if (Subtarget.isLinux()) + return false; + // Only global variables, not functions. const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV); if (!GVA) diff --git a/lib/Target/SubtargetFeature.cpp b/lib/Target/SubtargetFeature.cpp index 664a43c..590574e 100644 --- a/lib/Target/SubtargetFeature.cpp +++ b/lib/Target/SubtargetFeature.cpp @@ -357,3 +357,30 @@ void SubtargetFeatures::print(raw_ostream &OS) const { void SubtargetFeatures::dump() const { print(errs()); } + +/// getDefaultSubtargetFeatures - Return a string listing +/// the features associated with the target triple. +/// +/// FIXME: This is an inelegant way of specifying the features of a +/// subtarget. It would be better if we could encode this information +/// into the IR. See <rdar://5972456>. +/// +std::string SubtargetFeatures::getDefaultSubtargetFeatures( + const Triple& Triple) { + switch (Triple.getVendor()) { + case Triple::Apple: + switch (Triple.getArch()) { + case Triple::ppc: // powerpc-apple-* + return std::string("altivec"); + case Triple::ppc64: // powerpc64-apple-* + return std::string("64bit,altivec"); + default: + break; + } + break; + default: + break; + } + + return std::string(""); +} diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index f887523..6fdbc92 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -783,8 +783,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx, } // Exception Handling. - LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0, - SectionKind::getDataRel()); + LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0, + SectionKind::getReadOnlyWithRel()); EHFrameSection = getMachOSection("__TEXT", "__eh_frame", MCSectionMachO::S_COALESCED | diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index d7106a0..2a561c6 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -333,6 +333,8 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) { case MachineOperand::MO_JumpTableIndex: O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' << MO.getIndex(); + case MachineOperand::MO_BlockAddress: + GetBlockAddressSymbol(MO.getBlockAddress())->print(O, MAI); break; default: llvm_unreachable("not implemented"); diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 16e68fe..00dcce6 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -111,7 +111,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) setOperationAction(ISD::JumpTable, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - + setOperationAction(ISD::BlockAddress, MVT::i32 , Custom); + // Thread Local Storage setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); @@ -158,6 +159,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) { { case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::ConstantPool: return LowerConstantPool(Op, DAG); case ISD::JumpTable: return LowerJumpTable(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); @@ -288,6 +290,17 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) } SDValue XCoreTargetLowering:: +LowerBlockAddress(SDValue Op, SelectionDAG &DAG) +{ + DebugLoc DL = Op.getDebugLoc(); + + BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); + SDValue Result = DAG.getBlockAddress(BA, DL, /*isTarget=*/true); + + return DAG.getNode(XCoreISD::PCRelativeWrapper, DL, getPointerTy(), Result); +} + +SDValue XCoreTargetLowering:: LowerConstantPool(SDValue Op, SelectionDAG &DAG) { ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index 10631af..f86be5e 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -120,6 +120,7 @@ namespace llvm { SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG); + SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG); SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG); SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG); SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 4ed4ed4..d4ae49e 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -679,6 +679,12 @@ def LDAP_lu10 : _FLU10< "ldap r11, $addr", [(set R11, (pcrelwrapper tglobaladdr:$addr))]>; +let Defs = [R11], isReMaterializable = 1 in +def LDAP_lu10_ba : _FLU10<(outs), + (ins i32imm:$addr), + "ldap r11, $addr", + [(set R11, (pcrelwrapper tblockaddress:$addr))]>; + let isCall=1, // All calls clobber the the link register and the non-callee-saved registers: Defs = [R0, R1, R2, R3, R11, LR] in { diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index 2ab0972..44a2c1f 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -305,6 +305,12 @@ ReprocessLoop: } } + // If there are duplicate phi nodes (for example, from loop rotation), + // get rid of them. + for (Loop::block_iterator BB = L->block_begin(), E = L->block_end(); + BB != E; ++BB) + EliminateDuplicatePHINodes(*BB); + return Changed; } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 8dbc808..89b0bd9 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -1594,7 +1594,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { /// which differ only in the order of the incoming values, but instcombine /// orders them so it usually won't matter. /// -static bool EliminateDuplicatePHINodes(BasicBlock *BB) { +bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) { bool Changed = false; // This implementation doesn't currently consider undef operands |