author     ed <ed@FreeBSD.org>    2009-06-22 08:08:12 +0000
committer  ed <ed@FreeBSD.org>    2009-06-22 08:08:12 +0000
commit     a4c19d68f13cf0a83bc0da53bd6d547fcaf635fe (patch)
tree       86c1bc482baa6c81fc70b8d715153bfa93377186 /lib
parent     db89e312d968c258aba3c79c1c398f5fb19267a3 (diff)
Update LLVM sources to r73879.
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 4
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 2
-rw-r--r--  lib/Analysis/IVUsers.cpp | 47
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 811
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 20
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 35
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 4
-rw-r--r--  lib/AsmParser/LLParser.cpp | 16
-rw-r--r--  lib/AsmParser/LLToken.h | 4
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 11
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 29
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 22
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfPrinter.cpp | 4
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 13
-rw-r--r--  lib/CodeGen/LazyLiveness.cpp | 19
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 60
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 65
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 13
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 2
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 42
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 57
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 47
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp | 8
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 18
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 160
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.h | 8
-rw-r--r--  lib/CodeGen/Spiller.cpp | 156
-rw-r--r--  lib/CodeGen/Spiller.h | 11
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 11
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 16
-rw-r--r--  lib/CodeGen/VirtRegMap.h | 8
-rw-r--r--  lib/ExecutionEngine/ExecutionEngineBindings.cpp | 2
-rw-r--r--  lib/Support/CMakeLists.txt | 1
-rw-r--r--  lib/Support/ManagedStatic.cpp | 24
-rw-r--r--  lib/Support/SourceMgr.cpp | 127
-rw-r--r--  lib/Support/Triple.cpp | 5
-rw-r--r--  lib/System/Atomic.cpp | 28
-rw-r--r--  lib/System/CMakeLists.txt | 2
-rw-r--r--  lib/System/Mutex.cpp | 20
-rw-r--r--  lib/System/RWMutex.cpp | 175
-rw-r--r--  lib/System/Threading.cpp | 63
-rw-r--r--  lib/System/Unix/Mutex.inc | 10
-rw-r--r--  lib/System/Unix/Path.inc | 8
-rw-r--r--  lib/System/Unix/RWMutex.inc | 43
-rw-r--r--  lib/System/Unix/Unix.h | 9
-rw-r--r--  lib/System/Win32/Mutex.inc | 10
-rw-r--r--  lib/System/Win32/Path.inc | 18
-rw-r--r--  lib/System/Win32/RWMutex.inc | 58
-rw-r--r--  lib/Target/ARM/ARM.td | 94
-rw-r--r--  lib/Target/ARM/ARMCallingConv.td | 27
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 40
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 62
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 1
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 8
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 32
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 5
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 196
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 300
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.cpp | 344
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.h | 37
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 4
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 35
-rw-r--r--  lib/Target/ARM/ARMScheduleV6.td | 22
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 5
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 16
-rw-r--r--  lib/Target/ARM/ARMTargetAsmInfo.cpp | 91
-rw-r--r--  lib/Target/ARM/ARMTargetAsmInfo.h | 3
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 13
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 18
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 44
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.cpp | 34
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.h | 20
-rw-r--r--  lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp | 14
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 16
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp | 23
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 19
-rw-r--r--  lib/Target/CellSPU/SPUTargetAsmInfo.cpp | 3
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 21
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 18
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 5
-rw-r--r--  lib/Target/DarwinTargetAsmInfo.cpp | 47
-rw-r--r--  lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp | 15
-rw-r--r--  lib/Target/IA64/IA64ISelLowering.cpp | 4
-rw-r--r--  lib/Target/IA64/IA64TargetMachine.cpp | 23
-rw-r--r--  lib/Target/IA64/IA64TargetMachine.h | 18
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 5
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 5
-rw-r--r--  lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 14
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 14
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 15
-rw-r--r--  lib/Target/PIC16/PIC16AsmPrinter.cpp | 39
-rw-r--r--  lib/Target/PIC16/PIC16AsmPrinter.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.cpp | 446
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.h | 53
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.cpp | 66
-rw-r--r--  lib/Target/PIC16/PIC16TargetMachine.cpp | 5
-rw-r--r--  lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 28
-rw-r--r--  lib/Target/PowerPC/PPCTargetAsmInfo.cpp | 46
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 5
-rw-r--r--  lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp | 18
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 21
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 12
-rw-r--r--  lib/Target/TargetAsmInfo.cpp | 13
-rw-r--r--  lib/Target/X86/AsmPrinter/CMakeLists.txt | 1
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp | 320
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h | 58
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 143
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 14
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp | 57
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h | 3
-rw-r--r--  lib/Target/X86/README.txt | 45
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 9
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 28
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 15
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 33
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 113
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 6
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 27
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 2
-rw-r--r--  lib/Target/X86/X86TargetAsmInfo.cpp | 62
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 5
-rw-r--r--  lib/Target/XCore/XCoreAsmPrinter.cpp | 6
-rw-r--r--  lib/Target/XCore/XCoreTargetAsmInfo.cpp | 3
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 5
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 5
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 5
-rw-r--r--  lib/Transforms/IPO/RaiseAllocations.cpp | 4
-rw-r--r--  lib/Transforms/Instrumentation/RSProfiling.cpp | 12
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 29
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp | 394
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 94
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 151
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 93
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 16
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 45
-rw-r--r--  lib/Transforms/Utils/LowerAllocations.cpp | 2
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 29
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 13
-rw-r--r--  lib/VMCore/ConstantFold.cpp | 48
-rw-r--r--  lib/VMCore/Constants.cpp | 330
-rw-r--r--  lib/VMCore/Function.cpp | 8
-rw-r--r--  lib/VMCore/Instructions.cpp | 87
-rw-r--r--  lib/VMCore/LeakDetector.cpp | 60
-rw-r--r--  lib/VMCore/Mangler.cpp | 9
-rw-r--r--  lib/VMCore/Pass.cpp | 22
-rw-r--r--  lib/VMCore/PassManager.cpp | 8
-rw-r--r--  lib/VMCore/Type.cpp | 268
-rw-r--r--  lib/VMCore/TypeSymbolTable.cpp | 29
-rw-r--r--  lib/VMCore/Value.cpp | 16
-rw-r--r--  lib/VMCore/Verifier.cpp | 20
161 files changed, 5683 insertions, 1930 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 261c635..5aa4d56 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -365,7 +365,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
if (TD && CE->getOpcode() == Instruction::IntToPtr) {
Constant *Input = CE->getOperand(0);
- unsigned InWidth = Input->getType()->getPrimitiveSizeInBits();
+ unsigned InWidth = Input->getType()->getScalarSizeInBits();
if (TD->getPointerSizeInBits() < InWidth) {
Constant *Mask =
ConstantInt::get(APInt::getLowBitsSet(InWidth,
@@ -384,7 +384,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
if (TD &&
TD->getPointerSizeInBits() <=
- CE->getType()->getPrimitiveSizeInBits()) {
+ CE->getType()->getScalarSizeInBits()) {
if (CE->getOpcode() == Instruction::PtrToInt) {
Constant *Input = CE->getOperand(0);
Constant *C = FoldBitCast(Input, DestTy, *TD);
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 6bdb64c..adda5ee 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -352,7 +352,7 @@ Constant *DIFactory::GetStringConstant(const std::string &String) {
const PointerType *DestTy = PointerType::getUnqual(Type::Int8Ty);
- // If empty string then use a sbyte* null instead.
+ // If empty string then use an i8* null instead.
if (String.empty())
return Slot = ConstantPointerNull::get(DestTy);
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 7af9130..6a53a83 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -82,11 +82,8 @@ static bool containsAddRecFromDifferentLoop(SCEVHandle S, Loop *L) {
/// outer loop of the current loop.
static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop,
SCEVHandle &Start, SCEVHandle &Stride,
- bool &isSigned,
ScalarEvolution *SE, DominatorTree *DT) {
SCEVHandle TheAddRec = Start; // Initialize to zero.
- bool isSExt = false;
- bool isZExt = false;
// If the outer level is an AddExpr, the operands are all start values except
// for a nested AddRecExpr.
@@ -101,13 +98,6 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop,
} else {
Start = SE->getAddExpr(Start, AE->getOperand(i));
}
-
- } else if (const SCEVZeroExtendExpr *Z = dyn_cast<SCEVZeroExtendExpr>(SH)) {
- TheAddRec = Z->getOperand();
- isZExt = true;
- } else if (const SCEVSignExtendExpr *S = dyn_cast<SCEVSignExtendExpr>(SH)) {
- TheAddRec = S->getOperand();
- isSExt = true;
} else if (isa<SCEVAddRecExpr>(SH)) {
TheAddRec = SH;
} else {
@@ -120,9 +110,8 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop,
// Use getSCEVAtScope to attempt to simplify other loops out of
// the picture.
SCEVHandle AddRecStart = AddRec->getStart();
- SCEVHandle BetterAddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
- if (!isa<SCEVCouldNotCompute>(BetterAddRecStart))
- AddRecStart = BetterAddRecStart;
+ AddRecStart = SE->getSCEVAtScope(AddRecStart, UseLoop);
+ SCEVHandle AddRecStride = AddRec->getStepRecurrence(*SE);
// FIXME: If Start contains an SCEVAddRecExpr from a different loop, other
// than an outer loop of the current loop, reject it. LSR has no concept of
@@ -131,24 +120,20 @@ static bool getSCEVStartAndStride(const SCEVHandle &SH, Loop *L, Loop *UseLoop,
if (containsAddRecFromDifferentLoop(AddRecStart, L))
return false;
- if (isSExt || isZExt)
- Start = SE->getTruncateExpr(Start, AddRec->getType());
-
Start = SE->getAddExpr(Start, AddRecStart);
- if (!isa<SCEVConstant>(AddRec->getStepRecurrence(*SE))) {
- // If stride is an instruction, make sure it dominates the loop preheader.
- // Otherwise we could end up with a use before def situation.
+ // If stride is an instruction, make sure it dominates the loop preheader.
+ // Otherwise we could end up with a use before def situation.
+ if (!isa<SCEVConstant>(AddRecStride)) {
BasicBlock *Preheader = L->getLoopPreheader();
- if (!AddRec->getStepRecurrence(*SE)->dominates(Preheader, DT))
+ if (!AddRecStride->dominates(Preheader, DT))
return false;
DOUT << "[" << L->getHeader()->getName()
<< "] Variable stride: " << *AddRec << "\n";
}
- Stride = AddRec->getStepRecurrence(*SE);
- isSigned = isSExt;
+ Stride = AddRecStride;
return true;
}
@@ -218,9 +203,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
Loop *UseLoop = LI->getLoopFor(I->getParent());
SCEVHandle Start = SE->getIntegerSCEV(0, ISE->getType());
SCEVHandle Stride = Start;
- bool isSigned = false; // Arbitrary initial value - pacifies compiler.
- if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, isSigned, SE, DT))
+ if (!getSCEVStartAndStride(ISE, L, UseLoop, Start, Stride, SE, DT))
return false; // Non-reducible symbolic expression, bail out.
SmallPtrSet<Instruction *, 4> UniqueUsers;
@@ -271,11 +255,11 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
// The value used will be incremented by the stride more than we are
// expecting, so subtract this off.
SCEVHandle NewStart = SE->getMinusSCEV(Start, Stride);
- StrideUses->addUser(NewStart, User, I, isSigned);
+ StrideUses->addUser(NewStart, User, I);
StrideUses->Users.back().setIsUseOfPostIncrementedValue(true);
DOUT << " USING POSTINC SCEV, START=" << *NewStart<< "\n";
} else {
- StrideUses->addUser(Start, User, I, isSigned);
+ StrideUses->addUser(Start, User, I);
}
}
}
@@ -312,7 +296,6 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
/// getReplacementExpr - Return a SCEV expression which computes the
/// value of the OperandValToReplace of the given IVStrideUse.
SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const {
- const Type *UseTy = U.getOperandValToReplace()->getType();
// Start with zero.
SCEVHandle RetVal = SE->getIntegerSCEV(0, U.getParent()->Stride->getType());
// Create the basic add recurrence.
@@ -326,17 +309,9 @@ SCEVHandle IVUsers::getReplacementExpr(const IVStrideUse &U) const {
// Evaluate the expression out of the loop, if possible.
if (!L->contains(U.getUser()->getParent())) {
SCEVHandle ExitVal = SE->getSCEVAtScope(RetVal, L->getParentLoop());
- if (!isa<SCEVCouldNotCompute>(ExitVal) && ExitVal->isLoopInvariant(L))
+ if (ExitVal->isLoopInvariant(L))
RetVal = ExitVal;
}
- // Promote the result to the type of the use.
- if (SE->getTypeSizeInBits(RetVal->getType()) !=
- SE->getTypeSizeInBits(UseTy)) {
- if (U.isSigned())
- RetVal = SE->getSignExtendExpr(RetVal, UseTy);
- else
- RetVal = SE->getZeroExtendExpr(RetVal, UseTy);
- }
return RetVal;
}
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 98ab6f4..68aa595 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -68,6 +68,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Support/CommandLine.h"
@@ -132,7 +133,8 @@ bool SCEV::isOne() const {
return false;
}
-SCEVCouldNotCompute::SCEVCouldNotCompute() : SCEV(scCouldNotCompute) {}
+SCEVCouldNotCompute::SCEVCouldNotCompute(const ScalarEvolution* p) :
+ SCEV(scCouldNotCompute, p) {}
SCEVCouldNotCompute::~SCEVCouldNotCompute() {}
bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const {
@@ -178,7 +180,7 @@ SCEVConstant::~SCEVConstant() {
SCEVHandle ScalarEvolution::getConstant(ConstantInt *V) {
SCEVConstant *&R = (*SCEVConstants)[V];
- if (R == 0) R = new SCEVConstant(V);
+ if (R == 0) R = new SCEVConstant(V, this);
return R;
}
@@ -186,6 +188,11 @@ SCEVHandle ScalarEvolution::getConstant(const APInt& Val) {
return getConstant(ConstantInt::get(Val));
}
+SCEVHandle
+ScalarEvolution::getConstant(const Type *Ty, uint64_t V, bool isSigned) {
+ return getConstant(ConstantInt::get(cast<IntegerType>(Ty), V, isSigned));
+}
+
const Type *SCEVConstant::getType() const { return V->getType(); }
void SCEVConstant::print(raw_ostream &OS) const {
@@ -193,8 +200,9 @@ void SCEVConstant::print(raw_ostream &OS) const {
}
SCEVCastExpr::SCEVCastExpr(unsigned SCEVTy,
- const SCEVHandle &op, const Type *ty)
- : SCEV(SCEVTy), Op(op), Ty(ty) {}
+ const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p)
+ : SCEV(SCEVTy, p), Op(op), Ty(ty) {}
SCEVCastExpr::~SCEVCastExpr() {}
@@ -208,8 +216,9 @@ bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>,
SCEVTruncateExpr*> > SCEVTruncates;
-SCEVTruncateExpr::SCEVTruncateExpr(const SCEVHandle &op, const Type *ty)
- : SCEVCastExpr(scTruncate, op, ty) {
+SCEVTruncateExpr::SCEVTruncateExpr(const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p)
+ : SCEVCastExpr(scTruncate, op, ty, p) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot truncate non-integer value!");
@@ -229,8 +238,9 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const {
static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>,
SCEVZeroExtendExpr*> > SCEVZeroExtends;
-SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty)
- : SCEVCastExpr(scZeroExtend, op, ty) {
+SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p)
+ : SCEVCastExpr(scZeroExtend, op, ty, p) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot zero extend non-integer value!");
@@ -250,8 +260,9 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
static ManagedStatic<std::map<std::pair<const SCEV*, const Type*>,
SCEVSignExtendExpr*> > SCEVSignExtends;
-SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty)
- : SCEVCastExpr(scSignExtend, op, ty) {
+SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEVHandle &op, const Type *ty,
+ const ScalarEvolution* p)
+ : SCEVCastExpr(scSignExtend, op, ty, p) {
assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
(Ty->isInteger() || isa<PointerType>(Ty)) &&
"Cannot sign extend non-integer value!");
@@ -293,7 +304,7 @@ replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym,
SCEVHandle H =
getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
if (H != getOperand(i)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 8> NewOps;
NewOps.reserve(getNumOperands());
for (unsigned j = 0; j != i; ++j)
NewOps.push_back(getOperand(j));
@@ -373,7 +384,7 @@ replaceSymbolicValuesWithConcrete(const SCEVHandle &Sym,
SCEVHandle H =
getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
if (H != getOperand(i)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 8> NewOps;
NewOps.reserve(getNumOperands());
for (unsigned j = 0; j != i; ++j)
NewOps.push_back(getOperand(j));
@@ -504,9 +515,18 @@ namespace {
return false;
}
- // Constant sorting doesn't matter since they'll be folded.
- if (isa<SCEVConstant>(LHS))
- return false;
+ // Compare constant values.
+ if (const SCEVConstant *LC = dyn_cast<SCEVConstant>(LHS)) {
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+ return LC->getValue()->getValue().ult(RC->getValue()->getValue());
+ }
+
+ // Compare addrec loop depths.
+ if (const SCEVAddRecExpr *LA = dyn_cast<SCEVAddRecExpr>(LHS)) {
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+ if (LA->getLoop()->getLoopDepth() != RA->getLoop()->getLoopDepth())
+ return LA->getLoop()->getLoopDepth() < RA->getLoop()->getLoopDepth();
+ }
// Lexicographically compare n-ary expressions.
if (const SCEVNAryExpr *LC = dyn_cast<SCEVNAryExpr>(LHS)) {
@@ -558,7 +578,7 @@ namespace {
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
///
-static void GroupByComplexity(std::vector<SCEVHandle> &Ops,
+static void GroupByComplexity(SmallVectorImpl<SCEVHandle> &Ops,
LoopInfo *LI) {
if (Ops.size() < 2) return; // Noop
if (Ops.size() == 2) {
@@ -763,17 +783,16 @@ SCEVHandle ScalarEvolution::getTruncateExpr(const SCEVHandle &Op,
if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op))
return getTruncateOrZeroExtend(SZ->getOperand(), Ty);
- // If the input value is a chrec scev made out of constants, truncate
- // all of the constants.
+ // If the input value is a chrec scev, truncate the chrec's operands.
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {
- std::vector<SCEVHandle> Operands;
+ SmallVector<SCEVHandle, 4> Operands;
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i)
Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty));
return getAddRecExpr(Operands, AddRec->getLoop());
}
SCEVTruncateExpr *&Result = (*SCEVTruncates)[std::make_pair(Op, Ty)];
- if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty);
+ if (Result == 0) Result = new SCEVTruncateExpr(Op, Ty, this);
return Result;
}
@@ -861,7 +880,7 @@ SCEVHandle ScalarEvolution::getZeroExtendExpr(const SCEVHandle &Op,
}
SCEVZeroExtendExpr *&Result = (*SCEVZeroExtends)[std::make_pair(Op, Ty)];
- if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty);
+ if (Result == 0) Result = new SCEVZeroExtendExpr(Op, Ty, this);
return Result;
}
@@ -933,7 +952,7 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op,
}
SCEVSignExtendExpr *&Result = (*SCEVSignExtends)[std::make_pair(Op, Ty)];
- if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty);
+ if (Result == 0) Result = new SCEVSignExtendExpr(Op, Ty, this);
return Result;
}
@@ -979,9 +998,105 @@ SCEVHandle ScalarEvolution::getAnyExtendExpr(const SCEVHandle &Op,
return ZExt;
}
+/// CollectAddOperandsWithScales - Process the given Ops list, which is
+/// a list of operands to be added under the given scale, update the given
+/// map. This is a helper function for getAddExpr. As an example of
+/// what it does, given a sequence of operands that would form an add
+/// expression like this:
+///
+/// m + n + 13 + (A * (o + p + (B * q + m + 29))) + r + (-1 * r)
+///
+/// where A and B are constants, update the map with these values:
+///
+/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0)
+///
+/// and add 13 + A*B*29 to AccumulatedConstant.
+/// This will allow getAddExpr to produce this:
+///
+/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B)
+///
+/// This form often exposes folding opportunities that are hidden in
+/// the original operand list.
+///
+/// Return true iff it appears that any interesting folding opportunities
+/// may be exposed. This helps getAddExpr short-circuit extra work in
+/// the common case where no interesting opportunities are present, and
+/// is also used as a check to avoid infinite recursion.
+///
+static bool
+CollectAddOperandsWithScales(DenseMap<SCEVHandle, APInt> &M,
+ SmallVector<SCEVHandle, 8> &NewOps,
+ APInt &AccumulatedConstant,
+ const SmallVectorImpl<SCEVHandle> &Ops,
+ const APInt &Scale,
+ ScalarEvolution &SE) {
+ bool Interesting = false;
+
+ // Iterate over the add operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);
+ if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {
+ APInt NewScale =
+ Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue();
+ if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {
+ // A multiplication of a constant with another add; recurse.
+ Interesting |=
+ CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ cast<SCEVAddExpr>(Mul->getOperand(1))
+ ->getOperands(),
+ NewScale, SE);
+ } else {
+ // A multiplication of a constant with some other value. Update
+ // the map.
+ SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin()+1, Mul->op_end());
+ SCEVHandle Key = SE.getMulExpr(MulOps);
+ std::pair<DenseMap<SCEVHandle, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Key, APInt()));
+ if (Pair.second) {
+ Pair.first->second = NewScale;
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += NewScale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+ // Pull a buried constant out to the outside.
+ if (Scale != 1 || AccumulatedConstant != 0 || C->isZero())
+ Interesting = true;
+ AccumulatedConstant += Scale * C->getValue()->getValue();
+ } else {
+ // An ordinary operand. Update the map.
+ std::pair<DenseMap<SCEVHandle, APInt>::iterator, bool> Pair =
+ M.insert(std::make_pair(Ops[i], APInt()));
+ if (Pair.second) {
+ Pair.first->second = Scale;
+ NewOps.push_back(Pair.first->first);
+ } else {
+ Pair.first->second += Scale;
+ // The map already had an entry for this value, which may indicate
+ // a folding opportunity.
+ Interesting = true;
+ }
+ }
+ }
+
+ return Interesting;
+}
+
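For reference (a quick check, not part of the patch): reading the innermost factor in the example above as B*(q + m + 29), the expansion works out to

$$m + n + 13 + A\bigl(o + p + B(q + m + 29)\bigr) + r + (-1)\,r = (13 + 29AB) + (1 + AB)\,m + n + A\,o + A\,p + AB\,q + 0\cdot r,$$

which matches the map entries (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) and the constant 13 + A*B*29 added to AccumulatedConstant.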
+namespace {
+ struct APIntCompare {
+ bool operator()(const APInt &LHS, const APInt &RHS) const {
+ return LHS.ult(RHS);
+ }
+ };
+}
+
/// getAddExpr - Get a canonical add expression, or something simpler if
/// possible.
-SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
+SCEVHandle ScalarEvolution::getAddExpr(SmallVectorImpl<SCEVHandle> &Ops) {
assert(!Ops.empty() && "Cannot get empty add!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1001,11 +1116,10 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
assert(Idx < Ops.size());
while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
// We found two constants, fold them together!
- ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() +
- RHSC->getValue()->getValue());
- Ops[0] = getConstant(Fold);
+ Ops[0] = getConstant(LHSC->getValue()->getValue() +
+ RHSC->getValue()->getValue());
+ if (Ops.size() == 2) return Ops[0];
Ops.erase(Ops.begin()+1); // Erase the folded element
- if (Ops.size() == 1) return Ops[0];
LHSC = cast<SCEVConstant>(Ops[0]);
}
@@ -1043,7 +1157,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
const Type *DstType = Trunc->getType();
const Type *SrcType = Trunc->getOperand()->getType();
- std::vector<SCEVHandle> LargeOps;
+ SmallVector<SCEVHandle, 8> LargeOps;
bool Ok = true;
// Check all the operands to see if they can be represented in the
// source type of the truncate.
@@ -1059,7 +1173,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
// is much more likely to be foldable here.
LargeOps.push_back(getSignExtendExpr(C, SrcType));
} else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
- std::vector<SCEVHandle> LargeMulOps;
+ SmallVector<SCEVHandle, 8> LargeMulOps;
for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
if (const SCEVTruncateExpr *T =
dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
@@ -1120,6 +1234,38 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
++Idx;
+ // Check to see if there are any folding opportunities present with
+ // operands multiplied by constant values.
+ if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
+ uint64_t BitWidth = getTypeSizeInBits(Ty);
+ DenseMap<SCEVHandle, APInt> M;
+ SmallVector<SCEVHandle, 8> NewOps;
+ APInt AccumulatedConstant(BitWidth, 0);
+ if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+ Ops, APInt(BitWidth, 1), *this)) {
+ // Some interesting folding opportunity is present, so it's worthwhile to
+ // re-generate the operands list. Group the operands by constant scale,
+ // to avoid multiplying by the same constant scale multiple times.
+ std::map<APInt, SmallVector<SCEVHandle, 4>, APIntCompare> MulOpLists;
+ for (SmallVector<SCEVHandle, 8>::iterator I = NewOps.begin(),
+ E = NewOps.end(); I != E; ++I)
+ MulOpLists[M.find(*I)->second].push_back(*I);
+ // Re-generate the operands list.
+ Ops.clear();
+ if (AccumulatedConstant != 0)
+ Ops.push_back(getConstant(AccumulatedConstant));
+ for (std::map<APInt, SmallVector<SCEVHandle, 4>, APIntCompare>::iterator I =
+ MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
+ if (I->first != 0)
+ Ops.push_back(getMulExpr(getConstant(I->first), getAddExpr(I->second)));
+ if (Ops.empty())
+ return getIntegerSCEV(0, Ty);
+ if (Ops.size() == 1)
+ return Ops[0];
+ return getAddExpr(Ops);
+ }
+ }
+
// If we are adding something to a multiply expression, make sure the
// something is not already an operand of the multiply. If so, merge it into
// the multiply.
@@ -1128,13 +1274,13 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
- if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(MulOpSCEV)) {
+ if (MulOpSCEV == Ops[AddOp] && !isa<SCEVConstant>(Ops[AddOp])) {
// Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1))
SCEVHandle InnerMul = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
// If the multiply has more than two operands, we must get the
// Y*Z term.
- std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end());
+ SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin(), Mul->op_end());
MulOps.erase(MulOps.begin()+MulOp);
InnerMul = getMulExpr(MulOps);
}
@@ -1166,13 +1312,13 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
// Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
SCEVHandle InnerMul1 = Mul->getOperand(MulOp == 0);
if (Mul->getNumOperands() != 2) {
- std::vector<SCEVHandle> MulOps(Mul->op_begin(), Mul->op_end());
+ SmallVector<SCEVHandle, 4> MulOps(Mul->op_begin(), Mul->op_end());
MulOps.erase(MulOps.begin()+MulOp);
InnerMul1 = getMulExpr(MulOps);
}
SCEVHandle InnerMul2 = OtherMul->getOperand(OMulOp == 0);
if (OtherMul->getNumOperands() != 2) {
- std::vector<SCEVHandle> MulOps(OtherMul->op_begin(),
+ SmallVector<SCEVHandle, 4> MulOps(OtherMul->op_begin(),
OtherMul->op_end());
MulOps.erase(MulOps.begin()+OMulOp);
InnerMul2 = getMulExpr(MulOps);
@@ -1199,7 +1345,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this add and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
- std::vector<SCEVHandle> LIOps;
+ SmallVector<SCEVHandle, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
@@ -1213,7 +1359,8 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
// NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step}
LIOps.push_back(AddRec->getStart());
- std::vector<SCEVHandle> AddRecOps(AddRec->op_begin(), AddRec->op_end());
+ SmallVector<SCEVHandle, 4> AddRecOps(AddRec->op_begin(),
+ AddRec->op_end());
AddRecOps[0] = getAddExpr(LIOps);
SCEVHandle NewRec = getAddRecExpr(AddRecOps, AddRec->getLoop());
@@ -1238,7 +1385,7 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
if (AddRec->getLoop() == OtherAddRec->getLoop()) {
// Other + {A,+,B} + {C,+,D} --> Other + {A+C,+,B+D}
- std::vector<SCEVHandle> NewOps(AddRec->op_begin(), AddRec->op_end());
+ SmallVector<SCEVHandle, 4> NewOps(AddRec->op_begin(), AddRec->op_end());
for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) {
if (i >= NewOps.size()) {
NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i,
@@ -1267,14 +1414,14 @@ SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) {
std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scAddExpr,
SCEVOps)];
- if (Result == 0) Result = new SCEVAddExpr(Ops);
+ if (Result == 0) Result = new SCEVAddExpr(Ops, this);
return Result;
}
/// getMulExpr - Get a canonical multiply expression, or something simpler if
/// possible.
-SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
+SCEVHandle ScalarEvolution::getMulExpr(SmallVectorImpl<SCEVHandle> &Ops) {
assert(!Ops.empty() && "Cannot get empty mul!");
#ifndef NDEBUG
for (unsigned i = 1, e = Ops.size(); i != e; ++i)
@@ -1355,7 +1502,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
// Scan all of the other operands to this mul and add them to the vector if
// they are loop invariant w.r.t. the recurrence.
- std::vector<SCEVHandle> LIOps;
+ SmallVector<SCEVHandle, 8> LIOps;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (Ops[i]->isLoopInvariant(AddRec->getLoop())) {
@@ -1367,7 +1514,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
// If we found some loop invariants, fold them into the recurrence.
if (!LIOps.empty()) {
// NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step}
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 4> NewOps;
NewOps.reserve(AddRec->getNumOperands());
if (LIOps.size() == 1) {
const SCEV *Scale = LIOps[0];
@@ -1375,7 +1522,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i)));
} else {
for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
- std::vector<SCEVHandle> MulOps(LIOps);
+ SmallVector<SCEVHandle, 4> MulOps(LIOps.begin(), LIOps.end());
MulOps.push_back(AddRec->getOperand(i));
NewOps.push_back(getMulExpr(MulOps));
}
@@ -1433,7 +1580,7 @@ SCEVHandle ScalarEvolution::getMulExpr(std::vector<SCEVHandle> &Ops) {
SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scMulExpr,
SCEVOps)];
if (Result == 0)
- Result = new SCEVMulExpr(Ops);
+ Result = new SCEVMulExpr(Ops, this);
return Result;
}
@@ -1473,14 +1620,14 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),
getZeroExtendExpr(Step, ExtTy),
AR->getLoop())) {
- std::vector<SCEVHandle> Operands;
+ SmallVector<SCEVHandle, 4> Operands;
for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i)
Operands.push_back(getUDivExpr(AR->getOperand(i), RHS));
return getAddRecExpr(Operands, AR->getLoop());
}
// (A*B)/C --> A*(B/C) if safe and B/C can be folded.
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {
- std::vector<SCEVHandle> Operands;
+ SmallVector<SCEVHandle, 4> Operands;
for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i)
Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy));
if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))
@@ -1489,7 +1636,9 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
SCEVHandle Op = M->getOperand(i);
SCEVHandle Div = getUDivExpr(Op, RHSC);
if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) {
- Operands = M->getOperands();
+ const SmallVectorImpl<SCEVHandle> &MOperands = M->getOperands();
+ Operands = SmallVector<SCEVHandle, 4>(MOperands.begin(),
+ MOperands.end());
Operands[i] = Div;
return getMulExpr(Operands);
}
@@ -1497,7 +1646,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
}
// (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(LHS)) {
- std::vector<SCEVHandle> Operands;
+ SmallVector<SCEVHandle, 4> Operands;
for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy));
if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {
@@ -1522,7 +1671,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
}
SCEVUDivExpr *&Result = (*SCEVUDivs)[std::make_pair(LHS, RHS)];
- if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS);
+ if (Result == 0) Result = new SCEVUDivExpr(LHS, RHS, this);
return Result;
}
@@ -1531,7 +1680,7 @@ SCEVHandle ScalarEvolution::getUDivExpr(const SCEVHandle &LHS,
/// Simplify the expression as much as possible.
SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start,
const SCEVHandle &Step, const Loop *L) {
- std::vector<SCEVHandle> Operands;
+ SmallVector<SCEVHandle, 4> Operands;
Operands.push_back(Start);
if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step))
if (StepChrec->getLoop() == L) {
@@ -1546,7 +1695,7 @@ SCEVHandle ScalarEvolution::getAddRecExpr(const SCEVHandle &Start,
/// getAddRecExpr - Get an add recurrence expression for the specified loop.
/// Simplify the expression as much as possible.
-SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands,
+SCEVHandle ScalarEvolution::getAddRecExpr(SmallVectorImpl<SCEVHandle> &Operands,
const Loop *L) {
if (Operands.size() == 1) return Operands[0];
#ifndef NDEBUG
@@ -1565,8 +1714,8 @@ SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands,
if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {
const Loop* NestedLoop = NestedAR->getLoop();
if (L->getLoopDepth() < NestedLoop->getLoopDepth()) {
- std::vector<SCEVHandle> NestedOperands(NestedAR->op_begin(),
- NestedAR->op_end());
+ SmallVector<SCEVHandle, 4> NestedOperands(NestedAR->op_begin(),
+ NestedAR->op_end());
SCEVHandle NestedARHandle(NestedAR);
Operands[0] = NestedAR->getStart();
NestedOperands[0] = getAddRecExpr(Operands, L);
@@ -1576,19 +1725,20 @@ SCEVHandle ScalarEvolution::getAddRecExpr(std::vector<SCEVHandle> &Operands,
std::vector<const SCEV*> SCEVOps(Operands.begin(), Operands.end());
SCEVAddRecExpr *&Result = (*SCEVAddRecExprs)[std::make_pair(L, SCEVOps)];
- if (Result == 0) Result = new SCEVAddRecExpr(Operands, L);
+ if (Result == 0) Result = new SCEVAddRecExpr(Operands, L, this);
return Result;
}
SCEVHandle ScalarEvolution::getSMaxExpr(const SCEVHandle &LHS,
const SCEVHandle &RHS) {
- std::vector<SCEVHandle> Ops;
+ SmallVector<SCEVHandle, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getSMaxExpr(Ops);
}
-SCEVHandle ScalarEvolution::getSMaxExpr(std::vector<SCEVHandle> Ops) {
+SCEVHandle
+ScalarEvolution::getSMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) {
assert(!Ops.empty() && "Cannot get empty smax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1662,19 +1812,20 @@ SCEVHandle ScalarEvolution::getSMaxExpr(std::vector<SCEVHandle> Ops) {
std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scSMaxExpr,
SCEVOps)];
- if (Result == 0) Result = new SCEVSMaxExpr(Ops);
+ if (Result == 0) Result = new SCEVSMaxExpr(Ops, this);
return Result;
}
SCEVHandle ScalarEvolution::getUMaxExpr(const SCEVHandle &LHS,
const SCEVHandle &RHS) {
- std::vector<SCEVHandle> Ops;
+ SmallVector<SCEVHandle, 2> Ops;
Ops.push_back(LHS);
Ops.push_back(RHS);
return getUMaxExpr(Ops);
}
-SCEVHandle ScalarEvolution::getUMaxExpr(std::vector<SCEVHandle> Ops) {
+SCEVHandle
+ScalarEvolution::getUMaxExpr(SmallVectorImpl<SCEVHandle> &Ops) {
assert(!Ops.empty() && "Cannot get empty umax!");
if (Ops.size() == 1) return Ops[0];
#ifndef NDEBUG
@@ -1748,17 +1899,29 @@ SCEVHandle ScalarEvolution::getUMaxExpr(std::vector<SCEVHandle> Ops) {
std::vector<const SCEV*> SCEVOps(Ops.begin(), Ops.end());
SCEVCommutativeExpr *&Result = (*SCEVCommExprs)[std::make_pair(scUMaxExpr,
SCEVOps)];
- if (Result == 0) Result = new SCEVUMaxExpr(Ops);
+ if (Result == 0) Result = new SCEVUMaxExpr(Ops, this);
return Result;
}
+SCEVHandle ScalarEvolution::getSMinExpr(const SCEVHandle &LHS,
+ const SCEVHandle &RHS) {
+ // ~smax(~x, ~y) == smin(x, y).
+ return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
+SCEVHandle ScalarEvolution::getUMinExpr(const SCEVHandle &LHS,
+ const SCEVHandle &RHS) {
+ // ~umax(~x, ~y) == umin(x, y)
+ return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
+}
+
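The two identities above are the usual complement tricks on two's-complement integers; a minimal standalone check (a toy C++ sketch, not part of the patch):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // ~smax(~x, ~y) == smin(x, y), and likewise for the unsigned pair.
    int main() {
      for (int32_t x = -5; x <= 5; ++x)
        for (int32_t y = -5; y <= 5; ++y)
          assert(~std::max(~x, ~y) == std::min(x, y));   // smin via smax
      for (uint32_t x = 0; x < 8; ++x)
        for (uint32_t y = 0; y < 8; ++y)
          assert(~std::max(~x, ~y) == std::min(x, y));   // umin via umax
    }

The signed case follows from ~x == -x - 1, which reverses the order of the operands; the unsigned case is the same with ~x == 2^n - 1 - x.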
SCEVHandle ScalarEvolution::getUnknown(Value *V) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
return getConstant(CI);
if (isa<ConstantPointerNull>(V))
return getIntegerSCEV(0, V->getType());
SCEVUnknown *&Result = (*SCEVUnknowns)[V];
- if (Result == 0) Result = new SCEVUnknown(V);
+ if (Result == 0) Result = new SCEVUnknown(V, this);
return Result;
}
@@ -1977,6 +2140,22 @@ ScalarEvolution::getTruncateOrNoop(const SCEVHandle &V, const Type *Ty) {
return getTruncateExpr(V, Ty);
}
+/// getUMaxFromMismatchedTypes - Promote the operands to the wider of
+/// the types using zero-extension, and then perform a umax operation
+/// with them.
+SCEVHandle ScalarEvolution::getUMaxFromMismatchedTypes(const SCEVHandle &LHS,
+ const SCEVHandle &RHS) {
+ SCEVHandle PromotedLHS = LHS;
+ SCEVHandle PromotedRHS = RHS;
+
+ if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
+ PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
+ else
+ PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());
+
+ return getUMaxExpr(PromotedLHS, PromotedRHS);
+}
+
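This promotion is sound because zero-extension is monotonic under unsigned comparison, so the umax can be evaluated at the wider width; a toy C++ check (hypothetical i8/i32 widths, not part of the patch):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned a = 0; a < 256; ++a)
        for (unsigned b = 0; b < 256; ++b) {
          uint32_t wa = (uint8_t)a, wb = (uint8_t)b;  // zext i8 -> i32
          assert(std::max(wa, wb) ==
                 (uint32_t)std::max((uint8_t)a, (uint8_t)b));
        }
    }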
/// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for
/// the specified instruction and replaces any references to the symbolic value
/// SymName with the specified value. This is used during PHI resolution.
@@ -2040,7 +2219,7 @@ SCEVHandle ScalarEvolution::createNodeForPHI(PHINode *PN) {
if (FoundIndex != Add->getNumOperands()) {
// Create an add with everything but the specified operand.
- std::vector<SCEVHandle> Ops;
+ SmallVector<SCEVHandle, 8> Ops;
for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
if (i != FoundIndex)
Ops.push_back(Add->getOperand(i));
@@ -2143,73 +2322,134 @@ SCEVHandle ScalarEvolution::createNodeForGEP(User *GEP) {
/// guaranteed to end in (at every loop iteration). It is, at the same time,
/// the minimum number of times S is divisible by 2. For example, given {4,+,8}
/// it returns 2. If S is guaranteed to be 0, it returns the bitwidth of S.
-static uint32_t GetMinTrailingZeros(SCEVHandle S, const ScalarEvolution &SE) {
+uint32_t
+ScalarEvolution::GetMinTrailingZeros(const SCEVHandle &S) {
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
return C->getValue()->getValue().countTrailingZeros();
if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))
- return std::min(GetMinTrailingZeros(T->getOperand(), SE),
- (uint32_t)SE.getTypeSizeInBits(T->getType()));
+ return std::min(GetMinTrailingZeros(T->getOperand()),
+ (uint32_t)getTypeSizeInBits(T->getType()));
if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) {
- uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE);
- return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ?
- SE.getTypeSizeInBits(E->getType()) : OpRes;
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
}
if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) {
- uint32_t OpRes = GetMinTrailingZeros(E->getOperand(), SE);
- return OpRes == SE.getTypeSizeInBits(E->getOperand()->getType()) ?
- SE.getTypeSizeInBits(E->getType()) : OpRes;
+ uint32_t OpRes = GetMinTrailingZeros(E->getOperand());
+ return OpRes == getTypeSizeInBits(E->getOperand()->getType()) ?
+ getTypeSizeInBits(E->getType()) : OpRes;
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
// The result is the min of all operands results.
- uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE);
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
- MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE));
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
return MinOpRes;
}
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
// The result is the sum of all operands results.
- uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
- uint32_t BitWidth = SE.getTypeSizeInBits(M->getType());
+ uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0));
+ uint32_t BitWidth = getTypeSizeInBits(M->getType());
for (unsigned i = 1, e = M->getNumOperands();
SumOpRes != BitWidth && i != e; ++i)
- SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i), SE),
+ SumOpRes = std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)),
BitWidth);
return SumOpRes;
}
if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
// The result is the min of all operands results.
- uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0), SE);
+ uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0));
for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i)
- MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i), SE));
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i)));
return MinOpRes;
}
if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) {
// The result is the min of all operands results.
- uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
- MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE));
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
return MinOpRes;
}
if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) {
// The result is the min of all operands results.
- uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0), SE);
+ uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0));
for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i)
- MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i), SE));
+ MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i)));
return MinOpRes;
}
- // SCEVUDivExpr, SCEVUnknown
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones);
+ return Zeros.countTrailingOnes();
+ }
+
+ // SCEVUDivExpr
return 0;
}
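For the multiply case above, the trailing zeros of the factors add (capped at the bit width); a toy C++ check of that rule, with std::countr_zero standing in for the constant case (an illustration, not LLVM API):

    #include <algorithm>
    #include <bit>
    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t a = 1; a < 1000; ++a)
        for (uint32_t b = 1; b < 1000; ++b)
          assert(std::countr_zero(a * b) >=
                 std::min(std::countr_zero(a) + std::countr_zero(b), 32));
    }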
+uint32_t
+ScalarEvolution::GetMinLeadingZeros(const SCEVHandle &S) {
+ // TODO: Handle other SCEV expression types here.
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S))
+ return C->getValue()->getValue().countLeadingZeros();
+
+ if (const SCEVZeroExtendExpr *C = dyn_cast<SCEVZeroExtendExpr>(S)) {
+ // A zero-extension cast adds zero bits.
+ return GetMinLeadingZeros(C->getOperand()) +
+ (getTypeSizeInBits(C->getType()) -
+ getTypeSizeInBits(C->getOperand()->getType()));
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ unsigned BitWidth = getTypeSizeInBits(U->getType());
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
+ ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
+ return Zeros.countLeadingOnes();
+ }
+
+ return 1;
+}
+
+uint32_t
+ScalarEvolution::GetMinSignBits(const SCEVHandle &S) {
+ // TODO: Handle other SCEV expression types here.
+
+ if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
+ const APInt &A = C->getValue()->getValue();
+ return A.isNegative() ? A.countLeadingOnes() :
+ A.countLeadingZeros();
+ }
+
+ if (const SCEVSignExtendExpr *C = dyn_cast<SCEVSignExtendExpr>(S)) {
+ // A sign-extension cast adds sign bits.
+ return GetMinSignBits(C->getOperand()) +
+ (getTypeSizeInBits(C->getType()) -
+ getTypeSizeInBits(C->getOperand()->getType()));
+ }
+
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ // For a SCEVUnknown, ask ValueTracking.
+ return ComputeNumSignBits(U->getValue(), TD);
+ }
+
+ return 1;
+}
+
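The sign-extension rule above can be spot-checked in isolation: sign-extending i8 to i32 adds exactly 24 copies of the sign bit. A toy C++ sketch (sign_bits8/sign_bits32 are ad-hoc helpers, not LLVM API):

    #include <bit>
    #include <cassert>
    #include <cstdint>

    // Count copies of the sign bit, including the sign bit itself.
    unsigned sign_bits32(int32_t v) {
      return v < 0 ? std::countl_one((uint32_t)v) : std::countl_zero((uint32_t)v);
    }
    unsigned sign_bits8(int8_t v) {
      return v < 0 ? std::countl_one((uint8_t)v) : std::countl_zero((uint8_t)v);
    }

    int main() {
      for (int v = -128; v < 128; ++v)
        assert(sign_bits32((int8_t)v) == sign_bits8((int8_t)v) + 24);
    }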
/// createSCEV - We know that there is no SCEV for the specified value.
/// Analyze the expression.
///
@@ -2248,14 +2488,27 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
if (CI->isAllOnesValue())
return getSCEV(U->getOperand(0));
const APInt &A = CI->getValue();
- unsigned Ones = A.countTrailingOnes();
- if (APIntOps::isMask(Ones, A))
+
+ // Instcombine's ShrinkDemandedConstant may strip bits out of
+ // constants, obscuring what would otherwise be a low-bits mask.
+ // Use ComputeMaskedBits to compute what ShrinkDemandedConstant
+ // knew about to reconstruct a low-bits mask value.
+ unsigned LZ = A.countLeadingZeros();
+ unsigned BitWidth = A.getBitWidth();
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
+ ComputeMaskedBits(U->getOperand(0), AllOnes, KnownZero, KnownOne, TD);
+
+ APInt EffectiveMask = APInt::getLowBitsSet(BitWidth, BitWidth - LZ);
+
+ if (LZ != 0 && !((~A & ~KnownZero) & EffectiveMask))
return
getZeroExtendExpr(getTruncateExpr(getSCEV(U->getOperand(0)),
- IntegerType::get(Ones)),
+ IntegerType::get(BitWidth - LZ)),
U->getType());
}
break;
+
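A concrete illustration of the rewrite above (toy C++, assuming a value whose low bit is known zero): instcombine may have shrunk a 0xFF mask to 0xFE, but the and still behaves as a full low-8-bits mask, i.e. zext(trunc(x)):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x < (1u << 16); x += 2) {  // low bit known zero
        assert((x & 0xFEu) == (x & 0xFFu));           // shrunken == full mask
        assert((x & 0xFEu) == (uint32_t)(uint8_t)x);  // == zext(trunc(x))
      }
    }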
case Instruction::Or:
// If the RHS of the Or is a constant, we may have something like:
// X*4+1 which got turned into X*4|1. Handle this as an Add so loop
@@ -2266,7 +2519,7 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
SCEVHandle LHS = getSCEV(U->getOperand(0));
const APInt &CIVal = CI->getValue();
- if (GetMinTrailingZeros(LHS, *this) >=
+ if (GetMinTrailingZeros(LHS) >=
(CIVal.getBitWidth() - CIVal.countLeadingZeros()))
return getAddExpr(LHS, getSCEV(U->getOperand(1)));
}
@@ -2292,9 +2545,27 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
if (BO->getOpcode() == Instruction::And &&
LCI->getValue() == CI->getValue())
if (const SCEVZeroExtendExpr *Z =
- dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0))))
- return getZeroExtendExpr(getNotSCEV(Z->getOperand()),
- U->getType());
+ dyn_cast<SCEVZeroExtendExpr>(getSCEV(U->getOperand(0)))) {
+ const Type *UTy = U->getType();
+ SCEVHandle Z0 = Z->getOperand();
+ const Type *Z0Ty = Z0->getType();
+ unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
+
+ // If C is a low-bits mask, the zero extend is serving to
+ // mask off the high bits. Complement the operand and
+ // re-apply the zext.
+ if (APIntOps::isMask(Z0TySize, CI->getValue()))
+ return getZeroExtendExpr(getNotSCEV(Z0), UTy);
+
+ // If C is a single bit, it may be in the sign-bit position
+ // before the zero-extend. In this case, represent the xor
+ // using an add, which is equivalent, and re-apply the zext.
+ APInt Trunc = APInt(CI->getValue()).trunc(Z0TySize);
+ if (APInt(Trunc).zext(getTypeSizeInBits(UTy)) == CI->getValue() &&
+ Trunc.isSignBit())
+ return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)),
+ UTy);
+ }
}
break;
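The sign-bit case rests on the identity x ^ s == x + s (mod 2^n) when s is the single sign bit: the only carry the addition can produce falls out of the top of the width. A toy exhaustive check for the 8-bit case (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned v = 0; v < 256; ++v) {
        uint8_t x = (uint8_t)v;
        assert((uint8_t)(x ^ 0x80u) == (uint8_t)(x + 0x80u));
      }
    }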
@@ -2385,10 +2656,7 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
if (LHS == U->getOperand(1) && RHS == U->getOperand(2))
return getSMaxExpr(getSCEV(LHS), getSCEV(RHS));
else if (LHS == U->getOperand(2) && RHS == U->getOperand(1))
- // ~smax(~x, ~y) == smin(x, y).
- return getNotSCEV(getSMaxExpr(
- getNotSCEV(getSCEV(LHS)),
- getNotSCEV(getSCEV(RHS))));
+ return getSMinExpr(getSCEV(LHS), getSCEV(RHS));
break;
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2399,9 +2667,25 @@ SCEVHandle ScalarEvolution::createSCEV(Value *V) {
if (LHS == U->getOperand(1) && RHS == U->getOperand(2))
return getUMaxExpr(getSCEV(LHS), getSCEV(RHS));
else if (LHS == U->getOperand(2) && RHS == U->getOperand(1))
- // ~umax(~x, ~y) == umin(x, y)
- return getNotSCEV(getUMaxExpr(getNotSCEV(getSCEV(LHS)),
- getNotSCEV(getSCEV(RHS))));
+ return getUMinExpr(getSCEV(LHS), getSCEV(RHS));
+ break;
+ case ICmpInst::ICMP_NE:
+ // n != 0 ? n : 1 -> umax(n, 1)
+ if (LHS == U->getOperand(1) &&
+ isa<ConstantInt>(U->getOperand(2)) &&
+ cast<ConstantInt>(U->getOperand(2))->isOne() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero())
+ return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(2)));
+ break;
+ case ICmpInst::ICMP_EQ:
+ // n == 0 ? 1 : n -> umax(n, 1)
+ if (LHS == U->getOperand(2) &&
+ isa<ConstantInt>(U->getOperand(1)) &&
+ cast<ConstantInt>(U->getOperand(1))->isOne() &&
+ isa<ConstantInt>(RHS) &&
+ cast<ConstantInt>(RHS)->isZero())
+ return getUMaxExpr(getSCEV(LHS), getSCEV(U->getOperand(1)));
break;
default:
break;
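Both select patterns above are unsigned max with 1; a toy check (not part of the patch):

    #include <algorithm>
    #include <cassert>

    int main() {
      for (unsigned n = 0; n < 1000; ++n) {
        assert((n != 0 ? n : 1u) == std::max(n, 1u));  // n != 0 ? n : 1
        assert((n == 0 ? 1u : n) == std::max(n, 1u));  // n == 0 ? 1 : n
      }
    }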
@@ -2462,9 +2746,13 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Update the value in the map.
Pair.first->second = ItCount;
- } else if (isa<PHINode>(L->getHeader()->begin())) {
- // Only count loops that have phi nodes as not being computable.
- ++NumTripCountsNotComputed;
+ } else {
+ if (ItCount.Max != CouldNotCompute)
+ // Update the value in the map.
+ Pair.first->second = ItCount;
+ if (isa<PHINode>(L->getHeader()->begin()))
+ // Only count loops that have phi nodes as not being computable.
+ ++NumTripCountsNotComputed;
}
// Now that we know more about the trip count for this loop, forget any
@@ -2520,19 +2808,58 @@ void ScalarEvolution::forgetLoopPHIs(const Loop *L) {
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
- // If the loop has a non-one exit block count, we can't analyze it.
- BasicBlock *ExitBlock = L->getExitBlock();
- if (!ExitBlock)
- return CouldNotCompute;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ // Examine all exits and pick the most conservative values.
+ SCEVHandle BECount = CouldNotCompute;
+ SCEVHandle MaxBECount = CouldNotCompute;
+ bool CouldNotComputeBECount = false;
+ bool CouldNotComputeMaxBECount = false;
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BackedgeTakenInfo NewBTI =
+ ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
+
+ if (NewBTI.Exact == CouldNotCompute) {
+ // We couldn't compute an exact value for this exit, so
+ // we won't be able to compute an exact value for the loop.
+ CouldNotComputeBECount = true;
+ BECount = CouldNotCompute;
+ } else if (!CouldNotComputeBECount) {
+ if (BECount == CouldNotCompute)
+ BECount = NewBTI.Exact;
+ else {
+ // TODO: More analysis could be done here. For example, a
+ // loop with a short-circuiting && operator has an exact count
+ // of the min of both sides.
+ CouldNotComputeBECount = true;
+ BECount = CouldNotCompute;
+ }
+ }
+ if (NewBTI.Max == CouldNotCompute) {
+ // We couldn't compute a maximum value for this exit, so
+ // we won't be able to compute a maximum value for the loop.
+ CouldNotComputeMaxBECount = true;
+ MaxBECount = CouldNotCompute;
+ } else if (!CouldNotComputeMaxBECount) {
+ if (MaxBECount == CouldNotCompute)
+ MaxBECount = NewBTI.Max;
+ else
+ MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, NewBTI.Max);
+ }
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+}
- // Okay, there is one exit block. Try to find the condition that causes the
- // loop to be exited.
- BasicBlock *ExitingBlock = L->getExitingBlock();
- if (!ExitingBlock)
- return CouldNotCompute; // More than one block exiting!
+/// ComputeBackedgeTakenCountFromExit - Compute the number of times the backedge
+/// of the specified loop will execute if it exits via the specified block.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
+ BasicBlock *ExitingBlock) {
- // Okay, we've computed the exiting block. See what condition causes us to
- // exit.
+ // Okay, we've chosen an exiting block. See what condition causes us to
+ // exit at this block.
//
// FIXME: we should be able to handle switch instructions (with a single exit)
BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
@@ -2547,23 +2874,154 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
// Currently we check for this by checking to see if the Exit branch goes to
// the loop header. If so, we know it will always execute the same number of
// times as the loop. We also handle the case where the exit block *is* the
- // loop header. This is common for un-rotated loops. More extensive analysis
- // could be done to handle more cases here.
+ // loop header. This is common for un-rotated loops.
+ //
+ // If both of those tests fail, walk up the unique predecessor chain to the
+ // header, stopping if there is an edge that doesn't exit the loop. If the
+ // header is reached, the execution count of the branch will be equal to the
+ // trip count of the loop.
+ //
+ // More extensive analysis could be done to handle more cases here.
+ //
if (ExitBr->getSuccessor(0) != L->getHeader() &&
ExitBr->getSuccessor(1) != L->getHeader() &&
- ExitBr->getParent() != L->getHeader())
- return CouldNotCompute;
-
- ICmpInst *ExitCond = dyn_cast<ICmpInst>(ExitBr->getCondition());
+ ExitBr->getParent() != L->getHeader()) {
+ // The simple checks failed, try climbing the unique predecessor chain
+ // up to the header.
+ bool Ok = false;
+ for (BasicBlock *BB = ExitBr->getParent(); BB; ) {
+ BasicBlock *Pred = BB->getUniquePredecessor();
+ if (!Pred)
+ return CouldNotCompute;
+ TerminatorInst *PredTerm = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *PredSucc = PredTerm->getSuccessor(i);
+ if (PredSucc == BB)
+ continue;
+ // If the predecessor has a successor that isn't BB and isn't
+ // outside the loop, assume the worst.
+ if (L->contains(PredSucc))
+ return CouldNotCompute;
+ }
+ if (Pred == L->getHeader()) {
+ Ok = true;
+ break;
+ }
+ BB = Pred;
+ }
+ if (!Ok)
+ return CouldNotCompute;
+ }
+
+ // Proceed to the next level to examine the exit condition expression.
+ return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
+ ExitBr->getSuccessor(0),
+ ExitBr->getSuccessor(1));
+}
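A hedged illustration of the predecessor-chain climb (f is a hypothetical opaque predicate): the second exit below is neither in the header nor branching to it, but every block between it and the header has a unique predecessor whose only other successors leave the loop, so the exit branch executes exactly once per iteration and its count equals the trip count.

    bool f(unsigned); // hypothetical opaque predicate

    void chained(unsigned n, volatile unsigned &sink) {
      for (unsigned i = 0; ; ++i) {
        if (f(i)) return;   // header's other successor leaves the loop
        if (i == n) break;  // exiting block: its unique-predecessor chain
        sink = i;           // reaches the header, so the climb succeeds
      }
    }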
+
+/// ComputeBackedgeTakenCountFromExitCond - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
+ Value *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
+ // Check if the controlling expression for this loop is an and or or. In
+ // such cases, an exact backedge-taken count may be infeasible, but a
+ // maximum count may still be feasible.
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
+ if (BO->getOpcode() == Instruction::And) {
+ // Recurse on the operands of the and.
+ BackedgeTakenInfo BTI0 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+ BackedgeTakenInfo BTI1 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ SCEVHandle BECount = CouldNotCompute;
+ SCEVHandle MaxBECount = CouldNotCompute;
+ if (L->contains(TBB)) {
+ // Both conditions must be true for the loop to continue executing.
+ // Choose the less conservative count.
+ // TODO: Take the minimum of the exact counts.
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
+ // TODO: Take the minimum of the maximum counts.
+ if (BTI0.Max == CouldNotCompute)
+ MaxBECount = BTI1.Max;
+ else if (BTI1.Max == CouldNotCompute)
+ MaxBECount = BTI0.Max;
+ else if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(BTI0.Max))
+ if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(BTI1.Max))
+ MaxBECount = getConstant(APIntOps::umin(C0->getValue()->getValue(),
+ C1->getValue()->getValue()));
+ } else {
+ // Both conditions must be true for the loop to exit.
+ assert(L->contains(FBB) && "Loop block has no successor in loop!");
+ if (BTI0.Exact != CouldNotCompute && BTI1.Exact != CouldNotCompute)
+ BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+ if (BTI0.Max != CouldNotCompute && BTI1.Max != CouldNotCompute)
+ MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+ if (BO->getOpcode() == Instruction::Or) {
+ // Recurse on the operands of the or.
+ BackedgeTakenInfo BTI0 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(0), TBB, FBB);
+ BackedgeTakenInfo BTI1 =
+ ComputeBackedgeTakenCountFromExitCond(L, BO->getOperand(1), TBB, FBB);
+ SCEVHandle BECount = CouldNotCompute;
+ SCEVHandle MaxBECount = CouldNotCompute;
+ if (L->contains(FBB)) {
+ // Both conditions must be false for the loop to continue executing.
+ // Choose the less conservative count.
+ // TODO: Take the minimum of the exact counts.
+ if (BTI0.Exact == BTI1.Exact)
+ BECount = BTI0.Exact;
+ // TODO: Take the minimum of the maximum counts.
+ if (BTI0.Max == CouldNotCompute)
+ MaxBECount = BTI1.Max;
+ else if (BTI1.Max == CouldNotCompute)
+ MaxBECount = BTI0.Max;
+ else if (const SCEVConstant *C0 = dyn_cast<SCEVConstant>(BTI0.Max))
+ if (const SCEVConstant *C1 = dyn_cast<SCEVConstant>(BTI1.Max))
+ MaxBECount = getConstant(APIntOps::umin(C0->getValue()->getValue(),
+ C1->getValue()->getValue()));
+ } else {
+ // Both conditions must be false for the loop to exit.
+ assert(L->contains(TBB) && "Loop block has no successor in loop!");
+ if (BTI0.Exact != CouldNotCompute && BTI1.Exact != CouldNotCompute)
+ BECount = getUMaxFromMismatchedTypes(BTI0.Exact, BTI1.Exact);
+ if (BTI0.Max != CouldNotCompute && BTI1.Max != CouldNotCompute)
+ MaxBECount = getUMaxFromMismatchedTypes(BTI0.Max, BTI1.Max);
+ }
+
+ return BackedgeTakenInfo(BECount, MaxBECount);
+ }
+ }
+
+ // With an icmp, it may be feasible to compute an exact backedge-taken count.
+ // Proceed to the next level to examine the icmp.
+ if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
+ return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
// If it's not an integer or pointer comparison then compute it the hard way.
- if (ExitCond == 0)
- return ComputeBackedgeTakenCountExhaustively(L, ExitBr->getCondition(),
- ExitBr->getSuccessor(0) == ExitBlock);
+ return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
+}
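A minimal sketch of the short-circuit case (n, m hypothetical): the backedge is controlled by `i < n && i < m`, so the exact count would be min(n, m), which the TODOs above leave for later; but when both operands have constant maxima, the combined maximum becomes their umin.

    void shortCircuit(unsigned n, unsigned m, volatile unsigned &sink) {
      for (unsigned i = 0; i < n && i < m; ++i)
        sink = i; // Exact: CouldNotCompute for now; Max: umin of the two maxima
    }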
+
+/// ComputeBackedgeTakenCountFromExitCondICmp - Compute the number of times the
+/// backedge of the specified loop will execute if its exit condition
+/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
+ ICmpInst *ExitCond,
+ BasicBlock *TBB,
+ BasicBlock *FBB) {
// If the condition was exit on true, convert the condition to exit on false
ICmpInst::Predicate Cond;
- if (ExitBr->getSuccessor(1) == ExitBlock)
+ if (!L->contains(FBB))
Cond = ExitCond->getPredicate();
else
Cond = ExitCond->getInversePredicate();
@@ -2573,7 +3031,12 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
SCEVHandle ItCnt =
ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
- if (!isa<SCEVCouldNotCompute>(ItCnt)) return ItCnt;
+ if (!isa<SCEVCouldNotCompute>(ItCnt)) {
+ unsigned BitWidth = getTypeSizeInBits(ItCnt->getType());
+ return BackedgeTakenInfo(ItCnt,
+ isa<SCEVConstant>(ItCnt) ? ItCnt :
+ getConstant(APInt::getMaxValue(BitWidth)-1));
+ }
}
SCEVHandle LHS = getSCEV(ExitCond->getOperand(0));
@@ -2651,8 +3114,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
break;
}
return
- ComputeBackedgeTakenCountExhaustively(L, ExitCond,
- ExitBr->getSuccessor(0) == ExitBlock);
+ ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
}
static ConstantInt *
@@ -2750,7 +3212,7 @@ ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI, Constant *RHS,
unsigned MaxSteps = MaxBruteForceIterations;
for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) {
ConstantInt *ItCst =
- ConstantInt::get(IdxExpr->getType(), IterationNum);
+ ConstantInt::get(cast<IntegerType>(IdxExpr->getType()), IterationNum);
ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this);
// Form the GEP offset.
@@ -2945,7 +3407,7 @@ ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen)
if (CondVal->getValue() == uint64_t(ExitWhen)) {
ConstantEvolutionLoopExitValue[PN] = PHIVal;
++NumBruteForceTripCountsComputed;
- return getConstant(ConstantInt::get(Type::Int32Ty, IterationNum));
+ return getConstant(Type::Int32Ty, IterationNum);
}
// Compute the value of the PHI node for the next iteration.
@@ -3074,7 +3536,7 @@ SCEVHandle ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
if (OpAtScope != Comm->getOperand(i)) {
// Okay, at least one of these operands is loop variant but might be
// foldable. Build a new instance of the folded commutative expression.
- std::vector<SCEVHandle> NewOps(Comm->op_begin(), Comm->op_begin()+i);
+ SmallVector<SCEVHandle, 8> NewOps(Comm->op_begin(), Comm->op_begin()+i);
NewOps.push_back(OpAtScope);
for (++i; i != e; ++i) {
@@ -3394,6 +3856,29 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
return 0;
}
+/// HasSameValue - SCEV structural equivalence is usually sufficient for
+/// testing whether two expressions are equal, however for the purposes of
+/// looking for a condition guarding a loop, it can be useful to be a little
+/// more general, since a front-end may have replicated the controlling
+/// expression.
+///
+static bool HasSameValue(const SCEVHandle &A, const SCEVHandle &B) {
+ // Quick check to see if they are the same SCEV.
+ if (A == B) return true;
+
+ // Otherwise, if they're both SCEVUnknown, it's possible that they hold
+ // two different instructions with the same value. Check for this case.
+ if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))
+ if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))
+ if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))
+ if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue()))
+ if (AI->isIdenticalTo(BI))
+ return true;
+
+ // Otherwise assume they may have a different value.
+ return false;
+}
+
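A hedged example of the replication HasSameValue tolerates: SCEV models loads as opaque SCEVUnknowns, so if a front-end emits one load of *p for the guard and a separate, identical load for the loop bound, the two SCEVs compare structurally unequal even though isIdenticalTo recognizes the instructions as computing the same value.

    void guarded(unsigned *p, volatile unsigned &sink) {
      if (*p > 0)                          // load #1 feeds the guard icmp
        for (unsigned i = 0; i < *p; ++i)  // load #2 feeds the loop test
          sink = i;
    }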
/// isLoopGuardedByCond - Test whether entry to the loop is protected by
/// a conditional between LHS and RHS. This is used to help avoid max
/// expressions in loop trip counts.
@@ -3494,15 +3979,43 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
SCEVHandle PreCondLHSSCEV = getSCEV(PreCondLHS);
SCEVHandle PreCondRHSSCEV = getSCEV(PreCondRHS);
- if ((LHS == PreCondLHSSCEV && RHS == PreCondRHSSCEV) ||
- (LHS == getNotSCEV(PreCondRHSSCEV) &&
- RHS == getNotSCEV(PreCondLHSSCEV)))
+ if ((HasSameValue(LHS, PreCondLHSSCEV) &&
+ HasSameValue(RHS, PreCondRHSSCEV)) ||
+ (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
+ HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV))))
return true;
}
return false;
}
+/// getBECount - Subtract the end and start values and divide by the step,
+/// rounding up, to get the number of times the backedge is executed. Return
+/// CouldNotCompute if an intermediate computation overflows.
+SCEVHandle ScalarEvolution::getBECount(const SCEVHandle &Start,
+ const SCEVHandle &End,
+ const SCEVHandle &Step) {
+ const Type *Ty = Start->getType();
+ SCEVHandle NegOne = getIntegerSCEV(-1, Ty);
+ SCEVHandle Diff = getMinusSCEV(End, Start);
+ SCEVHandle RoundUp = getAddExpr(Step, NegOne);
+
+ // Add an adjustment to the difference between End and Start so that
+ // the division will effectively round up.
+ SCEVHandle Add = getAddExpr(Diff, RoundUp);
+
+ // Check Add for unsigned overflow.
+ // TODO: More sophisticated things could be done here.
+ const Type *WideTy = IntegerType::get(getTypeSizeInBits(Ty) + 1);
+ SCEVHandle OperandExtendedAdd =
+ getAddExpr(getZeroExtendExpr(Diff, WideTy),
+ getZeroExtendExpr(RoundUp, WideTy));
+ if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd)
+ return CouldNotCompute;
+
+ return getUDivExpr(Add, Step);
+}
+
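A worked instance of the overflow check, scaled down to 8 bits for readability (getBECount itself widens by exactly one bit): with Start=0, End=255, Step=2 the adjusted difference wraps, and redoing the addition in a wider type exposes the disagreement.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t Diff = 255 - 0;   // End - Start
      uint8_t RoundUp = 2 - 1;  // Step - 1
      uint8_t Add = Diff + RoundUp;                        // wraps to 0
      uint16_t Wide = uint16_t(Diff) + uint16_t(RoundUp);  // 256
      assert(uint16_t(Add) != Wide); // mismatch => CouldNotCompute
      return 0;
    }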
/// HowManyLessThans - Return the number of times a backedge containing the
/// specified less-than comparison will execute. If not computable, return
/// CouldNotCompute.
@@ -3520,7 +4033,6 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// FORNOW: We only support unit strides.
unsigned BitWidth = getTypeSizeInBits(AddRec->getType());
SCEVHandle Step = AddRec->getStepRecurrence(*this);
- SCEVHandle NegOne = getIntegerSCEV(-1, AddRec->getType());
// TODO: handle non-constant strides.
const SCEVConstant *CStep = dyn_cast<SCEVConstant>(Step);
@@ -3575,22 +4087,20 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
: getUMaxExpr(RHS, Start);
// Determine the maximum constant end value.
- SCEVHandle MaxEnd = isa<SCEVConstant>(End) ? End :
- getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth) :
- APInt::getMaxValue(BitWidth));
+ SCEVHandle MaxEnd =
+ isa<SCEVConstant>(End) ? End :
+ getConstant(isSigned ? APInt::getSignedMaxValue(BitWidth)
+ .ashr(GetMinSignBits(End) - 1) :
+ APInt::getMaxValue(BitWidth)
+ .lshr(GetMinLeadingZeros(End)));
// Finally, we subtract these two values and divide, rounding up, to get
// the number of times the backedge is executed.
- SCEVHandle BECount = getUDivExpr(getAddExpr(getMinusSCEV(End, Start),
- getAddExpr(Step, NegOne)),
- Step);
+ SCEVHandle BECount = getBECount(Start, End, Step);
// The maximum backedge count is similar, except using the minimum start
// value and the maximum end value.
- SCEVHandle MaxBECount = getUDivExpr(getAddExpr(getMinusSCEV(MaxEnd,
- MinStart),
- getAddExpr(Step, NegOne)),
- Step);
+ SCEVHandle MaxBECount = getBECount(MinStart, MaxEnd, Step);
return BackedgeTakenInfo(BECount, MaxBECount);
}
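A hedged numeric reading of the new MaxEnd bound: if End is only known through, say, a zero-extension from i8, then GetMinLeadingZeros(End) is 24 on an i32, the constant cap tightens from UINT32_MAX to 255, and the maximum backedge count follows from the same rounding-up division.

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint32_t MaxEnd = UINT32_MAX >> 24; // 24 known leading zeros -> 255
      uint32_t MinStart = 0, Step = 1;
      uint32_t MaxBECount = (MaxEnd - MinStart + (Step - 1)) / Step;
      std::printf("MaxBECount <= %u\n", MaxBECount); // 255
      return 0;
    }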
@@ -3611,7 +4121,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// If the start is a non-zero constant, shift the range to simplify things.
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))
if (!SC->getValue()->isZero()) {
- std::vector<SCEVHandle> Operands(op_begin(), op_end());
+ SmallVector<SCEVHandle, 4> Operands(op_begin(), op_end());
Operands[0] = SE.getIntegerSCEV(0, SC->getType());
SCEVHandle Shifted = SE.getAddRecExpr(Operands, getLoop());
if (const SCEVAddRecExpr *ShiftedAddRec =
@@ -3636,7 +4146,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// iteration exits.
unsigned BitWidth = SE.getTypeSizeInBits(getType());
if (!Range.contains(APInt(BitWidth, 0)))
- return SE.getConstant(ConstantInt::get(getType(),0));
+ return SE.getIntegerSCEV(0, getType());
if (isAffine()) {
// If this is an affine expression then we have this situation:
@@ -3672,7 +4182,7 @@ SCEVHandle SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
// quadratic equation to solve it. To do this, we must frame our problem in
// terms of figuring out when zero is crossed, instead of when
// Range.getUpper() is crossed.
- std::vector<SCEVHandle> NewOps(op_begin(), op_end());
+ SmallVector<SCEVHandle, 4> NewOps(op_begin(), op_end());
NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
SCEVHandle NewAddRec = SE.getAddRecExpr(NewOps, getLoop());
@@ -3783,7 +4293,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
//===----------------------------------------------------------------------===//
ScalarEvolution::ScalarEvolution()
- : FunctionPass(&ID), CouldNotCompute(new SCEVCouldNotCompute()) {
+ : FunctionPass(&ID), CouldNotCompute(new SCEVCouldNotCompute(0)) {
}
bool ScalarEvolution::runOnFunction(Function &F) {
@@ -3847,11 +4357,18 @@ void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
OS << " --> ";
SCEVHandle SV = SE.getSCEV(&*I);
SV->print(OS);
- OS << "\t\t";
- if (const Loop *L = LI->getLoopFor((*I).getParent())) {
- OS << "Exits: ";
- SCEVHandle ExitValue = SE.getSCEVAtScope(&*I, L->getParentLoop());
+ const Loop *L = LI->getLoopFor((*I).getParent());
+
+ SCEVHandle AtUse = SE.getSCEVAtScope(SV, L);
+ if (AtUse != SV) {
+ OS << " --> ";
+ AtUse->print(OS);
+ }
+
+ if (L) {
+ OS << "\t\t" "Exits: ";
+ SCEVHandle ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
if (!ExitValue->isLoopInvariant(L)) {
OS << "<<Unknown>>";
} else {
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index e1f8fa4..2a73c27 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -182,7 +182,8 @@ static bool FactorOutConstant(SCEVHandle &S,
if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S))
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
if (!C->getValue()->getValue().srem(Factor)) {
- std::vector<SCEVHandle> NewMulOps(M->getOperands());
+ const SmallVectorImpl<SCEVHandle> &MOperands = M->getOperands();
+ SmallVector<SCEVHandle, 4> NewMulOps(MOperands.begin(), MOperands.end());
NewMulOps[0] =
SE.getConstant(C->getValue()->getValue().sdiv(Factor));
S = SE.getMulExpr(NewMulOps);
@@ -239,7 +240,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
Value *V) {
const Type *ElTy = PTy->getElementType();
SmallVector<Value *, 4> GepIndices;
- std::vector<SCEVHandle> Ops(op_begin, op_end);
+ SmallVector<SCEVHandle, 8> Ops(op_begin, op_end);
bool AnyNonZeroIndices = false;
// Descend down the pointer's type and attempt to convert the other
@@ -250,8 +251,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
for (;;) {
APInt ElSize = APInt(SE.getTypeSizeInBits(Ty),
ElTy->isSized() ? SE.TD->getTypeAllocSize(ElTy) : 0);
- std::vector<SCEVHandle> NewOps;
- std::vector<SCEVHandle> ScaledOps;
+ SmallVector<SCEVHandle, 8> NewOps;
+ SmallVector<SCEVHandle, 8> ScaledOps;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
// Split AddRecs up into parts as either of the parts may be usable
// without the other.
@@ -297,9 +298,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin,
GepIndices.push_back(ConstantInt::get(Type::Int32Ty, ElIdx));
ElTy = STy->getTypeAtIndex(ElIdx);
Ops[0] =
- SE.getConstant(ConstantInt::get(Ty,
- FullOffset -
- SL.getElementOffset(ElIdx)));
+ SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
AnyNonZeroIndices = true;
continue;
}
@@ -365,7 +364,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
// comments on expandAddToGEP for details.
if (SE.TD)
if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) {
- const std::vector<SCEVHandle> &Ops = S->getOperands();
+ const SmallVectorImpl<SCEVHandle> &Ops = S->getOperands();
return expandAddToGEP(&Ops[0], &Ops[Ops.size() - 1],
PTy, Ty, V);
}
@@ -432,7 +431,7 @@ static void ExposePointerBase(SCEVHandle &Base, SCEVHandle &Rest,
}
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
Base = A->getOperand(A->getNumOperands()-1);
- std::vector<SCEVHandle> NewAddOps(A->op_begin(), A->op_end());
+ SmallVector<SCEVHandle, 8> NewAddOps(A->op_begin(), A->op_end());
NewAddOps.back() = Rest;
Rest = SE.getAddExpr(NewAddOps);
ExposePointerBase(Base, Rest, SE);
@@ -473,7 +472,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
// {X,+,F} --> X + {0,+,F}
if (!S->getStart()->isZero()) {
- std::vector<SCEVHandle> NewOps(S->getOperands());
+ const SmallVectorImpl<SCEVHandle> &SOperands = S->getOperands();
+ SmallVector<SCEVHandle, 4> NewOps(SOperands.begin(), SOperands.end());
NewOps[0] = SE.getIntegerSCEV(0, Ty);
SCEVHandle Rest = SE.getAddRecExpr(NewOps, L);
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 45f97b8..17ffa2d 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -52,11 +52,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = Mask.getBitWidth();
- assert((V->getType()->isInteger() || isa<PointerType>(V->getType())) &&
+ assert((V->getType()->isIntOrIntVector() || isa<PointerType>(V->getType())) &&
"Not integer or pointer type!");
- assert((!TD || TD->getTypeSizeInBits(V->getType()) == BitWidth) &&
- (!isa<IntegerType>(V->getType()) ||
- V->getType()->getPrimitiveSizeInBits() == BitWidth) &&
+ assert((!TD ||
+ TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
+ (!V->getType()->isIntOrIntVector() ||
+ V->getType()->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
"V, Mask, KnownOne and KnownZero should have same BitWidth");
@@ -67,12 +68,26 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
KnownZero = ~KnownOne & Mask;
return;
}
- // Null is all-zeros.
- if (isa<ConstantPointerNull>(V)) {
+ // Null and aggregate-zero are all-zeros.
+ if (isa<ConstantPointerNull>(V) ||
+ isa<ConstantAggregateZero>(V)) {
KnownOne.clear();
KnownZero = Mask;
return;
}
+ // Handle a constant vector by taking the intersection of the known bits of
+ // each element.
+ if (ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
+ KnownZero.set(); KnownOne.set();
+ for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i) {
+ APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0);
+ ComputeMaskedBits(CV->getOperand(i), Mask, KnownZero2, KnownOne2,
+ TD, Depth);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
+ }
+ return;
+ }
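A small worked example of the element-wise intersection (8-bit elements for brevity): for <2 x i8> <3, 7>, a bit is known-one only if set in every element and known-zero only if clear in every element, so bit 2 ends up unknown.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t e0 = 3, e1 = 7;            // 00000011 and 00000111
      uint8_t KnownOne  = e0 & e1;       // 00000011
      uint8_t KnownZero = uint8_t(~e0) & uint8_t(~e1); // 11111000
      assert(KnownOne == 0x03 && KnownZero == 0xF8);
      return 0; // bit 2 is neither known one nor known zero
    }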
// The address of an aligned GlobalValue has trailing zeros.
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
unsigned Align = GV->getAlignment();
@@ -218,7 +233,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
const Type *SrcTy = I->getOperand(0)->getType();
unsigned SrcBitWidth = TD ?
TD->getTypeSizeInBits(SrcTy) :
- SrcTy->getPrimitiveSizeInBits();
+ SrcTy->getScalarSizeInBits();
APInt MaskIn(Mask);
MaskIn.zextOrTrunc(SrcBitWidth);
KnownZero.zextOrTrunc(SrcBitWidth);
@@ -480,7 +495,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
// Handle array index arithmetic.
const Type *IndexedTy = GTI.getIndexedType();
if (!IndexedTy->isSized()) return;
- unsigned GEPOpiBits = Index->getType()->getPrimitiveSizeInBits();
+ unsigned GEPOpiBits = Index->getType()->getScalarSizeInBits();
uint64_t TypeSize = TD ? TD->getTypeAllocSize(IndexedTy) : 1;
LocalMask = APInt::getAllOnesValue(GEPOpiBits);
LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0);
@@ -609,8 +624,8 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask,
/// 'Op' must have a scalar integer type.
///
unsigned llvm::ComputeNumSignBits(Value *V, TargetData *TD, unsigned Depth) {
- const IntegerType *Ty = cast<IntegerType>(V->getType());
- unsigned TyBits = Ty->getBitWidth();
+ const Type *Ty = V->getType();
+ unsigned TyBits = Ty->getScalarSizeInBits();
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
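A brief sketch of why the switch to getScalarSizeInBits matters (the exact calls are illustrative for this LLVM revision): it reports the element width for vector types, letting the same sign-bit logic serve i32 and <4 x i32> alike.

    #include "llvm/DerivedTypes.h"
    #include <cassert>

    void scalarWidthExample() {
      const llvm::Type *S = llvm::Type::Int32Ty;          // i32
      const llvm::Type *V = llvm::VectorType::get(S, 4);  // <4 x i32>
      assert(S->getScalarSizeInBits() == 32);
      assert(V->getScalarSizeInBits() == 32); // element width, not 128
    }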
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index c5190ef..b3f7cdb 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -526,6 +526,10 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(coldcc);
KEYWORD(x86_stdcallcc);
KEYWORD(x86_fastcallcc);
+ KEYWORD(arm_apcscc);
+ KEYWORD(arm_aapcscc);
+ KEYWORD(arm_aapcs_vfpcc);
+
KEYWORD(cc);
KEYWORD(c);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 4863f3c..909370c 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -808,8 +808,11 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
/// ::= 'coldcc'
/// ::= 'x86_stdcallcc'
/// ::= 'x86_fastcallcc'
+/// ::= 'arm_apcscc'
+/// ::= 'arm_aapcscc'
+/// ::= 'arm_aapcs_vfpcc'
/// ::= 'cc' UINT
-///
+///
bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
switch (Lex.getKind()) {
default: CC = CallingConv::C; return false;
@@ -818,6 +821,9 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_coldcc: CC = CallingConv::Cold; break;
case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+ case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
+ case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
+ case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
case lltok::kw_cc: Lex.Lex(); return ParseUInt32(CC);
}
Lex.Lex();
@@ -1743,7 +1749,7 @@ bool LLParser::ParseValID(ValID &ID) {
Lex.Lex();
if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
ParseGlobalTypeAndValue(SrcVal) ||
- ParseToken(lltok::kw_to, "expected 'to' int constantexpr cast") ||
+ ParseToken(lltok::kw_to, "expected 'to' in constantexpr cast") ||
ParseType(DestTy) ||
ParseToken(lltok::rparen, "expected ')' at end of constantexpr cast"))
return true;
@@ -3145,7 +3151,7 @@ bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS) {
}
/// ParseLoad
-/// ::= 'volatile'? 'load' TypeAndValue (',' 'align' uint)?
+/// ::= 'volatile'? 'load' TypeAndValue (',' 'align' i32)?
bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
bool isVolatile) {
Value *Val; LocTy Loc;
@@ -3163,7 +3169,7 @@ bool LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
}
/// ParseStore
-/// ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' uint)?
+/// ::= 'volatile'? 'store' TypeAndValue ',' TypeAndValue (',' 'align' i32)?
bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
bool isVolatile) {
Value *Val, *Ptr; LocTy Loc, PtrLoc;
@@ -3186,7 +3192,7 @@ bool LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
}
/// ParseGetResult
-/// ::= 'getresult' TypeAndValue ',' uint
+/// ::= 'getresult' TypeAndValue ',' i32
/// FIXME: Remove support for getresult in LLVM 3.0
bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
Value *Val; LocTy ValLoc, EltLoc;
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 9335d19..cff89f8 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -60,7 +60,9 @@ namespace lltok {
kw_gc,
kw_c,
- kw_cc, kw_ccc, kw_fastcc, kw_coldcc, kw_x86_stdcallcc, kw_x86_fastcallcc,
+ kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
+ kw_x86_stdcallcc, kw_x86_fastcallcc,
+ kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
kw_signext,
kw_zeroext,
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 3b44f564..6b9606c 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2040,14 +2040,13 @@ void BitcodeReader::dematerializeFunction(Function *F) {
Module *BitcodeReader::materializeModule(std::string *ErrInfo) {
- for (DenseMap<Function*, std::pair<uint64_t, unsigned> >::iterator I =
- DeferredFunctionInfo.begin(), E = DeferredFunctionInfo.end(); I != E;
- ++I) {
- Function *F = I->first;
+ // Iterate over the module, deserializing any functions that are still on
+ // disk.
+ for (Module::iterator F = TheModule->begin(), E = TheModule->end();
+ F != E; ++F)
if (F->hasNotBeenReadFromBitcode() &&
materializeFunction(F, ErrInfo))
return 0;
- }
// Upgrade any intrinsic calls that slipped through (should not happen!) and
// delete the old functions to clean up. We can't do this unless the entire
@@ -2123,7 +2122,7 @@ Module *llvm::ParseBitcodeFile(MemoryBuffer *Buffer, std::string *ErrMsg){
// is run.
if (M)
M = R->releaseModule(ErrMsg);
-
+
delete R;
return M;
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 45462da..e931904 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -152,6 +152,9 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
bool AsmPrinter::doInitialization(Module &M) {
Mang = new Mangler(M, TAI->getGlobalPrefix(), TAI->getPrivateGlobalPrefix());
+ if (TAI->doesAllowQuotesInName())
+ Mang->setUseQuotes(true);
+
GCModuleInfo *MI = getAnalysisIfAvailable<GCModuleInfo>();
assert(MI && "AsmPrinter didn't require GCModuleInfo?");
@@ -174,9 +177,17 @@ bool AsmPrinter::doInitialization(Module &M) {
SwitchToDataSection(""); // Reset back to no section.
- MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- if (MMI) MMI->AnalyzeModule(M);
- DW = getAnalysisIfAvailable<DwarfWriter>();
+ if (TAI->doesSupportDebugInformation()
+ || TAI->doesSupportExceptionHandling()) {
+ MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ if (MMI) {
+ MMI->AnalyzeModule(M);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
+ if (DW)
+ DW->BeginModule(&M, MMI, O, this, TAI);
+ }
+ }
+
return false;
}
@@ -347,8 +358,9 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
const char* JumpTableDataSection = TAI->getJumpTableDataSection();
const Function *F = MF.getFunction();
unsigned SectionFlags = TAI->SectionFlagsForGlobal(F);
+ bool JTInDiffSection = false;
if ((IsPic && !(LoweringInfo && LoweringInfo->usesGlobalOffsetTable())) ||
- !JumpTableDataSection ||
+ !JumpTableDataSection ||
SectionFlags & SectionFlags::Linkonce) {
// In PIC mode, we need to emit the jump table to the same section as the
// function body itself, otherwise the label differences won't make sense.
@@ -357,6 +369,7 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
SwitchToSection(TAI->SectionForGlobal(F));
} else {
SwitchToDataSection(JumpTableDataSection);
+ JTInDiffSection = true;
}
EmitAlignment(Log2_32(MJTI->getAlignment()));
@@ -380,8 +393,10 @@ void AsmPrinter::EmitJumpTableInfo(MachineJumpTableInfo *MJTI,
// before each jump table. The first label is never referenced, but tells
// the assembler and linker the extents of the jump table object. The
// second label is actually referenced by the code.
- if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix())
- O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n";
+ if (JTInDiffSection) {
+ if (const char *JTLabelPrefix = TAI->getJumpTableSpecialLabelPrefix())
+ O << JTLabelPrefix << "JTI" << getFunctionNumber() << '_' << i << ":\n";
+ }
O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
<< '_' << i << ":\n";
@@ -502,7 +517,7 @@ const GlobalValue * AsmPrinter::findGlobalValue(const Constant *CV) {
void AsmPrinter::EmitLLVMUsedList(Constant *List) {
const char *Directive = TAI->getUsedDirective();
- // Should be an array of 'sbyte*'.
+ // Should be an array of 'i8*'.
ConstantArray *InitList = dyn_cast<ConstantArray>(List);
if (InitList == 0) return;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index c773378..9d340e3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1757,6 +1757,9 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
if (TimePassesIsEnabled)
DebugTimer->startTimer();
+ CompileUnit *Unit = MainCU;
+ if (!Unit)
+ Unit = &FindCompileUnit(SP.getCompileUnit());
GlobalVariable *GV = SP.getGV();
DenseMap<const GlobalVariable *, DbgScope *>::iterator
II = AbstractInstanceRootMap.find(GV);
@@ -1767,7 +1770,6 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
DbgScope *Scope = new DbgScope(NULL, DIDescriptor(GV));
// Get the compile unit context.
- CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit());
DIE *SPDie = Unit->getDieMapSlotFor(GV);
if (!SPDie)
SPDie = CreateSubprogramDIE(Unit, SP, false, true);
@@ -1789,7 +1791,6 @@ unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
// Create a concrete inlined instance for this inlined function.
DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(GV));
DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine);
- CompileUnit *Unit = &FindCompileUnit(SP.getCompileUnit());
ScopeDie->setAbstractCompileUnit(Unit);
DIE *Origin = Unit->getDieMapSlotFor(GV);
@@ -1850,7 +1851,14 @@ unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
}
SmallVector<DbgScope *, 8> &Scopes = I->second;
- assert(!Scopes.empty() && "We should have at least one debug scope!");
+ if (Scopes.empty()) {
+ // Returned ID is 0 if this is an unbalanced "end of inlined
+ // scope". This could happen if the optimizer eats dbg intrinsics
+ // or the "beginning of inlined scope" is not recognized due to
+ // missing location info. In such cases, ignore this region.end.
+ return 0;
+ }
+
DbgScope *Scope = Scopes.back(); Scopes.pop_back();
unsigned ID = MMI->NextLabelID();
MMI->RecordUsedDbgLabel(ID);
@@ -1987,8 +1995,8 @@ void DwarfDebug::EmitInitial() {
Asm->SwitchToDataSection(TAI->getDwarfARangesSection());
EmitLabel("section_aranges", 0);
- if (TAI->doesSupportMacInfoSection()) {
- Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) {
+ Asm->SwitchToDataSection(LineInfoDirective);
EmitLabel("section_macinfo", 0);
}
@@ -2534,9 +2542,9 @@ void DwarfDebug::EmitDebugRanges() {
/// EmitDebugMacInfo - Emit visible names into a debug macinfo section.
///
void DwarfDebug::EmitDebugMacInfo() {
- if (TAI->doesSupportMacInfoSection()) {
+ if (const char *LineInfoDirective = TAI->getDwarfMacroInfoSection()) {
// Start the dwarf macinfo section.
- Asm->SwitchToDataSection(TAI->getDwarfMacInfoSection());
+ Asm->SwitchToDataSection(LineInfoDirective);
Asm->EOL();
}
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
index f7ca4f4..a1b97df 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -190,7 +190,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
Asm->EmitULEB128Bytes(Offset);
Asm->EOL("Offset");
} else {
- assert(0 && "Machine move no supported yet.");
+ assert(0 && "Machine move not supported yet.");
}
} else if (Src.isReg() &&
Src.getReg() == MachineLocation::VirtualFP) {
@@ -200,7 +200,7 @@ void Dwarf::EmitFrameMoves(const char *BaseLabel, unsigned BaseLabelID,
Asm->EmitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), isEH));
Asm->EOL("Register");
} else {
- assert(0 && "Machine move no supported yet.");
+ assert(0 && "Machine move not supported yet.");
}
} else {
unsigned Reg = RI->getDwarfRegNum(Src.getReg(), isEH);
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 1d0887f..4d5c3c2 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -547,7 +547,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
// fallthrough.
if (!BBI.FalseBB)
BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB);
- assert(BBI.FalseBB && "Expected to find the fallthrough block!");
+ if (!BBI.FalseBB) {
+ // Malformed bcc? True and false blocks are the same?
+ BBI.IsUnpredicable = true;
+ return;
+ }
}
// Then scan all the instructions.
@@ -663,6 +667,13 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
return BBI;
}
+ // Do not ifcvt if true and false fallthrough blocks are the same.
+ if (!BBI.FalseBB) {
+ BBI.IsBeingAnalyzed = false;
+ BBI.IsAnalyzed = true;
+ return BBI;
+ }
+
BBInfo &TrueBBI = AnalyzeBlock(BBI.TrueBB, Tokens);
BBInfo &FalseBBI = AnalyzeBlock(BBI.FalseBB, Tokens);
diff --git a/lib/CodeGen/LazyLiveness.cpp b/lib/CodeGen/LazyLiveness.cpp
index 6fb35d2..a951c99 100644
--- a/lib/CodeGen/LazyLiveness.cpp
+++ b/lib/CodeGen/LazyLiveness.cpp
@@ -32,10 +32,12 @@ void LazyLiveness::computeBackedgeChain(MachineFunction& mf,
calculated.set(preorder[MBB]);
for (SparseBitVector<128>::iterator I = tmp.begin(); I != tmp.end(); ++I) {
+ assert(rev_preorder.size() > *I && "Unknown block!");
+
MachineBasicBlock* SrcMBB = rev_preorder[*I];
- for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin();
- SI != SrcMBB->succ_end(); ++SI) {
+ for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin(),
+ SE = SrcMBB->succ_end(); SI != SE; ++SI) {
MachineBasicBlock* TgtMBB = *SI;
if (backedges.count(std::make_pair(SrcMBB, TgtMBB)) &&
@@ -44,7 +46,8 @@ void LazyLiveness::computeBackedgeChain(MachineFunction& mf,
computeBackedgeChain(mf, TgtMBB);
tv[MBB].set(preorder[TgtMBB]);
- tv[MBB] |= tv[TgtMBB];
+ SparseBitVector<128> right = tv[TgtMBB];
+ tv[MBB] |= right;
}
}
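A hedged sketch of the aliasing hazard these copies avoid (tv is a DenseMap of SparseBitVectors): DenseMap::operator[] may grow the table and invalidate references to existing entries, so `tv[MBB] |= tv[TgtMBB]` risks writing through a dangling reference; snapshotting the right-hand side first is safe.

    #include "llvm/ADT/DenseMap.h"
    #include <stdint.h>

    void merge(llvm::DenseMap<void*, uint64_t> &tv, void *dst, void *src) {
      // uint64_t &d = tv[dst]; d |= tv[src];  // 'd' may dangle after growth
      uint64_t right = tv[src]; // snapshot the right-hand side by value first
      tv[dst] |= right;         // then look up the destination afresh
    }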
@@ -60,6 +63,12 @@ bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) {
backedge_target.clear();
calculated.clear();
preorder.clear();
+ rev_preorder.clear();
+
+ rv.resize(mf.size());
+ tv.resize(mf.size());
+ preorder.resize(mf.size());
+ rev_preorder.reserve(mf.size());
MRI = &mf.getRegInfo();
MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
@@ -106,8 +115,8 @@ bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) {
for (MachineBasicBlock::succ_iterator SI = (*POI)->succ_begin(),
SE = (*POI)->succ_end(); SI != SE; ++SI)
if (!backedges.count(std::make_pair(*POI, *SI)) && tv.count(*SI)) {
- SparseBitVector<128>& PBV = tv[*POI];
- PBV = tv[*SI];
+ SparseBitVector<128> right = tv[*SI];
+ tv[*POI] |= right;
}
for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()),
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 67120b8..cac9253 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -19,6 +19,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
@@ -305,9 +306,9 @@ void LiveInterval::removeRange(unsigned Start, unsigned End,
VNInfo *VNI = valnos.back();
valnos.pop_back();
VNI->~VNInfo();
- } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } while (!valnos.empty() && valnos.back()->isUnused());
} else {
- ValNo->def = ~1U;
+ ValNo->setIsUnused(true);
}
}
}
@@ -353,9 +354,9 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
VNInfo *VNI = valnos.back();
valnos.pop_back();
VNI->~VNInfo();
- } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } while (!valnos.empty() && valnos.back()->isUnused());
} else {
- ValNo->def = ~1U;
+ ValNo->setIsUnused(true);
}
}
@@ -371,9 +372,8 @@ void LiveInterval::scaleNumbering(unsigned factor) {
// Scale VNI info.
for (vni_iterator VNI = vni_begin(), VNIE = vni_end(); VNI != VNIE; ++VNI) {
VNInfo *vni = *VNI;
- if (vni->def != ~0U && vni->def != ~1U) {
- vni->def = InstrSlots::scale(vni->def, factor);
- }
+
+ vni->def = InstrSlots::scale(vni->def, factor);
for (unsigned i = 0; i < vni->kills.size(); ++i) {
if (vni->kills[i] != 0)
@@ -421,13 +421,13 @@ VNInfo *LiveInterval::findDefinedVNInfo(unsigned DefIdxOrReg) const {
return VNI;
}
-
/// join - Join two live intervals (this, and other) together. This applies
/// mappings to the value numbers in the LHS/RHS intervals as specified. If
/// the intervals are not joinable, this aborts.
void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments,
const int *RHSValNoAssignments,
- SmallVector<VNInfo*, 16> &NewVNInfo) {
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ MachineRegisterInfo *MRI) {
// Determine if any of our live range values are mapped. This is uncommon, so
// we want to avoid the interval scan if not.
bool MustMapCurValNos = false;
@@ -502,8 +502,18 @@ void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments,
}
weight += Other.weight;
- if (Other.preference && !preference)
- preference = Other.preference;
+
+ // Update regalloc hint if currently there isn't one.
+ if (TargetRegisterInfo::isVirtualRegister(reg) &&
+ TargetRegisterInfo::isVirtualRegister(Other.reg)) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(reg);
+ if (Hint.first == 0 && Hint.second == 0) {
+ std::pair<unsigned, unsigned> OtherHint =
+ MRI->getRegAllocationHint(Other.reg);
+ if (OtherHint.first || OtherHint.second)
+ MRI->setRegAllocationHint(reg, OtherHint.first, OtherHint.second);
+ }
+ }
}
/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live
@@ -582,9 +592,9 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS,
VNInfo *VNI = valnos.back();
valnos.pop_back();
VNI->~VNInfo();
- } while (!valnos.empty() && valnos.back()->def == ~1U);
+ } while (!valnos.empty() && valnos.back()->isUnused());
} else {
- V1->def = ~1U;
+ V1->setIsUnused(true);
}
}
}
@@ -611,7 +621,7 @@ void LiveInterval::MergeInClobberRanges(const LiveInterval &Clobbers,
else if (UnusedValNo)
ClobberValNo = UnusedValNo;
else {
- UnusedValNo = ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+ UnusedValNo = ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator);
ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
}
@@ -664,7 +674,7 @@ void LiveInterval::MergeInClobberRange(unsigned Start, unsigned End,
BumpPtrAllocator &VNInfoAllocator) {
// Find a value # to use for the clobber ranges. If there is already a value#
// for unknown values, use it.
- VNInfo *ClobberValNo = getNextValue(~0U, 0, VNInfoAllocator);
+ VNInfo *ClobberValNo = getNextValue(0, 0, false, VNInfoAllocator);
iterator IP = begin();
IP = std::upper_bound(IP, end(), Start);
@@ -747,24 +757,26 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
VNInfo *VNI = valnos.back();
valnos.pop_back();
VNI->~VNInfo();
- } while (valnos.back()->def == ~1U);
+ } while (valnos.back()->isUnused());
} else {
- V1->def = ~1U;
+ V1->setIsUnused(true);
}
return V2;
}
void LiveInterval::Copy(const LiveInterval &RHS,
+ MachineRegisterInfo *MRI,
BumpPtrAllocator &VNInfoAllocator) {
ranges.clear();
valnos.clear();
- preference = RHS.preference;
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
+ MRI->setRegAllocationHint(reg, Hint.first, Hint.second);
+
weight = RHS.weight;
for (unsigned i = 0, e = RHS.getNumValNums(); i != e; ++i) {
const VNInfo *VNI = RHS.getValNumInfo(i);
- VNInfo *NewVNI = getNextValue(~0U, 0, VNInfoAllocator);
- copyValNumInfo(NewVNI, VNI);
+ createValueCopy(VNI, VNInfoAllocator);
}
for (unsigned i = 0, e = RHS.ranges.size(); i != e; ++i) {
const LiveRange &LR = RHS.ranges[i];
@@ -816,22 +828,22 @@ void LiveInterval::print(std::ostream &OS,
const VNInfo *vni = *i;
if (vnum) OS << " ";
OS << vnum << "@";
- if (vni->def == ~1U) {
+ if (vni->isUnused()) {
OS << "x";
} else {
- if (vni->def == ~0U)
+ if (!vni->isDefAccurate())
OS << "?";
else
OS << vni->def;
unsigned ee = vni->kills.size();
- if (ee || vni->hasPHIKill) {
+ if (ee || vni->hasPHIKill()) {
OS << "-(";
for (unsigned j = 0; j != ee; ++j) {
OS << vni->kills[j];
if (j != ee-1)
OS << " ";
}
- if (vni->hasPHIKill) {
+ if (vni->hasPHIKill()) {
if (ee)
OS << " ";
OS << "phi";
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index cf0a648..d6931df 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -199,7 +199,7 @@ void LiveIntervals::computeNumbering() {
// Remap the VNInfo def index, which works the same as the
// start indices above. VN's with special sentinel defs
// don't need to be remapped.
- if (vni->def != ~0U && vni->def != ~1U) {
+ if (vni->isDefAccurate() && !vni->isUnused()) {
unsigned index = vni->def / InstrSlots::NUM;
unsigned offset = vni->def % InstrSlots::NUM;
if (offset == InstrSlots::LOAD) {
@@ -447,7 +447,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
CopyMI = mi;
// Earlyclobbers move back one.
- ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
assert(ValNo->id == 0 && "First value in interval is not 0?");
@@ -539,13 +539,15 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// The new value number (#1) is defined by the instruction we claimed
// defined value #0.
VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->copy,
+ false, // update at *
VNInfoAllocator);
-
+ ValNo->setFlags(OldValNo->getFlags()); // * <- updating here
+
// Value#0 is now defined by the 2-addr instruction.
OldValNo->def = RedefIndex;
OldValNo->copy = 0;
if (MO.isEarlyClobber())
- OldValNo->redefByEC = true;
+ OldValNo->setHasRedefByEC(true);
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
@@ -577,12 +579,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
DOUT << " Removing [" << Start << "," << End << "] from: ";
interval.print(DOUT, tri_); DOUT << "\n";
interval.removeRange(Start, End);
- VNI->hasPHIKill = true;
+ VNI->setHasPHIKill(true);
DOUT << " RESULT: "; interval.print(DOUT, tri_);
// Replace the interval with one of a NEW value number. Note that this
// value number isn't actually defined by an instruction, weird huh? :)
- LiveRange LR(Start, End, interval.getNextValue(~0, 0, VNInfoAllocator));
+ LiveRange LR(Start, End,
+ interval.getNextValue(mbb->getNumber(), 0, false, VNInfoAllocator));
+ LR.valno->setIsPHIDef(true);
DOUT << " replace range with " << LR;
interval.addRange(LR);
interval.addKill(LR.valno, End);
@@ -604,13 +608,13 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
mi->getOpcode() == TargetInstrInfo::SUBREG_TO_REG ||
tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))
CopyMI = mi;
- ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
+ ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
unsigned killIndex = getMBBEndIdx(mbb) + 1;
LiveRange LR(defIndex, killIndex, ValNo);
interval.addRange(LR);
interval.addKill(ValNo, killIndex);
- ValNo->hasPHIKill = true;
+ ValNo->setHasPHIKill(true);
DOUT << " +" << LR;
}
}
@@ -692,9 +696,9 @@ exit:
LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
bool Extend = OldLR != interval.end();
VNInfo *ValNo = Extend
- ? OldLR->valno : interval.getNextValue(start, CopyMI, VNInfoAllocator);
+ ? OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator);
if (MO.isEarlyClobber() && Extend)
- ValNo->redefByEC = true;
+ ValNo->setHasRedefByEC(true);
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
interval.addKill(LR.valno, end);
@@ -750,7 +754,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
DOUT << " killed";
end = getUseIndex(baseIndex) + 1;
SeenDefUse = true;
- goto exit;
+ break;
} else if (mi->modifiesRegister(interval.reg, tri_)) {
// Another instruction redefines the register before it is ever read.
// Then the register is essentially dead at the instruction that defines
@@ -759,7 +763,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
DOUT << " dead";
end = getDefIndex(start) + 1;
SeenDefUse = true;
- goto exit;
+ break;
}
baseIndex += InstrSlots::NUM;
@@ -771,7 +775,6 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
}
}
-exit:
// Live-in register might not be used at all.
if (!SeenDefUse) {
if (isAlias) {
@@ -783,7 +786,11 @@ exit:
}
}
- LiveRange LR(start, end, interval.getNextValue(~0U, 0, VNInfoAllocator));
+ VNInfo *vni =
+ interval.getNextValue(MBB->getNumber(), 0, false, VNInfoAllocator);
+ vni->setIsPHIDef(true);
+ LiveRange LR(start, end, vni);
+
interval.addRange(LR);
interval.addKill(LR.valno, end);
DOUT << " +" << LR << '\n';
@@ -896,7 +903,7 @@ LiveInterval* LiveIntervals::createInterval(unsigned reg) {
/// managing the allocated memory.
LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
LiveInterval *NewLI = createInterval(li->reg);
- NewLI->Copy(*li, getVNInfoAllocator());
+ NewLI->Copy(*li, mri_, getVNInfoAllocator());
return NewLI;
}
@@ -1099,13 +1106,12 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
i != e; ++i) {
const VNInfo *VNI = *i;
- unsigned DefIdx = VNI->def;
- if (DefIdx == ~1U)
+ if (VNI->isUnused())
continue; // Dead val#.
// Is the def for the val# rematerializable?
- if (DefIdx == ~0u)
+ if (!VNI->isDefAccurate())
return false;
- MachineInstr *ReMatDefMI = getInstructionFromIndex(DefIdx);
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
bool DefIsLoad = false;
if (!ReMatDefMI ||
!isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
@@ -1450,7 +1456,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (HasUse) {
if (CreatedNewVReg) {
LiveRange LR(getLoadIndex(index), getUseIndex(index)+1,
- nI.getNextValue(~0U, 0, VNInfoAllocator));
+ nI.getNextValue(0, 0, false, VNInfoAllocator));
DOUT << " +" << LR;
nI.addRange(LR);
} else {
@@ -1464,7 +1470,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
}
if (HasDef) {
LiveRange LR(getDefIndex(index), getStoreIndex(index),
- nI.getNextValue(~0U, 0, VNInfoAllocator));
+ nI.getNextValue(0, 0, false, VNInfoAllocator));
DOUT << " +" << LR;
nI.addRange(LR);
}
@@ -1840,14 +1846,14 @@ addIntervalsForSpillsFast(const LiveInterval &li,
unsigned index = getInstructionIndex(MI);
if (HasUse) {
LiveRange LR(getLoadIndex(index), getUseIndex(index),
- nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+ nI.getNextValue(0, 0, false, getVNInfoAllocator()));
DOUT << " +" << LR;
nI.addRange(LR);
vrm.addRestorePoint(NewVReg, MI);
}
if (HasDef) {
LiveRange LR(getDefIndex(index), getStoreIndex(index),
- nI.getNextValue(~0U, 0, getVNInfoAllocator()));
+ nI.getNextValue(0, 0, false, getVNInfoAllocator()));
DOUT << " +" << LR;
nI.addRange(LR);
vrm.addSpillPoint(NewVReg, true, MI);
@@ -1961,12 +1967,11 @@ addIntervalsForSpills(const LiveInterval &li,
i != e; ++i) {
const VNInfo *VNI = *i;
unsigned VN = VNI->id;
- unsigned DefIdx = VNI->def;
- if (DefIdx == ~1U)
+ if (VNI->isUnused())
continue; // Dead val#.
// Is the def for the val# rematerializable?
- MachineInstr *ReMatDefMI = (DefIdx == ~0u)
- ? 0 : getInstructionFromIndex(DefIdx);
+ MachineInstr *ReMatDefMI = VNI->isDefAccurate()
+ ? getInstructionFromIndex(VNI->def) : 0;
bool dummy;
if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
// Remember how to remat the def of this val#.
@@ -1977,7 +1982,7 @@ addIntervalsForSpills(const LiveInterval &li,
ReMatDefs[VN] = Clone;
bool CanDelete = true;
- if (VNI->hasPHIKill) {
+ if (VNI->hasPHIKill()) {
// A kill is a phi node, not all of its uses can be rematerialized.
// It must not be deleted.
CanDelete = false;
@@ -2287,8 +2292,8 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
LiveInterval& Interval = getOrCreateInterval(reg);
VNInfo* VN = Interval.getNextValue(
getInstructionIndex(startInst) + InstrSlots::DEF,
- startInst, getVNInfoAllocator());
- VN->hasPHIKill = true;
+ startInst, true, getVNInfoAllocator());
+ VN->setHasPHIKill(true);
VN->kills.push_back(getMBBEndIdx(startInst->getParent()));
LiveRange LR(getInstructionIndex(startInst) + InstrSlots::DEF,
getMBBEndIdx(startInst->getParent()) + 1, VN);
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 6228821..bd84508 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -359,12 +359,11 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
// That is, unless we are currently processing the last reference itself.
LastRefOrPartRef->addRegisterDead(Reg, TRI, true);
- /* Partial uses. Mark register def dead and add implicit def of
- sub-registers which are used.
- FIXME: LiveIntervalAnalysis can't handle this yet!
- EAX<dead> = op AL<imp-def>
- That is, EAX def is dead but AL def extends pass it.
- Enable this after live interval analysis is fixed to improve codegen!
+ // Partial uses. Mark register def dead and add implicit def of
+ // sub-registers which are used.
+ // EAX<dead> = op AL<imp-def>
+ // That is, EAX def is dead but AL def extends past it.
+ // Enable this after live interval analysis is fixed to improve codegen!
else if (!PhysRegUse[Reg]) {
PhysRegDef[Reg]->addRegisterDead(Reg, TRI, true);
for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
@@ -377,7 +376,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
PartUses.erase(*SS);
}
}
- } */
+ }
else
LastRefOrPartRef->addRegisterKilled(Reg, TRI, true);
return true;
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 4f5ab1f..544d83a 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -16,6 +16,7 @@ using namespace llvm;
MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
VRegInfo.reserve(256);
+ RegAllocHints.reserve(256);
RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1.
UsedPhysRegs.resize(TRI.getNumRegs());
@@ -64,6 +65,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
// Add a reg, but keep track of whether the vector reallocated or not.
void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0];
VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0));
+ RegAllocHints.push_back(std::make_pair(0, 0));
if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1)))
// The vector reallocated, handle this now.
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 97d4728..ae60c86 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -343,7 +343,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+ CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator());
return SS;
}
@@ -637,8 +637,9 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
if (Phis.count(MBB)) return Phis[MBB];
unsigned StartIndex = LIs->getMBBStartIdx(MBB);
- VNInfo *RetVNI = Phis[MBB] = LI->getNextValue(~0U, /*FIXME*/ 0,
- LIs->getVNInfoAllocator());
+ VNInfo *RetVNI = Phis[MBB] =
+ LI->getNextValue(0, /*FIXME*/ 0, false, LIs->getVNInfoAllocator());
+
if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
// If there are no uses or defs between our starting point and the
@@ -654,7 +655,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
IncomingVNs[*PI] = Incoming;
}
- if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill) {
+ if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) {
VNInfo* OldVN = RetVNI;
VNInfo* NewVN = IncomingVNs.begin()->second;
VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN);
@@ -678,7 +679,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
// VNInfo to represent the joined value.
for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
- I->second->hasPHIKill = true;
+ I->second->setHasPHIKill(true);
unsigned KillIndex = LIs->getMBBEndIdx(I->first);
if (!LiveInterval::isKill(I->second, KillIndex))
LI->addKill(I->second, KillIndex);
@@ -730,7 +731,9 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
unsigned DefIdx = LIs->getInstructionIndex(&*DI);
DefIdx = LiveIntervals::getDefIndex(DefIdx);
- VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc);
+ assert(DI->getOpcode() != TargetInstrInfo::PHI &&
+ "Following NewVN isPHIDef flag incorrect. Fix me!");
+ VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
// If the def is a move, set the copy field.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
@@ -793,7 +796,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
// Bail out if we ever encounter a valno that has a PHI kill. We can't
// renumber these.
- if (OldVN->hasPHIKill) return;
+ if (OldVN->hasPHIKill()) return;
VNsToCopy.push_back(OldVN);
@@ -823,9 +826,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
VNInfo* OldVN = *OI;
// Copy the valno over
- VNInfo* NewVN = NewLI.getNextValue(OldVN->def, OldVN->copy,
- LIs->getVNInfoAllocator());
- NewLI.copyValNumInfo(NewVN, OldVN);
+ VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator());
NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN);
// Remove the valno from the old interval
@@ -873,7 +874,7 @@ bool PreAllocSplitting::Rematerialize(unsigned vreg, VNInfo* ValNo,
MachineBasicBlock::iterator KillPt = BarrierMBB->end();
unsigned KillIdx = 0;
- if (ValNo->def == ~0U || DefMI->getParent() == BarrierMBB)
+ if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
else
KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx);
@@ -942,7 +943,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(~0U, 0, LSs->getVNInfoAllocator());
+ CurrSValNo = CurrSLI->getNextValue(0, 0, false, LSs->getVNInfoAllocator());
}
return FMI;
@@ -1032,13 +1033,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
CurrLI->FindLiveRangeContaining(LIs->getUseIndex(BarrierIdx));
VNInfo *ValNo = LR->valno;
- if (ValNo->def == ~1U) {
+ if (ValNo->isUnused()) {
// Defined by a dead def? How can this be?
assert(0 && "Val# is defined by a dead def?");
abort();
}
- MachineInstr *DefMI = (ValNo->def != ~0U)
+ MachineInstr *DefMI = ValNo->isDefAccurate()
? LIs->getInstructionFromIndex(ValNo->def) : NULL;
// If this would create a new join point, do not split.
@@ -1072,8 +1073,8 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
unsigned SpillIndex = 0;
MachineInstr *SpillMI = NULL;
int SS = -1;
- if (ValNo->def == ~0U) {
- // If it's defined by a phi, we must split just before the barrier.
+ if (!ValNo->isDefAccurate()) {
+ // If we don't know where the def is, we must split just before the barrier.
if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
BarrierMBB, SS, RefsInMBB))) {
SpillIndex = LIs->getInstructionIndex(SpillMI);
@@ -1254,17 +1255,16 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
// We don't currently try to handle definitions with PHI kills, because
// it would involve processing more than one VNInfo at once.
- if (CurrVN->hasPHIKill) continue;
+ if (CurrVN->hasPHIKill()) continue;
// We also don't try to handle the results of PHI joins, since there's
// no defining instruction to analyze.
- unsigned DefIdx = CurrVN->def;
- if (DefIdx == ~0U || DefIdx == ~1U) continue;
+ if (!CurrVN->isDefAccurate() || CurrVN->isUnused()) continue;
// We're only interested in eliminating cruft introduced by the splitter,
// which is of the form load-use or load-use-store. First, check that the
// definition is a load, and remember what stack slot we loaded it from.
- MachineInstr* DefMI = LIs->getInstructionFromIndex(DefIdx);
+ MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
int FrameIndex;
if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
@@ -1383,7 +1383,7 @@ bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
if (DefMBB == BarrierMBB)
return false;
- if (LR->valno->hasPHIKill)
+ if (LR->valno->hasPHIKill())
return false;
unsigned MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 804fae5..41a42fd 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -281,7 +281,8 @@ namespace {
/// getFreePhysReg - return a free physical register for this virtual
/// register interval if we have one, otherwise return 0.
unsigned getFreePhysReg(LiveInterval* cur);
- unsigned getFreePhysReg(const TargetRegisterClass *RC,
+ unsigned getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
unsigned MaxInactiveCount,
SmallVector<unsigned, 256> &inactiveCounts,
bool SkipDGRegs);
@@ -352,11 +353,12 @@ void RALinScan::ComputeRelatedRegClasses() {
/// different register classes or because the coalescer was overly
/// conservative.
unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
- if ((cur.preference && cur.preference == Reg) || !cur.containsOneValue())
+ unsigned Preference = vrm_->getRegAllocPref(cur.reg);
+ if ((Preference && Preference == Reg) || !cur.containsOneValue())
return Reg;
VNInfo *vni = cur.begin()->valno;
- if (!vni->def || vni->def == ~1U || vni->def == ~0U)
+ if (!vni->def || vni->isUnused() || !vni->isDefAccurate())
return Reg;
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg;
@@ -584,7 +586,7 @@ void RALinScan::linearScan()
// register allocator had to spill other registers in its register class.
if (ls_->getNumIntervals() == 0)
return;
- if (!vrm_->FindUnusedRegisters(tri_, li_))
+ if (!vrm_->FindUnusedRegisters(li_))
return;
}
@@ -743,7 +745,7 @@ static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
if (SI.hasAtLeastOneValue())
VNI = SI.getValNumInfo(0);
else
- VNI = SI.getNextValue(~0U, 0, ls_->getVNInfoAllocator());
+ VNI = SI.getNextValue(0, 0, false, ls_->getVNInfoAllocator());
LiveInterval &RI = li_->getInterval(cur->reg);
// FIXME: This may be overly conservative.
@@ -897,7 +899,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// This is an implicitly defined live interval, just assign any register.
const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
if (cur->empty()) {
- unsigned physReg = cur->preference;
+ unsigned physReg = vrm_->getRegAllocPref(cur->reg);
if (!physReg)
physReg = *RC->allocation_order_begin(*mf_);
DOUT << tri_->getName(physReg) << '\n';
@@ -917,9 +919,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// register class, then we should try to assign it the same register.
// This can happen when the move is from a larger register class to a smaller
// one, e.g. X86::mov32to32_. These move instructions are not coalescable.
- if (!cur->preference && cur->hasAtLeastOneValue()) {
+ if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
VNInfo *vni = cur->begin()->valno;
- if (vni->def && vni->def != ~1U && vni->def != ~0U) {
+ if (vni->def && !vni->isUnused() && vni->isDefAccurate()) {
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (CopyMI &&
@@ -935,7 +937,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
if (DstSubReg)
Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC);
if (Reg && allocatableRegs_[Reg] && RC->contains(Reg))
- cur->preference = Reg;
+ mri_->setRegAllocationHint(cur->reg, 0, Reg);
}
}
}
@@ -1044,7 +1046,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
// "Downgrade" physReg to try to keep physReg from being allocated until
// the next reload from the same SS is allocated.
- NextReloadLI->preference = physReg;
+ mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg);
DowngradeRegister(cur, physReg);
}
return;
@@ -1071,7 +1073,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// Find a register to spill.
float minWeight = HUGE_VALF;
- unsigned minReg = 0; /*cur->preference*/; // Try the pref register first.
+ unsigned minReg = 0;
bool Found = false;
std::vector<std::pair<unsigned,float> > RegsWeights;
@@ -1290,7 +1292,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
// If the interval has a preference, it must be defined by a copy. Clear the
// preference now since the source interval allocation may have been
// undone as well.
- i->preference = 0;
+ mri_->setRegAllocationHint(i->reg, 0, 0);
else {
UpgradeRegister(ii->second);
}
@@ -1346,15 +1348,23 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
}
}
-unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
+unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
+ const TargetRegisterClass *RC,
unsigned MaxInactiveCount,
SmallVector<unsigned, 256> &inactiveCounts,
bool SkipDGRegs) {
unsigned FreeReg = 0;
unsigned FreeRegInactiveCount = 0;
- TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
- TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
+ // Resolve second part of the hint (if possible) given the current allocation.
+ unsigned physReg = Hint.second;
+ if (physReg &&
+ TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+ physReg = vrm_->getPhys(physReg);
+
+ TargetRegisterClass::iterator I, E;
+ tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_);
assert(I != E && "No allocatable register in this register class!");
// Scan for the first available register.
@@ -1377,7 +1387,7 @@ unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
// return this register.
if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount)
return FreeReg;
-
+
// Continue scanning the registers, looking for the one with the highest
// inactive count. Alkis found that this reduced register pressure very
// slightly on X86 (in rev 1.94 of this file), though this should probably be
@@ -1428,20 +1438,21 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) {
// If copy coalescer has assigned a "preferred" register, check if it's
// available first.
- if (cur->preference) {
- DOUT << "(preferred: " << tri_->getName(cur->preference) << ") ";
- if (isRegAvail(cur->preference) &&
- RC->contains(cur->preference))
- return cur->preference;
+ unsigned Preference = vrm_->getRegAllocPref(cur->reg);
+ if (Preference) {
+ DOUT << "(preferred: " << tri_->getName(Preference) << ") ";
+ if (isRegAvail(Preference) &&
+ RC->contains(Preference))
+ return Preference;
}
if (!DowngradedRegs.empty()) {
- unsigned FreeReg = getFreePhysReg(RC, MaxInactiveCount, inactiveCounts,
+ unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts,
true);
if (FreeReg)
return FreeReg;
}
- return getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, false);
+ return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false);
}
FunctionPass* llvm::createLinearScanRegisterAllocator() {
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 61450a7..89e2c59 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -651,7 +651,7 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
if (stackInterval.getNumValNums() != 0)
vni = stackInterval.getValNumInfo(0);
else
- vni = stackInterval.getNextValue(-0U, 0, lss->getVNInfoAllocator());
+ vni = stackInterval.getNextValue(0, 0, false, lss->getVNInfoAllocator());
LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
stackInterval.MergeRangesInAsValue(rhsInterval, vni);
@@ -733,8 +733,7 @@ void PBQPRegAlloc::finalizeAlloc() const {
itr != end; ++itr) {
LiveInterval *li = *itr;
- unsigned physReg = li->preference;
-
+ unsigned physReg = vrm->getRegAllocPref(li->reg);
if (physReg == 0) {
const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
physReg = *liRC->allocation_order_begin(*mf);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 4a7dbeb..24fccf0 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -47,7 +47,6 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/DebugLoc.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Target/TargetData.h"
@@ -361,7 +360,7 @@ bool FastISel::SelectCall(User *I) {
// Returned ID is 0 if this is an unbalanced "end of inlined
// scope". This could happen if the optimizer eats dbg intrinsics
// or "beginning of inlined scope" is not recognized due to
- // missing location info. In such cases, do ignore this region.end.
+ // missing location info. In such cases, ignore this region.end.
BuildMI(MBB, DL, II).addImm(ID);
} else {
const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f3c2833..1bb8090 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2768,6 +2768,53 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
ISD::SETULT : ISD::SETUGT));
break;
}
+ case ISD::UMULO:
+ case ISD::SMULO: {
+ MVT VT = Node->getValueType(0);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+ if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (TLI.isTypeLegal(MVT::getIntegerVT(VT.getSizeInBits() * 2))) {
+ MVT WideVT = MVT::getIntegerVT(VT.getSizeInBits() * 2);
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(0));
+ TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
+ DAG.getIntPtrConstant(1));
+ } else {
+ // FIXME: We should be able to fall back to a libcall with an illegal
+ // type in some cases.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ assert(0 && "Don't know how to expand this operation yet!");
+ }
+ if (isSigned) {
+ Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
+ Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
+ ISD::SETNE);
+ } else {
+ TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ }
+ Results.push_back(BottomHalf);
+ Results.push_back(TopHalf);
+ break;
+ }
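[The widening branch above is easiest to follow with concrete types. A self-contained illustration of the same computation for i32 with a legal i64, as a hedged plain C++ sketch rather than DAG code (assumes arithmetic right shift for signed types):

  #include <cstdint>
  // UMULO: multiply in the double-width type; overflow iff the top half
  // is nonzero (the SETCC against 0 above).
  bool umulo32(uint32_t a, uint32_t b, uint32_t &lo) {
    uint64_t wide = (uint64_t)a * (uint64_t)b;
    lo = (uint32_t)wide;                  // EXTRACT_ELEMENT 0
    return (uint32_t)(wide >> 32) != 0;   // EXTRACT_ELEMENT 1
  }
  // SMULO: overflow iff the top half differs from the sign-extension of
  // the bottom half (the SRA/SETNE pair above).
  bool smulo32(int32_t a, int32_t b, int32_t &lo) {
    int64_t wide = (int64_t)a * (int64_t)b;
    lo = (int32_t)wide;
    return (int32_t)(wide >> 32) != (lo >> 31);
  }]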
case ISD::BUILD_PAIR: {
MVT PairTy = Node->getValueType(0);
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, PairTy, Node->getOperand(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index b7d7818..6e5adee 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -95,14 +95,13 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
if (InVT.isVector() && OutVT.isInteger()) {
// Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
// is legal but the result is not.
- MVT NVT = MVT::getVectorVT(TLI.getTypeToTransformTo(OutVT), 2);
+ MVT NVT = MVT::getVectorVT(NOutVT, 2);
if (isTypeLegal(NVT)) {
SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp);
- MVT EltNVT = NVT.getVectorElementType();
- Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltNVT, CastInOp,
+ Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
DAG.getIntPtrConstant(0));
- Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltNVT, CastInOp,
+ Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
DAG.getIntPtrConstant(1));
if (TLI.isBigEndian())
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index 93750d6..48ebd0f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -5317,8 +5317,12 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
if ((OpFlag & 7) == 2 /*REGDEF*/
|| (OpFlag & 7) == 6 /* EARLYCLOBBER REGDEF */) {
// Add (OpFlag&0xffff)>>3 registers to MatchedRegs.
- assert(!OpInfo.isIndirect &&
- "Don't know how to handle tied indirect register inputs yet!");
+ if (OpInfo.isIndirect) {
+ cerr << "llvm: error: "
+ "Don't know how to handle tied indirect "
+ "register inputs yet!\n";
+ exit(1);
+ }
RegsForValue MatchedRegs;
MatchedRegs.TLI = &TLI;
MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index ab4cd51..a771d46 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -171,6 +171,8 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
+ Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfi8";
+ Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfi16";
Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
@@ -183,6 +185,8 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
+ Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfi8";
+ Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfi16";
Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
@@ -271,6 +275,10 @@ RTLIB::Libcall RTLIB::getFPROUND(MVT OpVT, MVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) {
if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F32_I16;
if (RetVT == MVT::i32)
return FPTOSINT_F32_I32;
if (RetVT == MVT::i64)
@@ -306,6 +314,10 @@ RTLIB::Libcall RTLIB::getFPTOSINT(MVT OpVT, MVT RetVT) {
/// UNKNOWN_LIBCALL if there is none.
RTLIB::Libcall RTLIB::getFPTOUINT(MVT OpVT, MVT RetVT) {
if (OpVT == MVT::f32) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F32_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F32_I16;
if (RetVT == MVT::i32)
return FPTOUINT_F32_I32;
if (RetVT == MVT::i64)
@@ -2584,8 +2596,12 @@ bool TargetLowering::CheckTailCallReturnConstraints(CallSDNode *TheCall,
// Check that operand of the RET node sources from the CALL node. The RET node
// has at least two operands. Operand 0 holds the chain. Operand 1 holds the
// value.
+ // Also we need to check that there is no code in between the call and the
+ // return. Hence we also check that the incoming chain to the return sources
+ // from the outgoing chain of the call.
if (NumOps > 1 &&
- IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0))
+ IgnoreHarmlessInstructions(Ret.getOperand(1)) == SDValue(TheCall,0) &&
+ Ret.getOperand(0) == SDValue(TheCall, TheCall->getNumValues()-1))
return true;
// void return: The RET node has the chain result value of the CALL node as
// input.
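[What the extra operand-0 comparison enforces, informally: the RET must consume the CALL's outgoing chain directly, so any chain node scheduled between the two breaks the match. An illustrative (not verbatim) DAG shape:

  ch  = CALL ...          ; TheCall; its last value is the out-chain
  RET ch, v               ; Ret.getOperand(0) == SDValue(TheCall, N-1): ok
  ch2 = STORE ch, ...     ; intervening code on the chain
  RET ch2, v              ; chains differ: tail-call constraint fails]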
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 2bc234f..2034805 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -141,7 +141,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// The live interval of ECX is represented as this:
// %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
// The coalescer has no idea there was a def in the middle of [174,230].
- if (AValNo->redefByEC)
+ if (AValNo->hasRedefByEC())
return false;
// If AValNo is defined as a copy from IntB, we can potentially process this.
@@ -203,7 +203,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
LiveInterval &SRLI = li_->getInterval(*SR);
SRLI.addRange(LiveRange(FillerStart, FillerEnd,
- SRLI.getNextValue(FillerStart, 0, li_->getVNInfoAllocator())));
+ SRLI.getNextValue(FillerStart, 0, true,
+ li_->getVNInfoAllocator())));
}
}
@@ -304,8 +305,10 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
assert(ALR != IntA.end() && "Live range not found!");
VNInfo *AValNo = ALR->valno;
// If other defs can reach uses of this def, then it's not safe to perform
- // the optimization.
- if (AValNo->def == ~0U || AValNo->def == ~1U || AValNo->hasPHIKill)
+ // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
+ // tested?
+ if (AValNo->isPHIDef() || !AValNo->isDefAccurate() ||
+ AValNo->isUnused() || AValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
const TargetInstrDesc &TID = DefMI->getDesc();
@@ -351,7 +354,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
NewMI->getOperand(OpIdx).setIsKill();
- bool BHasPHIKill = BValNo->hasPHIKill;
+ bool BHasPHIKill = BValNo->hasPHIKill();
SmallVector<VNInfo*, 4> BDeadValNos;
SmallVector<unsigned, 4> BKills;
std::map<unsigned, unsigned> BExtend;
@@ -403,7 +406,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
// extended to the end of the existing live range defined by the copy.
unsigned DefIdx = li_->getDefIndex(UseIdx);
const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
- BHasPHIKill |= DLR->valno->hasPHIKill;
+ BHasPHIKill |= DLR->valno->hasPHIKill();
assert(DLR->valno->def == DefIdx);
BDeadValNos.push_back(DLR->valno);
BExtend[DLR->start] = DLR->end;
@@ -462,7 +465,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
}
}
IntB.addKills(ValNo, BKills);
- ValNo->hasPHIKill = BHasPHIKill;
+ ValNo->setHasPHIKill(BHasPHIKill);
DOUT << " result = "; IntB.print(DOUT, tri_);
DOUT << "\n";
@@ -578,8 +581,10 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
assert(SrcLR != SrcInt.end() && "Live range not found!");
VNInfo *ValNo = SrcLR->valno;
// If other defs can reach uses of this def, then it's not safe to perform
- // the optimization.
- if (ValNo->def == ~0U || ValNo->def == ~1U || ValNo->hasPHIKill)
+ // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
+ // tested?
+ if (ValNo->isPHIDef() || !ValNo->isDefAccurate() ||
+ ValNo->isUnused() || ValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
const TargetInstrDesc &TID = DefMI->getDesc();
@@ -616,19 +621,17 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
}
MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI));
- CopyMI->removeFromParent();
tii_->reMaterialize(*MBB, MII, DstReg, DefMI);
MachineInstr *NewMI = prior(MII);
if (checkForDeadDef) {
- // PR4090 fix: Trim interval failed because there was no use of the
- // source interval in this MBB. If the def is in this MBB too then we
- // should mark it dead:
- if (DefMI->getParent() == MBB) {
- DefMI->addRegisterDead(SrcInt.reg, tri_);
- SrcLR->end = SrcLR->start + 1;
- }
-
+ // PR4090 fix: Trim interval failed because there was no use of the
+ // source interval in this MBB. If the def is in this MBB too then we
+ // should mark it dead:
+ if (DefMI->getParent() == MBB) {
+ DefMI->addRegisterDead(SrcInt.reg, tri_);
+ SrcLR->end = SrcLR->start + 1;
+ }
}
// CopyMI may have implicit operands, transfer them over to the newly
@@ -647,7 +650,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
}
li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
- MBB->getParent()->DeleteMachineInstr(CopyMI);
+ CopyMI->eraseFromParent();
ReMatCopies.insert(CopyMI);
ReMatDefs.insert(DefMI);
++NumReMats;
@@ -673,7 +676,7 @@ bool SimpleRegisterCoalescing::isBackEdgeCopy(MachineInstr *CopyMI,
return false;
unsigned KillIdx = li_->getMBBEndIdx(MBB) + 1;
if (DstLR->valno->kills.size() == 1 &&
- DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill)
+ DstLR->valno->kills[0] == KillIdx && DstLR->valno->hasPHIKill())
return true;
return false;
}
@@ -937,7 +940,7 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
LiveInterval::iterator LR = li.FindLiveRangeContaining(CopyIdx);
if (LR == li.end())
return false;
- if (LR->valno->hasPHIKill)
+ if (LR->valno->hasPHIKill())
return false;
if (LR->valno->def != CopyIdx)
return false;
@@ -965,11 +968,11 @@ bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI,
}
-/// RemoveCopiesFromValNo - The specified value# is defined by an implicit
-/// def and it is being removed. Turn all copies from this value# into
-/// identity copies so they will be removed.
-void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li,
- VNInfo *VNI) {
+/// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an
+/// implicit_def and it is being removed. Turn all copies from this value#
+/// into implicit_defs.
+void SimpleRegisterCoalescing::TurnCopiesFromValNoToImpDefs(LiveInterval &li,
+ VNInfo *VNI) {
SmallVector<MachineInstr*, 4> ImpDefs;
MachineOperand *LastUse = NULL;
unsigned LastUseIdx = li_->getUseIndex(VNI->def);
@@ -979,9 +982,8 @@ void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li,
MachineInstr *MI = &*RI;
++RI;
if (MO->isDef()) {
- if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
ImpDefs.push_back(MI);
- }
continue;
}
if (JoinedCopies.count(MI))
@@ -994,13 +996,18 @@ void SimpleRegisterCoalescing::RemoveCopiesFromValNo(LiveInterval &li,
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
SrcReg == li.reg) {
- // Each use MI may have multiple uses of this register. Change them all.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.getReg() == li.reg)
- MO.setReg(DstReg);
- }
- JoinedCopies.insert(MI);
+ // Change it to an implicit_def.
+ MI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
+ for (int i = MI->getNumOperands() - 1, e = 0; i > e; --i)
+ MI->RemoveOperand(i);
+ // It's no longer a copy, update the valno it defines.
+ unsigned DefIdx = li_->getDefIndex(UseIdx);
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(DefIdx);
+ assert(DLR != DstInt.end() && "Live range not found!");
+ assert(DLR->valno->copy == MI);
+ DLR->valno->copy = NULL;
+ ReMatCopies.insert(MI);
} else if (UseIdx > LastUseIdx) {
LastUseIdx = UseIdx;
LastUse = MO;
@@ -1265,6 +1272,17 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
return true;
}
+/// getRegAllocPreference - Return the register allocation preference register.
+///
+static unsigned getRegAllocPreference(unsigned Reg, MachineFunction &MF,
+ MachineRegisterInfo *MRI,
+ const TargetRegisterInfo *TRI) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return 0;
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ return TRI->ResolveRegAllocHint(Hint.first, Hint.second, MF);
+}
+
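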
/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns true
/// if the copy was successfully coalesced away. If it is not currently
@@ -1566,7 +1584,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (PhysJoinTweak) {
if (SrcIsPhys) {
if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
- DstInt.preference = SrcReg;
+ mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg);
++numAborts;
DOUT << "\tMay tie down a physical register, abort!\n";
Again = true; // May be possible to coalesce later.
@@ -1574,7 +1592,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
}
} else {
if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
- SrcInt.preference = DstReg;
+ mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg);
++numAborts;
DOUT << "\tMay tie down a physical register, abort!\n";
Again = true; // May be possible to coalesce later.
@@ -1598,7 +1616,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
if (Length > Threshold &&
(((float)std::distance(mri_->use_begin(JoinVReg),
mri_->use_end()) / Length) < Ratio)) {
- JoinVInt.preference = JoinPReg;
+ mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
++numAborts;
DOUT << "\tMay tie down a physical register, abort!\n";
Again = true; // May be possible to coalesce later.
@@ -1669,9 +1687,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
E = SavedLI->vni_end(); I != E; ++I) {
const VNInfo *ValNo = *I;
VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->copy,
+ false, // updated at *
li_->getVNInfoAllocator());
- NewValNo->hasPHIKill = ValNo->hasPHIKill;
- NewValNo->redefByEC = ValNo->redefByEC;
+ NewValNo->setFlags(ValNo->getFlags()); // * updated here.
RealInt.addKills(NewValNo, ValNo->kills);
RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo);
}
@@ -1691,7 +1709,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
!SrcIsPhys && !DstIsPhys) {
if ((isExtSubReg && !Swapped) ||
((isInsSubReg || isSubRegToReg) && Swapped)) {
- ResSrcInt->Copy(*ResDstInt, li_->getVNInfoAllocator());
+ ResSrcInt->Copy(*ResDstInt, mri_, li_->getVNInfoAllocator());
std::swap(SrcReg, DstReg);
std::swap(ResSrcInt, ResDstInt);
}
@@ -1710,7 +1728,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
for (LiveInterval::const_vni_iterator i = ResSrcInt->vni_begin(),
e = ResSrcInt->vni_end(); i != e; ++i) {
const VNInfo *vni = *i;
- if (!vni->def || vni->def == ~1U || vni->def == ~0U)
+ // FIXME: Do isPHIDef and isDefAccurate both need to be tested?
+ if (!vni->def || vni->isUnused() || vni->isPHIDef() || !vni->isDefAccurate())
continue;
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
unsigned NewSrcReg, NewDstReg, NewSrcSubIdx, NewDstSubIdx;
@@ -1747,6 +1766,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// being merged.
li_->removeInterval(SrcReg);
+ // Update regalloc hint.
+ tri_->UpdateRegAllocHint(SrcReg, DstReg, *mf_);
+
// Manually delete the live interval copy.
if (SavedLI) {
SavedLI->clear();
@@ -1762,7 +1784,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
VNInfo *ImpVal = LR->valno;
assert(ImpVal->def == CopyIdx);
unsigned NextDef = LR->end;
- RemoveCopiesFromValNo(*ResDstInt, ImpVal);
+ TurnCopiesFromValNoToImpDefs(*ResDstInt, ImpVal);
ResDstInt->removeValNo(ImpVal);
LR = ResDstInt->FindLiveRangeContaining(NextDef);
if (LR != ResDstInt->end() && LR->valno->def == NextDef) {
@@ -1778,11 +1800,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// If the resulting interval has a preference that no longer fits because of subreg
// coalescing, just clear the preference.
- if (ResDstInt->preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
+ unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_);
+ if (Preference && (isExtSubReg || isInsSubReg || isSubRegToReg) &&
TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) {
const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg);
- if (!RC->contains(ResDstInt->preference))
- ResDstInt->preference = 0;
+ if (!RC->contains(Preference))
+ mri_->setRegAllocationHint(ResDstInt->reg, 0, 0);
}
DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_);
@@ -1856,7 +1879,8 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li,
unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno);
if (SrcReg == Reg)
return true;
- if (LR->valno->def == ~0U &&
+ // FIXME: Do isPHIDef and isDefAccurate both need to be tested?
+ if ((LR->valno->isPHIDef() || !LR->valno->isDefAccurate()) &&
TargetRegisterInfo::isPhysicalRegister(li.reg) &&
*tri_->getSuperRegisters(li.reg)) {
// It's a sub-register live interval, we may not have precise information.
@@ -2025,12 +2049,20 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
// Okay, the final step is to loop over the RHS live intervals, adding them to
// the LHS.
- LHSValNo->hasPHIKill |= VNI->hasPHIKill;
+ if (VNI->hasPHIKill())
+ LHSValNo->setHasPHIKill(true);
LHS.addKills(LHSValNo, VNI->kills);
LHS.MergeRangesInAsValue(RHS, LHSValNo);
LHS.weight += RHS.weight;
- if (RHS.preference && !LHS.preference)
- LHS.preference = RHS.preference;
+
+ // Update regalloc hint if both are virtual registers.
+ if (TargetRegisterInfo::isVirtualRegister(LHS.reg) &&
+ TargetRegisterInfo::isVirtualRegister(RHS.reg)) {
+ std::pair<unsigned, unsigned> RHSPref = mri_->getRegAllocationHint(RHS.reg);
+ std::pair<unsigned, unsigned> LHSPref = mri_->getRegAllocationHint(LHS.reg);
+ if (RHSPref != LHSPref)
+ mri_->setRegAllocationHint(LHS.reg, RHSPref.first, RHSPref.second);
+ }
// Update the liveintervals of sub-registers.
if (TargetRegisterInfo::isPhysicalRegister(LHS.reg))
@@ -2185,7 +2217,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy?
continue;
// DstReg is known to be a register in the LHS interval. If the src is
@@ -2202,7 +2234,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->def == ~1U || VNI->copy == 0) // Src not defined by a copy?
+ if (VNI->isUnused() || VNI->copy == 0) // Src not defined by a copy?
continue;
// DstReg is known to be a register in the RHS interval. If the src is
@@ -2222,7 +2254,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
i != e; ++i) {
VNInfo *VNI = *i;
unsigned VN = VNI->id;
- if (LHSValNoAssignments[VN] >= 0 || VNI->def == ~1U)
+ if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
continue;
ComputeUltimateVN(VNI, NewVNInfo,
LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
@@ -2232,7 +2264,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
i != e; ++i) {
VNInfo *VNI = *i;
unsigned VN = VNI->id;
- if (RHSValNoAssignments[VN] >= 0 || VNI->def == ~1U)
+ if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
continue;
// If this value number isn't a copy from the LHS, it's a new number.
if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
@@ -2296,7 +2328,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
VNInfo *VNI = I->first;
unsigned LHSValID = LHSValNoAssignments[VNI->id];
LiveInterval::removeKill(NewVNInfo[LHSValID], VNI->def);
- NewVNInfo[LHSValID]->hasPHIKill |= VNI->hasPHIKill;
+ if (VNI->hasPHIKill())
+ NewVNInfo[LHSValID]->setHasPHIKill(true);
RHS.addKills(NewVNInfo[LHSValID], VNI->kills);
}
@@ -2306,7 +2339,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
VNInfo *VNI = I->first;
unsigned RHSValID = RHSValNoAssignments[VNI->id];
LiveInterval::removeKill(NewVNInfo[RHSValID], VNI->def);
- NewVNInfo[RHSValID]->hasPHIKill |= VNI->hasPHIKill;
+ if (VNI->hasPHIKill())
+ NewVNInfo[RHSValID]->setHasPHIKill(true);
LHS.addKills(NewVNInfo[RHSValID], VNI->kills);
}
@@ -2315,10 +2349,12 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
if ((RHS.ranges.size() > LHS.ranges.size() &&
TargetRegisterInfo::isVirtualRegister(LHS.reg)) ||
TargetRegisterInfo::isPhysicalRegister(RHS.reg)) {
- RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo);
+ RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo,
+ mri_);
Swapped = true;
} else {
- LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo);
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+ mri_);
Swapped = false;
}
return true;
@@ -2620,6 +2656,11 @@ SimpleRegisterCoalescing::TurnCopyIntoImpDef(MachineBasicBlock::iterator &I,
return false;
LiveInterval &DstInt = li_->getInterval(DstReg);
const LiveRange *DstLR = DstInt.getLiveRangeContaining(CopyIdx);
+ // If the valno extends beyond this basic block, then it's not safe to delete
+ // the val# or else live-in information won't be correct.
+ MachineBasicBlock *EndMBB = li_->getMBBFromIndex(DstLR->end);
+ if (EndMBB != MBB)
+ return false;
DstInt.removeValNo(DstLR->valno);
CopyMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
for (int i = CopyMI->getNumOperands() - 1, e = 0; i > e; --i)
@@ -2800,7 +2841,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
}
// Slightly prefer live interval that has been assigned a preferred reg.
- if (LI.preference)
+ std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
+ if (Hint.first || Hint.second)
LI.weight *= 1.01F;
// Divide the weight of the interval by its size. This encourages
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index a495bfd..d2c5581 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -219,10 +219,10 @@ namespace llvm {
bool CanCoalesceWithImpDef(MachineInstr *CopyMI,
LiveInterval &li, LiveInterval &ImpLi) const;
- /// RemoveCopiesFromValNo - The specified value# is defined by an implicit
- /// def and it is being removed. Turn all copies from this value# into
- /// identity copies so they will be removed.
- void RemoveCopiesFromValNo(LiveInterval &li, VNInfo *VNI);
+ /// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an
+ /// implicit_def and it is being removed. Turn all copies from this value#
+ /// into implicit_defs.
+ void TurnCopiesFromValNoToImpDefs(LiveInterval &li, VNInfo *VNI);
/// isWinToJoinVRWithSrcPhysReg - Return true if it's worthwhile to join
/// a virtual destination register with a physical source register.
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index ce63121..919a0ce 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -39,7 +39,8 @@ protected:
VirtRegMap *vrm;
/// Construct a spiller base.
- SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) :
+ SpillerBase(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
+ VirtRegMap *vrm) :
mf(mf), lis(lis), ls(ls), vrm(vrm)
{
mfi = mf->getFrameInfo();
@@ -47,16 +48,24 @@ protected:
tii = mf->getTarget().getInstrInfo();
}
- /// Insert a store of the given vreg to the given stack slot immediately
- /// after the given instruction. Returns the base index of the inserted
- /// instruction. The caller is responsible for adding an appropriate
- /// LiveInterval to the LiveIntervals analysis.
- unsigned insertStoreFor(MachineInstr *mi, unsigned ss,
- unsigned newVReg,
- const TargetRegisterClass *trc) {
- MachineBasicBlock::iterator nextInstItr(mi);
- ++nextInstItr;
+ /// Ensure there is space before the given machine instruction and return
+ /// the instruction's new number.
+ unsigned makeSpaceBefore(MachineInstr *mi) {
+ if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
+ lis->scaleNumbering(2);
+ ls->scaleNumbering(2);
+ }
+
+ unsigned miIdx = lis->getInstructionIndex(mi);
+ assert(lis->hasGapBeforeInstr(miIdx));
+
+ return miIdx;
+ }
+
+ /// Ensure there is space after the given machine instruction and return
+ /// the instruction's new number.
+ unsigned makeSpaceAfter(MachineInstr *mi) {
if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
lis->scaleNumbering(2);
ls->scaleNumbering(2);
@@ -66,7 +75,24 @@ protected:
assert(lis->hasGapAfterInstr(miIdx));
- tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, newVReg,
+ return miIdx;
+ }
+
+
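[The scaleNumbering(2) calls in the two helpers above create insertion slots by doubling every instruction index, so formerly adjacent numbers gain free space between them. A worked example with made-up indices:

  before:  %a -> 4   %b -> 8    %c -> 12   (no gap before %b)
  after :  %a -> 8   %b -> 16   %c -> 24   (indices 9..15 now free, so a
                                            reload before %b gets a slot)

LiveStacks is scaled in step so stack-slot intervals stay aligned with the new numbering.]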
+ /// Insert a store of the given vreg to the given stack slot immediately
+ /// after the given instruction. Returns the base index of the inserted
+ /// instruction. The caller is responsible for adding an appropriate
+ /// LiveInterval to the LiveIntervals analysis.
+ unsigned insertStoreFor(MachineInstr *mi, unsigned ss,
+ unsigned vreg,
+ const TargetRegisterClass *trc) {
+
+ MachineBasicBlock::iterator nextInstItr(mi);
+ ++nextInstItr;
+
+ unsigned miIdx = makeSpaceAfter(mi);
+
+ tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg,
true, ss, trc);
MachineBasicBlock::iterator storeInstItr(mi);
++storeInstItr;
@@ -81,25 +107,35 @@ protected:
return storeInstIdx;
}
+ void insertStoreOnInterval(LiveInterval *li,
+ MachineInstr *mi, unsigned ss,
+ unsigned vreg,
+ const TargetRegisterClass *trc) {
+
+ unsigned storeInstIdx = insertStoreFor(mi, ss, vreg, trc);
+ unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)),
+ end = lis->getUseIndex(storeInstIdx);
+
+ VNInfo *vni =
+ li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator());
+ vni->kills.push_back(storeInstIdx);
+ LiveRange lr(start, end, vni);
+
+ li->addRange(lr);
+ }
+
/// Insert a load of the given vreg from the given stack slot immediately
/// before the given instruction. Returns the base index of the inserted
/// instruction. The caller is responsible for adding an appropriate
/// LiveInterval to the LiveIntervals analysis.
unsigned insertLoadFor(MachineInstr *mi, unsigned ss,
- unsigned newVReg,
+ unsigned vreg,
const TargetRegisterClass *trc) {
MachineBasicBlock::iterator useInstItr(mi);
-
- if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
- lis->scaleNumbering(2);
- ls->scaleNumbering(2);
- }
-
- unsigned miIdx = lis->getInstructionIndex(mi);
-
- assert(lis->hasGapBeforeInstr(miIdx));
-
- tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, newVReg, ss, trc);
+
+ unsigned miIdx = makeSpaceBefore(mi);
+
+ tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(mi);
--loadInstItr;
MachineInstr *loadInst = &*loadInstItr;
@@ -113,6 +149,24 @@ protected:
return loadInstIdx;
}
+ void insertLoadOnInterval(LiveInterval *li,
+ MachineInstr *mi, unsigned ss,
+ unsigned vreg,
+ const TargetRegisterClass *trc) {
+
+ unsigned loadInstIdx = insertLoadFor(mi, ss, vreg, trc);
+ unsigned start = lis->getDefIndex(loadInstIdx),
+ end = lis->getUseIndex(lis->getInstructionIndex(mi));
+
+ VNInfo *vni =
+ li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator());
+ vni->kills.push_back(lis->getInstructionIndex(mi));
+ LiveRange lr(start, end, vni);
+
+ li->addRange(lr);
+ }
+
+
/// Add spill ranges for every use/def of the live interval, inserting loads
/// immediately before each use, and stores after each def. No folding is
@@ -173,35 +227,16 @@ protected:
assert(hasUse || hasDef);
if (hasUse) {
- unsigned loadInstIdx = insertLoadFor(mi, ss, newVReg, trc);
- unsigned start = lis->getDefIndex(loadInstIdx),
- end = lis->getUseIndex(lis->getInstructionIndex(mi));
-
- VNInfo *vni =
- newLI->getNextValue(loadInstIdx, 0, lis->getVNInfoAllocator());
- vni->kills.push_back(lis->getInstructionIndex(mi));
- LiveRange lr(start, end, vni);
-
- newLI->addRange(lr);
+ insertLoadOnInterval(newLI, mi, ss, newVReg, trc);
}
if (hasDef) {
- unsigned storeInstIdx = insertStoreFor(mi, ss, newVReg, trc);
- unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)),
- end = lis->getUseIndex(storeInstIdx);
-
- VNInfo *vni =
- newLI->getNextValue(storeInstIdx, 0, lis->getVNInfoAllocator());
- vni->kills.push_back(storeInstIdx);
- LiveRange lr(start, end, vni);
-
- newLI->addRange(lr);
+ insertStoreOnInterval(newLI, mi, ss, newVReg, trc);
}
added.push_back(newLI);
}
-
return added;
}
@@ -212,13 +247,44 @@ protected:
/// folding.
class TrivialSpiller : public SpillerBase {
public:
- TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls, VirtRegMap *vrm) :
+
+ TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, LiveStacks *ls,
+ VirtRegMap *vrm) :
SpillerBase(mf, lis, ls, vrm) {}
std::vector<LiveInterval*> spill(LiveInterval *li) {
return trivialSpillEverywhere(li);
}
+ std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li, VNInfo *valno) {
+ std::vector<LiveInterval*> spillIntervals;
+ MachineBasicBlock::iterator storeInsertPoint;
+
+ if (valno->isDefAccurate()) {
+ // If we have an accurate def we can just grab an iterator to the instr
+ // after the def.
+ storeInsertPoint =
+ next(MachineBasicBlock::iterator(lis->getInstructionFromIndex(valno->def)));
+ } else {
+ // If the def info isn't accurate we check if this is a PHI def.
+ // If it is then def holds the index of the defining Basic Block, and we
+ // can use that to get an insertion point.
+ if (valno->isPHIDef()) {
+
+ } else {
+ // We have no usable def info. We can't split this value sensibly.
+ // FIXME: Need sensible feedback for "failure to split", an empty
+ // set of spill intervals could be reasonably returned from a
+ // split where both the store and load are folded.
+ return spillIntervals;
+ }
+ }
+
+
+
+ return spillIntervals;
+ }
+
};
}
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index cad054d..9c3900d 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -13,11 +13,14 @@
#include <vector>
namespace llvm {
+
class LiveInterval;
class LiveIntervals;
class LiveStacks;
class MachineFunction;
+ class MachineInstr;
class VirtRegMap;
+ class VNInfo;
/// Spiller interface.
///
@@ -26,7 +29,15 @@ namespace llvm {
class Spiller {
public:
virtual ~Spiller() = 0;
+
+ /// Spill the given live range. The method used will depend on the Spiller
+ /// implementation selected.
virtual std::vector<LiveInterval*> spill(LiveInterval *li) = 0;
+
+ /// Intra-block split.
+ virtual std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li,
+ VNInfo *valno) = 0;
+
};
/// Create and return a spiller object, as specified on the command line.
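[A hedged sketch of how the widened interface is driven; the createSpiller factory declared here is assumed to keep its (mf, lis, ls, vrm) parameter shape:

  // Illustrative caller, not from the tree.
  Spiller *spiller = createSpiller(mf, lis, ls, vrm);
  std::vector<LiveInterval*> added = spiller->spill(li);
  // intraBlockSplit is the new entry point; TrivialSpiller's version in
  // this commit is still a work-in-progress stub that returns an empty
  // vector when it cannot locate a usable def.]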
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index a2c1255..ca99528 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -827,7 +827,7 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
// Add a live range for the new vreg
LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
VNInfo* FirstVN = *Int.vni_begin();
- FirstVN->hasPHIKill = false;
+ FirstVN->setHasPHIKill(false);
if (I->getOperand(i).isKill())
FirstVN->kills.push_back(
LiveIntervals::getUseIndex(LI.getInstructionIndex(I)));
@@ -886,10 +886,7 @@ bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
VNInfo* OldVN = R.valno;
VNInfo*& NewVN = VNMap[OldVN];
if (!NewVN) {
- NewVN = LHS.getNextValue(OldVN->def,
- OldVN->copy,
- LI.getVNInfoAllocator());
- NewVN->kills = OldVN->kills;
+ NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator());
}
LiveRange LR (R.start, R.end, NewVN);
@@ -987,7 +984,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
LiveInterval& Int = LI.getOrCreateInterval(I->first);
const LiveRange* LR =
Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second));
- LR->valno->hasPHIKill = true;
+ LR->valno->setHasPHIKill(true);
I->second.erase(SI->first);
}
@@ -1037,7 +1034,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
// now has an unknown def.
unsigned idx = LI.getDefIndex(LI.getInstructionIndex(PInstr));
const LiveRange* PLR = PI.getLiveRangeContaining(idx);
- PLR->valno->def = ~0U;
+ PLR->valno->setIsPHIDef(true);
LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
PLR->start, PLR->valno);
PI.addRange(R);
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 29637b9..4d3417f 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -51,6 +51,7 @@ static RegisterPass<VirtRegMap>
X("virtregmap", "Virtual Register Map");
bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
+ MRI = &mf.getRegInfo();
TII = mf.getTarget().getInstrInfo();
TRI = mf.getTarget().getRegisterInfo();
MF = &mf;
@@ -98,6 +99,18 @@ void VirtRegMap::grow() {
ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1);
}
+unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
+ unsigned physReg = Hint.second;
+ if (physReg &&
+ TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
+ physReg = getPhys(physReg);
+ if (Hint.first == 0)
+ return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg))
+ ? physReg : 0;
+ return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
+}
+
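[Resolution order in getRegAllocPref above: a hinted virtual register is first translated through the map if it already has a physical assignment; hint type 0 is a plain preference; any other type is delegated to the target's ResolveRegAllocHint. From an allocator's side this collapses to one call, mirroring the linear-scan change earlier in this diff:

  unsigned Preference = vrm_->getRegAllocPref(cur->reg);
  if (Preference && isRegAvail(Preference) && RC->contains(Preference))
    return Preference;   // honor the hint when the register is free]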
int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
assert(TargetRegisterInfo::isVirtualRegister(virtReg));
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
@@ -213,8 +226,7 @@ void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) {
/// FindUnusedRegisters - Gather a list of allocatable registers that
/// have not been allocated to any virtual register.
-bool VirtRegMap::FindUnusedRegisters(const TargetRegisterInfo *TRI,
- LiveIntervals* LIs) {
+bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
unsigned NumRegs = TRI->getNumRegs();
UnusedRegs.reset();
UnusedRegs.resize(NumRegs);
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index 507557d..fe767b7 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -31,6 +31,7 @@ namespace llvm {
class LiveIntervals;
class MachineInstr;
class MachineFunction;
+ class MachineRegisterInfo;
class TargetInstrInfo;
class TargetRegisterInfo;
@@ -47,6 +48,7 @@ namespace llvm {
std::pair<unsigned, ModRef> > MI2VirtMapTy;
private:
+ MachineRegisterInfo *MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineFunction *MF;
@@ -190,6 +192,9 @@ namespace llvm {
grow();
}
+ /// @brief Returns the register allocation preference.
+ unsigned getRegAllocPref(unsigned virtReg);
+
/// @brief records virtReg is a split live interval from SReg.
void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
Virt2SplitMap[virtReg] = SReg;
@@ -445,8 +450,7 @@ namespace llvm {
/// FindUnusedRegisters - Gather a list of allocatable registers that
/// have not been allocated to any virtual register.
- bool FindUnusedRegisters(const TargetRegisterInfo *TRI,
- LiveIntervals* LIs);
+ bool FindUnusedRegisters(LiveIntervals* LIs);
/// HasUnusedRegisters - Return true if there are any allocatable registers
/// that have not been allocated to any virtual register.
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 83397a58..401a226 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -118,7 +118,7 @@ int LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
char **OutError) {
std::string Error;
if (ExecutionEngine *JIT =
- ExecutionEngine::createJIT(unwrap(MP), &Error, 0,
+ ExecutionEngine::create(unwrap(MP), false, &Error,
(CodeGenOpt::Level)OptLevel)) {
*OutJIT = wrap(JIT);
return 0;
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index 7c8ce70..e7a76cc 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -19,6 +19,7 @@ add_llvm_library(LLVMSupport
PrettyStackTrace.cpp
SlowOperationInformer.cpp
SmallPtrSet.cpp
+ SourceMgr.cpp
Statistic.cpp
Streams.cpp
StringExtras.cpp
diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp
index 6de6575..4e655a0 100644
--- a/lib/Support/ManagedStatic.cpp
+++ b/lib/Support/ManagedStatic.cpp
@@ -14,18 +14,15 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Config/config.h"
#include "llvm/System/Atomic.h"
-#include "llvm/System/Mutex.h"
#include <cassert>
using namespace llvm;
static const ManagedStaticBase *StaticList = 0;
-static sys::Mutex* ManagedStaticMutex = 0;
-
void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
void (*Deleter)(void*)) const {
- if (ManagedStaticMutex) {
- ManagedStaticMutex->acquire();
+ if (llvm_is_multithreaded()) {
+ llvm_acquire_global_lock();
if (Ptr == 0) {
void* tmp = Creator ? Creator() : 0;
@@ -39,7 +36,7 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(),
StaticList = this;
}
- ManagedStaticMutex->release();
+ llvm_release_global_lock();
} else {
assert(Ptr == 0 && DeleterFn == 0 && Next == 0 &&
"Partially initialized ManagedStatic!?");
@@ -68,24 +65,11 @@ void ManagedStaticBase::destroy() const {
DeleterFn = 0;
}
-bool llvm::llvm_start_multithreaded() {
-#if LLVM_MULTITHREADED
- assert(ManagedStaticMutex == 0 && "Multithreaded LLVM already initialized!");
- ManagedStaticMutex = new sys::Mutex(true);
- return true;
-#else
- return false;
-#endif
-}
-
/// llvm_shutdown - Deallocate and destroy all ManagedStatic variables.
void llvm::llvm_shutdown() {
while (StaticList)
StaticList->destroy();
- if (ManagedStaticMutex) {
- delete ManagedStaticMutex;
- ManagedStaticMutex = 0;
- }
+ if (llvm_is_multithreaded()) llvm_stop_multithreaded();
}
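[The file-local mutex is gone; ManagedStatic now leans on the process-wide lock exported by the new threading layer (see Threading.cpp in the lib/System CMakeLists.txt change below). The pattern it standardizes, sketched:

  if (llvm_is_multithreaded()) {
    llvm_acquire_global_lock();
    // ... read-modify-write of shared static state ...
    llvm_release_global_lock();
  }]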
diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp
new file mode 100644
index 0000000..d789f10
--- /dev/null
+++ b/lib/Support/SourceMgr.cpp
@@ -0,0 +1,127 @@
+//===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the SourceMgr class. This class is used as a simple
+// substrate for diagnostics, #include handling, and other low level things for
+// simple parsers.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+SourceMgr::~SourceMgr() {
+ while (!Buffers.empty()) {
+ delete Buffers.back().Buffer;
+ Buffers.pop_back();
+ }
+}
+
+/// AddIncludeFile - Search for a file with the specified name in the current
+/// directory or in one of the IncludeDirs. If no file is found, this returns
+/// ~0; otherwise it returns the buffer ID of the stacked file.
+unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
+ SMLoc IncludeLoc) {
+
+ MemoryBuffer *NewBuf = MemoryBuffer::getFile(Filename.c_str());
+
+ // If the file didn't exist directly, see if it's in an include path.
+ for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) {
+ std::string IncFile = IncludeDirectories[i] + "/" + Filename;
+ NewBuf = MemoryBuffer::getFile(IncFile.c_str());
+ }
+
+ if (NewBuf == 0) return ~0U;
+
+ return AddNewSourceBuffer(NewBuf, IncludeLoc);
+}
+
+
+/// FindBufferContainingLoc - Return the ID of the buffer containing the
+/// specified location, returning -1 if not found.
+int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
+ for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
+ if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
+ // Use <= here so that a pointer to the null at the end of the buffer
+ // is included as part of the buffer.
+ Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
+ return i;
+ return -1;
+}
+
+/// FindLineNumber - Find the line number for the specified location in the
+/// specified file. This is not a fast method.
+unsigned SourceMgr::FindLineNumber(SMLoc Loc, int BufferID) const {
+ if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc);
+ assert(BufferID != -1 && "Invalid Location!");
+
+ MemoryBuffer *Buff = getBufferInfo(BufferID).Buffer;
+
+ // Count the number of \n's between the start of the file and the specified
+ // location.
+ unsigned LineNo = 1;
+
+ const char *Ptr = Buff->getBufferStart();
+
+ for (; SMLoc::getFromPointer(Ptr) != Loc; ++Ptr)
+ if (*Ptr == '\n') ++LineNo;
+ return LineNo;
+}
+
+void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc) const {
+ if (IncludeLoc == SMLoc()) return; // Top of stack.
+
+ int CurBuf = FindBufferContainingLoc(IncludeLoc);
+ assert(CurBuf != -1 && "Invalid or unspecified location!");
+
+ PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc);
+
+ errs() << "Included from "
+ << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
+ << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
+}
+
+
+void SourceMgr::PrintMessage(SMLoc Loc, const std::string &Msg) const {
+ raw_ostream &OS = errs();
+
+ // First thing to do: find the current buffer containing the specified
+ // location.
+ int CurBuf = FindBufferContainingLoc(Loc);
+ assert(CurBuf != -1 && "Invalid or unspecified location!");
+
+ PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc);
+
+ MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer;
+
+
+ OS << "Parsing " << CurMB->getBufferIdentifier() << ":"
+ << FindLineNumber(Loc, CurBuf) << ": ";
+
+ OS << Msg << "\n";
+
+ // Scan backward to find the start of the line.
+ const char *LineStart = Loc.getPointer();
+ while (LineStart != CurMB->getBufferStart() &&
+ LineStart[-1] != '\n' && LineStart[-1] != '\r')
+ --LineStart;
+ // Get the end of the line.
+ const char *LineEnd = Loc.getPointer();
+ while (LineEnd != CurMB->getBufferEnd() &&
+ LineEnd[0] != '\n' && LineEnd[0] != '\r')
+ ++LineEnd;
+ // Print out the line.
+ OS << std::string(LineStart, LineEnd) << "\n";
+ // Print out spaces before the caret.
+ for (const char *Pos = LineStart; Pos != Loc.getPointer(); ++Pos)
+ OS << (*Pos == '\t' ? '\t' : ' ');
+ OS << "^\n";
+}
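[A hedged usage sketch for the new class; MemoryBuffer::getMemBuffer's (start, end, name) signature is assumed from the headers of this era:

  #include "llvm/Support/MemoryBuffer.h"
  #include "llvm/Support/SourceMgr.h"
  #include <cstring>
  using namespace llvm;

  void demo() {
    const char *Src = "line one\nbad token here\n";
    MemoryBuffer *Buf =
        MemoryBuffer::getMemBuffer(Src, Src + strlen(Src), "<demo>");
    SourceMgr SM;
    SM.AddNewSourceBuffer(Buf, SMLoc());          // SM now owns Buf
    SMLoc Loc = SMLoc::getFromPointer(Src + 9);   // start of line 2
    SM.PrintMessage(Loc, "expected identifier");
    // Prints "Parsing <demo>:2: expected identifier", echoes the line,
    // and draws a '^' caret under the offending column.
  }]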
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index e8cf69d..dd5c3d6 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -43,6 +43,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
switch (Kind) {
case UnknownOS: return "unknown";
+ case AuroraUX: return "auroraux";
case Darwin: return "darwin";
case DragonFly: return "dragonfly";
case FreeBSD: return "freebsd";
@@ -79,7 +80,9 @@ void Triple::Parse() const {
Vendor = UnknownVendor;
std::string OSName = getOSName();
- if (memcmp(&OSName[0], "darwin", 6) == 0)
+ if (memcmp(&OSName[0], "auroraux", 8) == 0)
+ OS = AuroraUX;
+ else if (memcmp(&OSName[0], "darwin", 6) == 0)
OS = Darwin;
else if (memcmp(&OSName[0], "dragonfly", 9) == 0)
OS = DragonFly;
diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp
index 2827d88..416f981 100644
--- a/lib/System/Atomic.cpp
+++ b/lib/System/Atomic.cpp
@@ -51,3 +51,31 @@ sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
# error No compare-and-swap implementation for your platform!
#endif
}
+
+sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) {
+#if LLVM_MULTITHREADED==0
+ ++(*ptr);
+ return *ptr;
+#elif defined(__GNUC__)
+ return __sync_add_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+ return InterlockedIncrement(ptr);
+#else
+# error No atomic increment implementation for your platform!
+#endif
+}
+
+sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) {
+#if LLVM_MULTITHREADED==0
+ --(*ptr);
+ return *ptr;
+#elif defined(__GNUC__)
+ return __sync_sub_and_fetch(ptr, 1);
+#elif defined(_MSC_VER)
+ return InterlockedDecrement(ptr);
+#else
+# error No atomic decrement implementation for your platform!
+#endif
+}
+
+
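[One common use of the new primitives, sketched with illustrative names:

  #include "llvm/System/Atomic.h"

  static volatile llvm::sys::cas_flag RefCount = 0;

  void retain()  { llvm::sys::AtomicIncrement(&RefCount); }
  bool release() { return llvm::sys::AtomicDecrement(&RefCount) == 0; }
  // Lowers to __sync_add_and_fetch/__sync_sub_and_fetch under GCC,
  // Interlocked{In,De}crement under MSVC, or plain ++/-- in
  // single-threaded builds, per the #if ladder above.]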
diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt
index 5415dd6..a5a56e8 100644
--- a/lib/System/CMakeLists.txt
+++ b/lib/System/CMakeLists.txt
@@ -10,7 +10,9 @@ add_llvm_library(LLVMSystem
Path.cpp
Process.cpp
Program.cpp
+ RWMutex.cpp
Signals.cpp
+ Threading.cpp
TimeValue.cpp
)
diff --git a/lib/System/Mutex.cpp b/lib/System/Mutex.cpp
index d95c25b..a5e9920 100644
--- a/lib/System/Mutex.cpp
+++ b/lib/System/Mutex.cpp
@@ -23,11 +23,11 @@
// Define all methods as no-ops if threading is explicitly disabled
namespace llvm {
using namespace sys;
-Mutex::Mutex( bool recursive) { }
-Mutex::~Mutex() { }
-bool Mutex::acquire() { return true; }
-bool Mutex::release() { return true; }
-bool Mutex::tryacquire() { return true; }
+MutexImpl::MutexImpl( bool recursive) { }
+MutexImpl::~MutexImpl() { }
+bool MutexImpl::acquire() { return true; }
+bool MutexImpl::release() { return true; }
+bool MutexImpl::tryacquire() { return true; }
}
#else
@@ -55,7 +55,7 @@ using namespace sys;
static const bool pthread_enabled = true;
// Construct a Mutex using pthread calls
-Mutex::Mutex( bool recursive)
+MutexImpl::MutexImpl( bool recursive)
: data_(0)
{
if (pthread_enabled)
@@ -94,7 +94,7 @@ Mutex::Mutex( bool recursive)
}
// Destruct a Mutex
-Mutex::~Mutex()
+MutexImpl::~MutexImpl()
{
if (pthread_enabled)
{
@@ -106,7 +106,7 @@ Mutex::~Mutex()
}
bool
-Mutex::acquire()
+MutexImpl::acquire()
{
if (pthread_enabled)
{
@@ -120,7 +120,7 @@ Mutex::acquire()
}
bool
-Mutex::release()
+MutexImpl::release()
{
if (pthread_enabled)
{
@@ -134,7 +134,7 @@ Mutex::release()
}
bool
-Mutex::tryacquire()
+MutexImpl::tryacquire()
{
if (pthread_enabled)
{
diff --git a/lib/System/RWMutex.cpp b/lib/System/RWMutex.cpp
new file mode 100644
index 0000000..15d98cb
--- /dev/null
+++ b/lib/System/RWMutex.cpp
@@ -0,0 +1,175 @@
+//===- RWMutex.cpp - Reader/Writer Mutual Exclusion Lock --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/System/RWMutex.h"
+#include <cstring>
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//=== independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// Define all methods as no-ops if threading is explicitly disabled
+namespace llvm {
+using namespace sys;
+RWMutexImpl::RWMutexImpl() { }
+RWMutexImpl::~RWMutexImpl() { }
+bool RWMutexImpl::reader_acquire() { return true; }
+bool RWMutexImpl::reader_release() { return true; }
+bool RWMutexImpl::writer_acquire() { return true; }
+bool RWMutexImpl::writer_release() { return true; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_RWLOCK_INIT)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+
+// This variable is useful for situations where the pthread library has been
+// compiled with weak linkage for its interface symbols. This allows the
+// threading support to be turned off by simply not linking against -lpthread.
+// In that situation, the value of pthread_rwlock_init will be 0 and
+// consequently pthread_enabled will be false. In such situations, all the
+// pthread operations become no-ops and the functions all return false. If
+// pthread_rwlock_init does have an address, then rwlock support is enabled.
+// Note: all LLVM tools will link against -lpthread if its available since it
+// is configured into the LIBS variable.
+// Note: this line of code generates a warning if pthread_rwlock_init is not
+// declared with weak linkage. It's safe to ignore the warning.
+static const bool pthread_enabled = true;
+
+// Construct a RWMutex using pthread calls
+RWMutexImpl::RWMutexImpl()
+ : data_(0)
+{
+ if (pthread_enabled)
+ {
+ // Declare the pthread_rwlock data structures
+ pthread_rwlock_t* rwlock =
+ static_cast<pthread_rwlock_t*>(malloc(sizeof(pthread_rwlock_t)));
+
+#ifdef __APPLE__
+ // Workaround a bug/mis-feature in Darwin's pthread_rwlock_init.
+ bzero(rwlock, sizeof(pthread_rwlock_t));
+#endif
+
+ pthread_rwlockattr_t attr;
+
+ // Initialize the rwlock attributes
+ int errorcode = pthread_rwlockattr_init(&attr);
+ assert(errorcode == 0);
+
+#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) && !defined(__DragonFly__)
+ // Make it a process local rwlock
+ errorcode = pthread_rwlockattr_setpshared(&attr, PTHREAD_PROCESS_PRIVATE);
+#endif
+
+ // Initialize the rwlock
+ errorcode = pthread_rwlock_init(rwlock, &attr);
+ assert(errorcode == 0);
+
+ // Destroy the attributes
+ errorcode = pthread_rwlockattr_destroy(&attr);
+ assert(errorcode == 0);
+
+ // Assign the data member
+ data_ = rwlock;
+ }
+}
+
+// Destruct a RWMutex
+RWMutexImpl::~RWMutexImpl()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+ pthread_rwlock_destroy(rwlock);
+ free(rwlock);
+ }
+}
+
+bool
+RWMutexImpl::reader_acquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_rdlock(rwlock);
+ return errorcode == 0;
+ }
+ return false;
+}
+
+bool
+RWMutexImpl::reader_release()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
+ }
+ return false;
+}
+
+bool
+RWMutexImpl::writer_acquire()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_wrlock(rwlock);
+ return errorcode == 0;
+ }
+ return false;
+}
+
+bool
+RWMutexImpl::writer_release()
+{
+ if (pthread_enabled)
+ {
+ pthread_rwlock_t* rwlock = static_cast<pthread_rwlock_t*>(data_);
+ assert(rwlock != 0);
+
+ int errorcode = pthread_rwlock_unlock(rwlock);
+ return errorcode == 0;
+ }
+ return false;
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/RWMutex.inc"
+#elif defined(LLVM_ON_WIN32)
+#include "Win32/RWMutex.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/RWMutex.cpp
+#endif
+#endif
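
The pthread path keeps a heap-allocated pthread_rwlock_t behind the void* data_ member so the public header stays platform-neutral, and every operation degrades to returning false when pthreads is linked out. A hedged RAII sketch of the reader side (ScopedReader is illustrative, not part of this patch):

#include "llvm/System/RWMutex.h"

// Sketch: scope-bound reader lock over the RWMutexImpl interface added above.
class ScopedReader {
  llvm::sys::RWMutexImpl &M;
public:
  explicit ScopedReader(llvm::sys::RWMutexImpl &m) : M(m) { M.reader_acquire(); }
  ~ScopedReader() { M.reader_release(); }
};
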
diff --git a/lib/System/Threading.cpp b/lib/System/Threading.cpp
new file mode 100644
index 0000000..a2d7f82
--- /dev/null
+++ b/lib/System/Threading.cpp
@@ -0,0 +1,63 @@
+//===-- llvm/System/Threading.cpp- Control multithreading mode --*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements llvm_start_multithreaded() and friends.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/System/Threading.h"
+#include "llvm/System/Atomic.h"
+#include "llvm/System/Mutex.h"
+#include <cassert>
+
+using namespace llvm;
+
+static bool multithreaded_mode = false;
+
+static sys::Mutex* global_lock = 0;
+
+bool llvm::llvm_start_multithreaded() {
+#ifdef LLVM_MULTITHREADED
+ assert(!multithreaded_mode && "Already multithreaded!");
+ multithreaded_mode = true;
+ global_lock = new sys::Mutex(true);
+
+ // We fence here to ensure that all initialization is complete BEFORE we
+ // return from llvm_start_multithreaded().
+ sys::MemoryFence();
+ return true;
+#else
+ return false;
+#endif
+}
+
+void llvm::llvm_stop_multithreaded() {
+#ifdef LLVM_MULTITHREADED
+ assert(multithreaded_mode && "Not currently multithreaded!");
+
+ // We fence here to ensure that all threaded operations are complete BEFORE we
+ // return from llvm_stop_multithreaded().
+ sys::MemoryFence();
+
+ multithreaded_mode = false;
+ delete global_lock;
+#endif
+}
+
+bool llvm::llvm_is_multithreaded() {
+ return multithreaded_mode;
+}
+
+void llvm::llvm_acquire_global_lock() {
+ if (multithreaded_mode) global_lock->acquire();
+}
+
+void llvm::llvm_release_global_lock() {
+ if (multithreaded_mode) global_lock->release();
+}
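
Note the ordering: the global lock is constructed before the memory fence, so a thread spawned after llvm_start_multithreaded() returns is guaranteed to observe a fully initialized lock. A sketch of the intended call pattern:

#include "llvm/System/Threading.h"

int main() {
  bool Threaded = llvm::llvm_start_multithreaded();  // false in single-threaded builds
  // ... spawn workers; shared mutations can be bracketed with
  // llvm::llvm_acquire_global_lock() / llvm::llvm_release_global_lock() ...
  if (Threaded)
    llvm::llvm_stop_multithreaded();  // join all workers before calling this
  return 0;
}
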
diff --git a/lib/System/Unix/Mutex.inc b/lib/System/Unix/Mutex.inc
index 4a015a6..10e7ecb 100644
--- a/lib/System/Unix/Mutex.inc
+++ b/lib/System/Unix/Mutex.inc
@@ -20,28 +20,28 @@ namespace llvm
{
using namespace sys;
-Mutex::Mutex( bool recursive)
+MutexImpl::MutexImpl( bool recursive)
{
}
-Mutex::~Mutex()
+MutexImpl::~MutexImpl()
{
}
bool
-Mutex::acquire()
+MutexImpl::acquire()
{
return true;
}
bool
-Mutex::release()
+MutexImpl::release()
{
return true;
}
bool
-Mutex::tryacquire( void )
+MutexImpl::tryacquire( void )
{
return true;
}
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index d5edee1..1f73571 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -104,6 +104,14 @@ Path::isValid() const {
}
bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+ assert(NameStart);
+ if (NameLen == 0)
+ return false;
+ return NameStart[0] == '/';
+}
+
+bool
Path::isAbsolute() const {
if (path.empty())
return false;
diff --git a/lib/System/Unix/RWMutex.inc b/lib/System/Unix/RWMutex.inc
new file mode 100644
index 0000000..e83d41e
--- /dev/null
+++ b/lib/System/Unix/RWMutex.inc
@@ -0,0 +1,43 @@
+//= llvm/System/Unix/RWMutex.inc - Unix Reader/Writer Mutual Exclusion Lock =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//=== is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() { }
+
+RWMutexImpl::~RWMutexImpl() { }
+
+bool RWMutexImpl::reader_acquire() {
+ return true;
+}
+
+bool RWMutexImpl::reader_release() {
+ return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+ return true;
+}
+
+bool RWMutexImpl::writer_release() {
+ return true;
+}
+
+}
diff --git a/lib/System/Unix/Unix.h b/lib/System/Unix/Unix.h
index 452226f..c2c06dd 100644
--- a/lib/System/Unix/Unix.h
+++ b/lib/System/Unix/Unix.h
@@ -79,12 +79,19 @@ static inline bool MakeErrMsg(
return true;
char buffer[MAXPATHLEN];
buffer[0] = 0;
+ char* str = buffer;
if (errnum == -1)
errnum = errno;
#ifdef HAVE_STRERROR_R
// strerror_r is thread-safe.
if (errnum)
+# if defined(__GLIBC__) && defined(_GNU_SOURCE)
+ // glibc defines its own incompatible version of strerror_r
+ // which may not use the buffer supplied.
+ str = strerror_r(errnum,buffer,MAXPATHLEN-1);
+# else
strerror_r(errnum,buffer,MAXPATHLEN-1);
+# endif
#elif HAVE_STRERROR
// Copy the thread un-safe result of strerror into
// the buffer as fast as possible to minimize impact
@@ -97,7 +104,7 @@ static inline bool MakeErrMsg(
// but, oh well, just use a generic message
sprintf(buffer, "Error #%d", errnum);
#endif
- *ErrMsg = prefix + ": " + buffer;
+ *ErrMsg = prefix + ": " + str;
return true;
}
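
This hunk works around glibc's _GNU_SOURCE strerror_r, which returns a char* that may point at a static string instead of filling the caller's buffer, while the POSIX variant returns int and always writes the buffer. The same dispatch as a standalone sketch; the feature-test macros mirror the patch and are an assumption for other libcs:

#include <cstring>
#include <string>

// Sketch: portable strerror_r wrapper using the patch's macro test.
static std::string ErrnoToString(int Err) {
  char Buf[256];
  Buf[0] = '\0';
  const char *Str = Buf;
#if defined(__GLIBC__) && defined(_GNU_SOURCE)
  Str = strerror_r(Err, Buf, sizeof(Buf));  // GNU: may ignore Buf entirely
#else
  strerror_r(Err, Buf, sizeof(Buf));        // XSI: fills Buf, returns int
#endif
  return std::string(Str);
}
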
diff --git a/lib/System/Win32/Mutex.inc b/lib/System/Win32/Mutex.inc
index 7c1723b..75f01fe 100644
--- a/lib/System/Win32/Mutex.inc
+++ b/lib/System/Win32/Mutex.inc
@@ -22,13 +22,13 @@
namespace llvm {
using namespace sys;
-Mutex::Mutex(bool /*recursive*/)
+MutexImpl::MutexImpl(bool /*recursive*/)
{
data_ = new CRITICAL_SECTION;
InitializeCriticalSection((LPCRITICAL_SECTION)data_);
}
-Mutex::~Mutex()
+MutexImpl::~MutexImpl()
{
DeleteCriticalSection((LPCRITICAL_SECTION)data_);
delete (LPCRITICAL_SECTION)data_;
@@ -36,21 +36,21 @@ Mutex::~Mutex()
}
bool
-Mutex::acquire()
+MutexImpl::acquire()
{
EnterCriticalSection((LPCRITICAL_SECTION)data_);
return true;
}
bool
-Mutex::release()
+MutexImpl::release()
{
LeaveCriticalSection((LPCRITICAL_SECTION)data_);
return true;
}
bool
-Mutex::tryacquire()
+MutexImpl::tryacquire()
{
return TryEnterCriticalSection((LPCRITICAL_SECTION)data_);
}
diff --git a/lib/System/Win32/Path.inc b/lib/System/Win32/Path.inc
index fbf8f66..683c94b 100644
--- a/lib/System/Win32/Path.inc
+++ b/lib/System/Win32/Path.inc
@@ -125,6 +125,20 @@ Path::isValid() const {
return true;
}
+bool
+Path::isAbsolute(const char *NameStart, unsigned NameLen) {
+ assert(NameStart);
+ switch (NameLen) {
+ case 0:
+ return false;
+ case 1:
+ case 2:
+ return NameStart[0] == '/';
+ default:
+ return NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/');
+ }
+}
+
bool
Path::isAbsolute() const {
switch (path.length()) {
@@ -234,7 +248,9 @@ Path::GetCurrentDirectory() {
/// GetMainExecutable - Return the path to the main executable, given the
/// value of argv[0] from program startup.
Path Path::GetMainExecutable(const char *argv0, void *MainAddr) {
- return Path();
+ char pathname[MAX_PATH];
+ DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH);
+ return ret != MAX_PATH ? Path(pathname) : Path();
}
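
GetModuleFileNameA returns the number of characters copied, and returns exactly the buffer size when the path did not fit, which is why ret == MAX_PATH is treated as failure above. A hedged sketch of the same test:

#include <windows.h>
#include <string>

// Sketch: same truncation test as the patch, reported as a boolean. Win32-only.
static bool GetSelfPath(std::string &Out) {
  char Buf[MAX_PATH];
  DWORD Len = ::GetModuleFileNameA(NULL, Buf, MAX_PATH);
  if (Len == 0 || Len == MAX_PATH)  // error, or possibly truncated
    return false;
  Out.assign(Buf, Len);
  return true;
}
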
diff --git a/lib/System/Win32/RWMutex.inc b/lib/System/Win32/RWMutex.inc
new file mode 100644
index 0000000..e269226
--- /dev/null
+++ b/lib/System/Win32/RWMutex.inc
@@ -0,0 +1,58 @@
+//= llvm/System/Win32/RWMutex.inc - Win32 Reader/Writer Mutual Exclusion Lock =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) RWMutex class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//=== is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+
+// FIXME: Windows does not have reader-writer locks pre-Vista. If you want
+// real reader-writer locks, you need a pthreads implementation for Windows.
+
+namespace llvm {
+using namespace sys;
+
+RWMutexImpl::RWMutexImpl() {
+ data_ = calloc(1, sizeof(CRITICAL_SECTION));
+ InitializeCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+}
+
+RWMutexImpl::~RWMutexImpl() {
+ DeleteCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ free(data_);
+}
+
+bool RWMutexImpl::reader_acquire() {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::reader_release() {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::writer_acquire() {
+ EnterCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+bool RWMutexImpl::writer_release() {
+ LeaveCriticalSection(static_cast<LPCRITICAL_SECTION>(data_));
+ return true;
+}
+
+
+}
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 594811d..9001e50 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -45,62 +45,72 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
// ARM Processors supported.
//
-class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
+include "ARMSchedule.td"
+
+class ProcNoItin<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, GenericItineraries, Features>;
// V4 Processors.
-def : Proc<"generic", []>;
-def : Proc<"arm8", []>;
-def : Proc<"arm810", []>;
-def : Proc<"strongarm", []>;
-def : Proc<"strongarm110", []>;
-def : Proc<"strongarm1100", []>;
-def : Proc<"strongarm1110", []>;
+def : ProcNoItin<"generic", []>;
+def : ProcNoItin<"arm8", []>;
+def : ProcNoItin<"arm810", []>;
+def : ProcNoItin<"strongarm", []>;
+def : ProcNoItin<"strongarm110", []>;
+def : ProcNoItin<"strongarm1100", []>;
+def : ProcNoItin<"strongarm1110", []>;
// V4T Processors.
-def : Proc<"arm7tdmi", [ArchV4T]>;
-def : Proc<"arm7tdmi-s", [ArchV4T]>;
-def : Proc<"arm710t", [ArchV4T]>;
-def : Proc<"arm720t", [ArchV4T]>;
-def : Proc<"arm9", [ArchV4T]>;
-def : Proc<"arm9tdmi", [ArchV4T]>;
-def : Proc<"arm920", [ArchV4T]>;
-def : Proc<"arm920t", [ArchV4T]>;
-def : Proc<"arm922t", [ArchV4T]>;
-def : Proc<"arm940t", [ArchV4T]>;
-def : Proc<"ep9312", [ArchV4T]>;
+def : ProcNoItin<"arm7tdmi", [ArchV4T]>;
+def : ProcNoItin<"arm7tdmi-s", [ArchV4T]>;
+def : ProcNoItin<"arm710t", [ArchV4T]>;
+def : ProcNoItin<"arm720t", [ArchV4T]>;
+def : ProcNoItin<"arm9", [ArchV4T]>;
+def : ProcNoItin<"arm9tdmi", [ArchV4T]>;
+def : ProcNoItin<"arm920", [ArchV4T]>;
+def : ProcNoItin<"arm920t", [ArchV4T]>;
+def : ProcNoItin<"arm922t", [ArchV4T]>;
+def : ProcNoItin<"arm940t", [ArchV4T]>;
+def : ProcNoItin<"ep9312", [ArchV4T]>;
// V5T Processors.
-def : Proc<"arm10tdmi", [ArchV5T]>;
-def : Proc<"arm1020t", [ArchV5T]>;
+def : ProcNoItin<"arm10tdmi", [ArchV5T]>;
+def : ProcNoItin<"arm1020t", [ArchV5T]>;
// V5TE Processors.
-def : Proc<"arm9e", [ArchV5TE]>;
-def : Proc<"arm926ej-s", [ArchV5TE]>;
-def : Proc<"arm946e-s", [ArchV5TE]>;
-def : Proc<"arm966e-s", [ArchV5TE]>;
-def : Proc<"arm968e-s", [ArchV5TE]>;
-def : Proc<"arm10e", [ArchV5TE]>;
-def : Proc<"arm1020e", [ArchV5TE]>;
-def : Proc<"arm1022e", [ArchV5TE]>;
-def : Proc<"xscale", [ArchV5TE]>;
-def : Proc<"iwmmxt", [ArchV5TE]>;
+def : ProcNoItin<"arm9e", [ArchV5TE]>;
+def : ProcNoItin<"arm926ej-s", [ArchV5TE]>;
+def : ProcNoItin<"arm946e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm966e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm968e-s", [ArchV5TE]>;
+def : ProcNoItin<"arm10e", [ArchV5TE]>;
+def : ProcNoItin<"arm1020e", [ArchV5TE]>;
+def : ProcNoItin<"arm1022e", [ArchV5TE]>;
+def : ProcNoItin<"xscale", [ArchV5TE]>;
+def : ProcNoItin<"iwmmxt", [ArchV5TE]>;
// V6 Processors.
-def : Proc<"arm1136j-s", [ArchV6]>;
-def : Proc<"arm1136jf-s", [ArchV6, FeatureVFP2]>;
-def : Proc<"arm1176jz-s", [ArchV6]>;
-def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>;
-def : Proc<"mpcorenovfp", [ArchV6]>;
-def : Proc<"mpcore", [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1136j-s", V6Itineraries,
+ [ArchV6]>;
+def : Processor<"arm1136jf-s", V6Itineraries,
+ [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1176jz-s", V6Itineraries,
+ [ArchV6]>;
+def : Processor<"arm1176jzf-s", V6Itineraries,
+ [ArchV6, FeatureVFP2]>;
+def : Processor<"mpcorenovfp", V6Itineraries,
+ [ArchV6]>;
+def : Processor<"mpcore", V6Itineraries,
+ [ArchV6, FeatureVFP2]>;
// V6T2 Processors.
-def : Proc<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>;
-def : Proc<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
+def : Processor<"arm1156t2-s", V6Itineraries,
+ [ArchV6T2, FeatureThumb2]>;
+def : Processor<"arm1156t2f-s", V6Itineraries,
+ [ArchV6T2, FeatureThumb2, FeatureVFP2]>;
// V7 Processors.
-def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>;
-def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : ProcNoItin<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>;
+def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;
//===----------------------------------------------------------------------===//
// Register File Description
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index f126760..47151e6 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -17,11 +17,6 @@ class CCIfSubtarget<string F, CCAction A>:
class CCIfAlign<string Align, CCAction A>:
CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;
-/// CCIfFloatABI - Match of the float ABI and the arg. ABIType may be "Hard" or
-/// "Soft".
-class CCIfFloatABI<string ABIType, CCAction A>:
- CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>;
-
//===----------------------------------------------------------------------===//
// ARM APCS Calling Convention
//===----------------------------------------------------------------------===//
@@ -105,25 +100,3 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
S9, S10, S11, S12, S13, S14, S15]>>,
CCDelegateTo<RetCC_ARM_AAPCS_Common>
]>;
-
-//===----------------------------------------------------------------------===//
-// ARM Calling Convention Dispatch
-//===----------------------------------------------------------------------===//
-
-def CC_ARM : CallingConv<[
- CCIfSubtarget<"isAAPCS_ABI()",
- CCIfSubtarget<"hasVFP2()",
- CCIfFloatABI<"Hard",
- CCDelegateTo<CC_ARM_AAPCS_VFP>>>>,
- CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>,
- CCDelegateTo<CC_ARM_APCS>
-]>;
-
-def RetCC_ARM : CallingConv<[
- CCIfSubtarget<"isAAPCS_ABI()",
- CCIfSubtarget<"hasVFP2()",
- CCIfFloatABI<"Hard",
- CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>,
- CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>,
- CCDelegateTo<RetCC_ARM_APCS>
-]>;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 44fac12..f6629fe 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -900,6 +900,10 @@ void Emitter<CodeEmitter>::emitMiscLoadStoreInstruction(const MachineInstr &MI,
// Set first operand
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
+ // Skip LDRD and STRD's second operand.
+ if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD)
+ ++OpIdx;
+
// Set second operand
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index ca3a9cb..1ed9e80 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -52,8 +52,13 @@ public:
virtual const char *getPassName() const {
return "ARM Instruction Selection";
- }
-
+ }
+
+ /// getI32Imm - Return a target constant with the specified value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+
SDNode *Select(SDValue Op);
virtual void InstructionSelect();
bool SelectAddrMode2(SDValue Op, SDValue N, SDValue &Base,
@@ -84,6 +89,9 @@ public:
bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base,
SDValue &OffImm);
+ bool SelectShifterOperand(SDValue Op, SDValue N,
+ SDValue &BaseReg, SDValue &Opc);
+
bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A,
SDValue &B, SDValue &C);
@@ -509,8 +517,30 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N,
return false;
}
+bool ARMDAGToDAGISel::SelectShifterOperand(SDValue Op,
+ SDValue N,
+ SDValue &BaseReg,
+ SDValue &Opc) {
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+ // Don't match base register only case. That is matched to a separate
+ // lower complexity pattern with explicit register operand.
+ if (ShOpcVal == ARM_AM::no_shift) return false;
+
+ BaseReg = N.getOperand(0);
+ unsigned ShImmVal = 0;
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)))
+ ShImmVal = RHS->getZExtValue() & 31;
+ else
+ return false;
+
+ Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
+
+ return true;
+}
+
bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
- SDValue N,
+ SDValue N,
SDValue &BaseReg,
SDValue &ShReg,
SDValue &Opc) {
@@ -549,6 +579,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
switch (N->getOpcode()) {
default: break;
case ISD::Constant: {
+ // ARMv6T2 and later should materialize imms via MOV / MOVT pair.
+ if (Subtarget->hasV6T2Ops() || Subtarget->hasThumb2())
+ break;
+
unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
bool UseCP = true;
if (Subtarget->isThumb())
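
SelectShifterOperand folds a shift-by-constant into the instruction's shifter operand: the shifted value stays in a register while the shift kind and the 5-bit amount are packed into a single target constant via ARM_AM::getSORegOpc. A sketch of such a packing scheme; the exact field layout here is an assumption for illustration, the real encoding lives in ARMAddressingModes.h:

// Sketch: low bits carry the shift kind, the masked amount sits above them.
enum ShiftKind { LSL = 0, LSR, ASR, ROR };

static unsigned PackSORegOpc(ShiftKind K, unsigned Amt) {
  return unsigned(K) | ((Amt & 31) << 3);  // Amt was already masked to 5 bits above
}
static ShiftKind UnpackKind(unsigned Opc) { return ShiftKind(Opc & 7); }
static unsigned UnpackAmt(unsigned Opc) { return Opc >> 3; }
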
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index ec8bd1f..2443625 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -292,6 +292,25 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setIfCvtBlockSizeLimit(Subtarget->isThumb() ? 0 : 10);
setIfCvtDupBlockSizeLimit(Subtarget->isThumb() ? 0 : 2);
+ if (!Subtarget->isThumb()) {
+ // Use branch latency information to determine if-conversion limits.
+ // FIXME: If-converter should use instruction latency of the branch being
+ // eliminated to compute the threshold. For ARMv6, the branch "latency"
+ // varies depending on whether it's dynamically or statically predicted
+ // and on whether the destination is in the prefetch buffer.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ const InstrItineraryData &InstrItins = Subtarget->getInstrItineraryData();
+ unsigned Latency = InstrItins.getLatency(TII->get(ARM::Bcc).getSchedClass());
+ if (Latency > 1) {
+ setIfCvtBlockSizeLimit(Latency-1);
+ if (Latency > 2)
+ setIfCvtDupBlockSizeLimit(Latency-2);
+ } else {
+ setIfCvtBlockSizeLimit(10);
+ setIfCvtDupBlockSizeLimit(2);
+ }
+ }
+
maxStoresPerMemcpy = 1; //// temporary - rewrite interface to use type
// Do not enable CodePlacementOpt for now: it currently runs after the
// ARMConstantIslandPass and messes up branch relaxation and placement
@@ -415,7 +434,7 @@ static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
ARM::NoRegister };
unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 4);
- if (Reg == 0)
+ if (Reg == 0)
return false; // we didn't handle it
unsigned i;
@@ -487,6 +506,33 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
State);
}
+/// CCAssignFnForNode - Selects the correct CCAssignFn for the
+/// given CallingConvention value.
+CCAssignFn *ARMTargetLowering::CCAssignFnForNode(unsigned CC,
+ bool Return) const {
+ switch (CC) {
+ default:
+ assert(0 && "Unsupported calling convention");
+ case CallingConv::C:
+ case CallingConv::Fast:
+ // Use target triple & subtarget features to do actual dispatch.
+ if (Subtarget->isAAPCS_ABI()) {
+ if (Subtarget->hasVFP2() &&
+ FloatABIType == FloatABI::Hard)
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ else
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ } else
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ case CallingConv::ARM_AAPCS_VFP:
+ return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
+ case CallingConv::ARM_AAPCS:
+ return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
+ case CallingConv::ARM_APCS:
+ return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
+ }
+}
+
/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
@@ -501,7 +547,8 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
SmallVector<CCValAssign, 16> RVLocs;
bool isVarArg = TheCall->isVarArg();
CCState CCInfo(CallingConv, isVarArg, getTargetMachine(), RVLocs);
- CCInfo.AnalyzeCallResult(TheCall, RetCC_ARM);
+ CCInfo.AnalyzeCallResult(TheCall,
+ CCAssignFnForNode(CallingConv, /* Return*/ true));
SmallVector<SDValue, 8> ResultVals;
@@ -586,8 +633,6 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
MVT RetVT = TheCall->getRetValType(0);
SDValue Chain = TheCall->getChain();
unsigned CC = TheCall->getCallingConv();
- assert((CC == CallingConv::C ||
- CC == CallingConv::Fast) && "unknown calling convention");
bool isVarArg = TheCall->isVarArg();
SDValue Callee = TheCall->getCallee();
DebugLoc dl = TheCall->getDebugLoc();
@@ -595,7 +640,7 @@ SDValue ARMTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeCallOperands(TheCall, CC_ARM);
+ CCInfo.AnalyzeCallOperands(TheCall, CCAssignFnForNode(CC, /* Return*/ false));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -788,7 +833,7 @@ SDValue ARMTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) {
CCState CCInfo(CC, isVarArg, getTargetMachine(), RVLocs);
// Analyze return values of ISD::RET.
- CCInfo.AnalyzeReturn(Op.getNode(), RetCC_ARM);
+ CCInfo.AnalyzeReturn(Op.getNode(), CCAssignFnForNode(CC, /* Return */ true));
// If this is the first return lowered for this function, add
// the regs to the liveout set for the function.
@@ -1085,7 +1130,8 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CC, isVarArg, getTargetMachine(), ArgLocs);
- CCInfo.AnalyzeFormalArguments(Op.getNode(), CC_ARM);
+ CCInfo.AnalyzeFormalArguments(Op.getNode(),
+ CCAssignFnForNode(CC, /* Return*/ false));
SmallVector<SDValue, 16> ArgValues;
@@ -1456,7 +1502,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
MVT VT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- unsigned FrameReg = (Subtarget->isThumb() || Subtarget->useThumbBacktraces())
+ unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
? ARM::R7 : ARM::R11;
SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
while (Depth--)
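
Two behavioral changes meet in this file: calling-convention dispatch moves from the deleted TableGen CC_ARM/RetCC_ARM wrappers into CCAssignFnForNode, and the if-conversion limits are now derived from the scheduled latency of Bcc. The threshold derivation from the constructor, restated as a sketch:

// Sketch: limit selection as in the ARM-mode block of the constructor above.
// A branch costing N cycles justifies predicating roughly N-1 instructions.
static void ComputeIfCvtLimits(unsigned BranchLatency,
                               unsigned &BlockLimit, unsigned &DupLimit) {
  BlockLimit = 10;  // defaults match the earlier setIfCvt* calls
  DupLimit = 2;
  if (BranchLatency > 1) {
    BlockLimit = BranchLatency - 1;
    if (BranchLatency > 2)
      DupLimit = BranchLatency - 2;
  }
}
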
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 2dab2db..8f53e39 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -151,6 +151,7 @@ namespace llvm {
///
unsigned ARMPCLabelIndex;
+ CCAssignFn *CCAssignFnForNode(unsigned CC, bool Return) const;
SDValue LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
const SDValue &StackPtr, const CCValAssign &VA,
SDValue Chain, SDValue Arg, ISD::ArgFlagsTy Flags);
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 4b0dbb5..d19fb8e 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -697,7 +697,6 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
- MBB.insert(MI, PopMI);
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
if (Reg == ARM::LR) {
@@ -706,10 +705,15 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
continue;
Reg = ARM::PC;
PopMI->setDesc(get(ARM::tPOP_RET));
- MBB.erase(MI);
+ MI = MBB.erase(MI);
}
PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
}
+
+ // It's illegal to emit pop instruction without operands.
+ if (PopMI->getNumOperands() > 0)
+ MBB.insert(MI, PopMI);
+
return true;
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index cc9f1a5..4707e3b 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -90,12 +90,12 @@ def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
-def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
-def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
-def IsARM : Predicate<"!Subtarget->isThumb()">;
+def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">;
+def IsThumb : Predicate<"Subtarget->isThumb()">;
+def HasThumb2 : Predicate<"Subtarget->hasThumb2()">;
+def IsARM : Predicate<"!Subtarget->isThumb()">;
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -539,7 +539,7 @@ let isReturn = 1, isTerminator = 1 in
LdStMulFrm, "ldm${p}${addr:submode} $addr, $dst1",
[]>;
-let isCall = 1,
+let isCall = 1, Itinerary = IIC_Br,
Defs = [R0, R1, R2, R3, R12, LR,
D0, D1, D2, D3, D4, D5, D6, D7, CPSR] in {
def BL : ABXI<0b1011, (outs), (ins i32imm:$func, variable_ops),
@@ -567,7 +567,7 @@ let isCall = 1,
}
}
-let isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1, Itinerary = IIC_Br in {
// B is "predicable" since it can be xformed into a Bcc.
let isBarrier = 1 in {
let isPredicable = 1 in
@@ -647,9 +647,8 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
let mayLoad = 1 in {
// Load doubleword
-def LDRD : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- "ldr", "d $dst, $addr",
- []>, Requires<[IsARM, HasV5T]>;
+def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
+ "ldr", "d $dst1, $addr", []>, Requires<[IsARM, HasV5T]>;
// Indexed loads
def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb),
@@ -709,9 +708,8 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm,
// Store doubleword
let mayStore = 1 in
-def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
- "str", "d $src, $addr",
- []>, Requires<[IsARM, HasV5T]>;
+def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),StMiscFrm,
+ "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5T]>;
// Indexed stores
def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
@@ -1387,6 +1385,12 @@ def : ARMV5TEPat<(add GPR:$acc,
include "ARMInstrThumb.td"
//===----------------------------------------------------------------------===//
+// Thumb2 Support
+//
+
+include "ARMInstrThumb2.td"
+
+//===----------------------------------------------------------------------===//
// Floating Point Support
//
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 54232f6..9297f08 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -277,6 +277,7 @@ def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops),
//
// Add with carry
+let isCommutable = 1 in
def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"adc $dst, $rhs",
[(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
@@ -311,6 +312,7 @@ def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs),
def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
"add $dst, $rhs * 4", []>;
+let isCommutable = 1 in
def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"and $dst, $rhs",
[(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>;
@@ -358,6 +360,7 @@ def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
// TODO: A7-37: CMP(3) - cmp hi regs
+let isCommutable = 1 in
def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"eor $dst, $rhs",
[(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>;
@@ -399,6 +402,7 @@ def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src),
"cpy $dst, $src\t@ hir2hir", []>;
} // neverHasSideEffects
+let isCommutable = 1 in
def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"mul $dst, $rhs",
[(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
@@ -411,6 +415,7 @@ def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src),
"neg $dst, $src",
[(set tGPR:$dst, (ineg tGPR:$src))]>;
+let isCommutable = 1 in
def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
"orr $dst, $rhs",
[(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 168fb45..07c71da 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -10,3 +10,199 @@
// This file describes the Thumb2 instruction set.
//
//===----------------------------------------------------------------------===//
+
+// Shifted operands. No register controlled shifts for Thumb2.
+// Note: We do not support rrx shifted operands yet.
+def t2_so_reg : Operand<i32>, // reg imm
+ ComplexPattern<i32, 2, "SelectShifterOperand",
+ [shl,srl,sra,rotr]> {
+ let PrintMethod = "printSOOperand";
+ let MIOperandInfo = (ops GPR, i32imm);
+}
+
+def LO16 : SDNodeXForm<imm, [{
+ // Transformation function: return the low 16 bits of the immediate.
+ return getI32Imm((unsigned short)N->getZExtValue());
+}]>;
+
+def HI16 : SDNodeXForm<imm, [{
+ // Transformation function: shift the immediate value down into the low bits.
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
+
+def imm16high : PatLeaf<(i32 imm), [{
+ // Returns true if all bits out of the [31..16] range are 0.
+ return ((N->getZExtValue() & 0xFFFF0000ULL) == N->getZExtValue());
+}], HI16>;
+
+def imm16high0xffff : PatLeaf<(i32 imm), [{
+ // Returns true if all of the low 16 bits are set.
+ return ((N->getZExtValue() & 0x0000FFFFULL) == 0xFFFFULL);
+}], HI16>;
+
+def imm0_4095 : PatLeaf<(i32 imm), [{
+ return (uint32_t)N->getZExtValue() < 4096;
+}]>;
+
+def imm0_4095_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)-N->getZExtValue() < 4096;
+}], imm_neg_XFORM>;
+
+def imm0_65535 : PatLeaf<(i32 imm), [{
+ return N->getZExtValue() < 65536;
+}]>;
+
+// A6.3.2 Modified immediate constants in Thumb instructions (#<const>)
+// FIXME: Move it to the addrmode matcher code.
+def t2_so_imm : PatLeaf<(i32 imm), [{
+ uint64_t v = N->getZExtValue();
+ if (v == 0 || v > 0xffffffffUL) return false;
+ // variant1 - 0b0000x - 8-bit which could be zero (not supported for now)
+
+ // variant2 - 0b00nnx - 8-bit repeated inside the 32-bit room
+ unsigned hi16 = (unsigned)(v >> 16);
+ unsigned lo16 = (unsigned)(v & 0xffffUL);
+ bool valid = (hi16 == lo16) && (
+ (v & 0x00ff00ffUL) == 0 || // type 0001x
+ (v & 0xff00ff00UL) == 0 || // type 0010x
+ ((lo16 >> 8) == (lo16 & 0xff))); // type 0011x
+ if (valid) return true;
+
+ // variant3 - 0b01000..0b11111 - 8-bit shifted inside the 32-bit room
+ unsigned shift = CountLeadingZeros_32(v);
+ uint64_t mask = (0xff000000ULL >> shift);
+ // If valid, it is type 01000 + shift
+ return ((shift < 24) && (v & mask) > 0) && ((v & (~mask)) == 0);
+}]>;
+
+
+//===----------------------------------------------------------------------===//
+// Thumb-2 instructions to cover the functionality of the ARM instruction set.
+//
+
+/// T2I_bin_irs - Defines a set of (op reg, {so_imm|reg|so_reg}) patterns for a
+/// binary operation that produces a value.
+multiclass T2I_bin_irs<string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ !strconcat(opc, " $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ Requires<[HasThumb2]>;
+ // register
+ def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+ !strconcat(opc, " $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ Requires<[HasThumb2]>;
+ // shifted register
+ def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+ !strconcat(opc, " $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ Requires<[HasThumb2]>;
+}
+
+/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
+/// instruction modifies the CPSR register.
+let Defs = [CPSR] in {
+multiclass T2I_bin_s_irs<string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ !strconcat(opc, "s $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ Requires<[HasThumb2]>;
+
+ // register
+ def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+ !strconcat(opc, "s $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ Requires<[HasThumb2]>;
+
+ // shifted register
+ def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+ !strconcat(opc, "s $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ Requires<[HasThumb2]>;
+}
+}
+
+/// T2I_bin_c_irs - Similar to T2I_bin_irs except it uses the 's' bit. Also the
+/// instruction can optionally set the CPSR register.
+let Uses = [CPSR] in {
+multiclass T2I_bin_c_irs<string opc, PatFrag opnode> {
+ // shifted imm
+ def ri : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs, cc_out:$s),
+ !strconcat(opc, "${s} $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ Requires<[HasThumb2]>;
+
+ // register
+ def rr : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs, cc_out:$s),
+ !strconcat(opc, "${s} $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ Requires<[HasThumb2]>;
+
+ // shifted register
+ def rs : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs, cc_out:$s),
+ !strconcat(opc, "${s} $dst, $lhs, $rhs"),
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ Requires<[HasThumb2]>;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+
+//===----------------------------------------------------------------------===//
+// Move Instructions.
+//
+def tMOVi16 : PseudoInst<(outs GPR:$dst), (ins i32imm:$src),
+ "movw $dst, $src",
+ [(set GPR:$dst, imm0_65535:$src)]>,
+ Requires<[HasThumb2]>;
+
+let isTwoAddress = 1 in
+def tMOVTi16 : PseudoInst<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
+ "movt $dst, $imm",
+ [(set GPR:$dst, (or (and GPR:$src, 0xffff),
+ imm16high:$imm))]>,
+ Requires<[HasThumb2]>;
+
+def : Pat<(and (or GPR:$src, imm16high:$imm1), imm16high0xffff:$imm2),
+ (tMOVTi16 GPR:$src, (HI16 imm16high:$imm1))>,
+ Requires<[HasThumb2]>;
+
+def : Pat<(i32 imm:$imm),
+ (tMOVTi16 (tMOVi16 (LO16 imm:$imm)),(HI16 imm:$imm))>,
+ Requires<[HasThumb2]>;
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions.
+//
+defm t2ADD : T2I_bin_irs <"add", BinOpFrag<(add node:$LHS, node:$RHS)>>;
+defm t2SUB : T2I_bin_irs <"sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+
+def tADDri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ "add $dst, $lhs, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm0_4095:$rhs))]>,
+ Requires<[HasThumb2]>;
+def tSUBri12 : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+ "sub $dst, $lhs, $rhs",
+ [(set GPR:$dst, (add GPR:$lhs, imm0_4095_neg:$rhs))]>,
+ Requires<[HasThumb2]>;
+
+defm t2ADDS : T2I_bin_s_irs<"add", BinOpFrag<(addc node:$LHS, node:$RHS)>>;
+defm t2SUBS : T2I_bin_s_irs<"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+
+defm t2ADC : T2I_bin_c_irs<"adc", BinOpFrag<(adde node:$LHS, node:$RHS)>>;
+defm t2SBC : T2I_bin_c_irs<"sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+
+
+def tMLS : PseudoInst<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ "mls $dst, $a, $b, $c",
+ [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>,
+ Requires<[HasThumb2]>;
+
+def tORNrs : PseudoInst<(outs GPR:$dst), (ins GPR:$src1, t2_so_reg:$src2),
+ "orn $dst, $src1, $src2",
+ [(set GPR:$dst, (or GPR:$src1, (not t2_so_reg: $src2)))]>,
+ Requires<[HasThumb2]>;
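
The t2_so_imm predicate accepts the A6.3.2 "modified immediate" forms: an 8-bit pattern replicated into alternating bytes or into all four bytes, or an 8-bit value shifted into place; the plain low 8-bit case is deliberately skipped for now. A compilable restatement of the same acceptance test, with CountLeadingZeros_32 replaced by a portable loop:

#include <cstdint>

// Sketch: count leading zero bits (stand-in for CountLeadingZeros_32).
static unsigned Clz32(uint32_t V) {
  unsigned N = 0;
  for (uint32_t Bit = 0x80000000u; Bit && !(V & Bit); Bit >>= 1) ++N;
  return N;
}

// Same acceptance test as the t2_so_imm PatLeaf above.
static bool IsT2SOImm(uint32_t V) {
  if (V == 0) return false;
  uint32_t Hi16 = V >> 16, Lo16 = V & 0xffff;
  bool Repeated = (Hi16 == Lo16) &&
                  ((V & 0x00ff00ffu) == 0 ||         // 0xXY00XY00
                   (V & 0xff00ff00u) == 0 ||         // 0x00XY00XY
                   ((Lo16 >> 8) == (Lo16 & 0xff)));  // 0xXYXYXYXY
  if (Repeated) return true;
  unsigned Shift = Clz32(V);
  uint32_t Mask = 0xff000000u >> Shift;              // 8-bit window at the top
  return Shift < 24 && (V & Mask) != 0 && (V & ~Mask) == 0;
}
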
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 684ecb4..59cf125 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -17,19 +17,22 @@
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/Compiler.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -39,6 +42,12 @@ STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
+STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
+STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
+STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
+STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
+STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
/// ARMAllocLoadStoreOpt - Post-register allocation pass to combine
/// load / store instructions to form ldm / stm instructions.
@@ -82,6 +91,8 @@ namespace {
SmallVector<MachineBasicBlock::iterator, 4> &Merges);
void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
+ bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI);
bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
};
@@ -586,13 +597,19 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
static int getMemoryOpOffset(const MachineInstr *MI) {
int Opcode = MI->getOpcode();
bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
unsigned NumOperands = MI->getDesc().getNumOperands();
unsigned OffField = MI->getOperand(NumOperands-3).getImm();
int Offset = isAM2
- ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
+ ? ARM_AM::getAM2Offset(OffField)
+ : (isAM3 ? ARM_AM::getAM3Offset(OffField)
+ : ARM_AM::getAM5Offset(OffField) * 4);
if (isAM2) {
if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
Offset = -Offset;
+ } else if (isAM3) {
+ if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
} else {
if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
Offset = -Offset;
@@ -600,6 +617,120 @@ static int getMemoryOpOffset(const MachineInstr *MI) {
return Offset;
}
+static void InsertLDR_STR(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ int OffImm, bool isDef,
+ DebugLoc dl, unsigned NewOpc,
+ unsigned Reg, bool RegDeadKill,
+ unsigned BaseReg, bool BaseKill,
+ unsigned OffReg, bool OffKill,
+ ARMCC::CondCodes Pred, unsigned PredReg,
+ const TargetInstrInfo *TII) {
+ unsigned Offset;
+ if (OffImm < 0)
+ Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift);
+ else
+ Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift);
+ if (isDef)
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addReg(OffReg, getKillRegState(OffKill))
+ .addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+ else
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(Reg, getKillRegState(RegDeadKill))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addReg(OffReg, getKillRegState(OffKill))
+ .addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+}
+
+bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI) {
+ MachineInstr *MI = &*MBBI;
+ unsigned Opcode = MI->getOpcode();
+ if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
+ unsigned EvenReg = MI->getOperand(0).getReg();
+ unsigned OddReg = MI->getOperand(1).getReg();
+ unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
+ unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
+ if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
+ return false;
+
+ bool isLd = Opcode == ARM::LDRD;
+ bool EvenDeadKill = isLd ?
+ MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
+ bool OddDeadKill = isLd ?
+ MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
+ const MachineOperand &BaseOp = MI->getOperand(2);
+ unsigned BaseReg = BaseOp.getReg();
+ bool BaseKill = BaseOp.isKill();
+ const MachineOperand &OffOp = MI->getOperand(3);
+ unsigned OffReg = OffOp.getReg();
+ bool OffKill = OffOp.isKill();
+ int OffImm = getMemoryOpOffset(MI);
+ unsigned PredReg = 0;
+ ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
+
+ if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) {
+ // Ascending register numbers and no offset. It's safe to change it to a
+ // ldm or stm.
+ unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM;
+ if (isLd) {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
+ .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
+ ++NumLDRD2LDM;
+ } else {
+ BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
+ .addReg(BaseReg, getKillRegState(BaseKill))
+ .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia))
+ .addImm(Pred).addReg(PredReg)
+ .addReg(EvenReg, getKillRegState(EvenDeadKill))
+ .addReg(OddReg, getKillRegState(OddDeadKill));
+ ++NumSTRD2STM;
+ }
+ } else {
+ // Split into two instructions.
+ unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR;
+ DebugLoc dl = MBBI->getDebugLoc();
+ // If this is a load and the base register is killed, it may be
+ // re-defed by the load; make sure the first load does not clobber it.
+ if (isLd &&
+ (BaseKill || OffKill) &&
+ (TRI->regsOverlap(EvenReg, BaseReg) ||
+ (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) {
+ assert(!TRI->regsOverlap(OddReg, BaseReg) &&
+ (!OffReg || !TRI->regsOverlap(OddReg, OffReg)));
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc, OddReg, OddDeadKill,
+ BaseReg, false, OffReg, false, Pred, PredReg, TII);
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill,
+ BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII);
+ } else {
+ InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
+ EvenReg, EvenDeadKill, BaseReg, false, OffReg, false,
+ Pred, PredReg, TII);
+ InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
+ OddReg, OddDeadKill, BaseReg, BaseKill, OffReg, OffKill,
+ Pred, PredReg, TII);
+ }
+ if (isLd)
+ ++NumLDRD2LDR;
+ else
+ ++NumSTRD2STR;
+ }
+
+ MBBI = prior(MBBI);
+ MBB.erase(MI);
+ }
+ return false;
+}
+
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
@@ -617,6 +748,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
RS->enterBasicBlock(&MBB);
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
+ if (FixInvalidRegPairOp(MBB, MBBI))
+ continue;
+
bool Advance = false;
bool TryMerge = false;
bool Clobber = false;
@@ -817,8 +951,10 @@ namespace {
static char ID;
ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+ const TargetData *TD;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const ARMSubtarget *STI;
MachineRegisterInfo *MRI;
virtual bool runOnMachineFunction(MachineFunction &Fn);
@@ -828,6 +964,11 @@ namespace {
}
private:
+ bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ unsigned &OffReg, unsigned &Offset,
+ unsigned &PredReg, ARMCC::CondCodes &Pred);
bool RescheduleOps(MachineBasicBlock *MBB,
SmallVector<MachineInstr*, 4> &Ops,
unsigned Base, bool isLd,
@@ -838,8 +979,10 @@ namespace {
}
bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ TD = Fn.getTarget().getTargetData();
TII = Fn.getTarget().getInstrInfo();
TRI = Fn.getTarget().getRegisterInfo();
+ STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
MRI = &Fn.getRegInfo();
bool Modified = false;
@@ -850,15 +993,19 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
return Modified;
}
-static bool IsSafeToMove(bool isLd, unsigned Base,
- MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator E,
- SmallPtrSet<MachineInstr*, 4> MoveOps,
- const TargetRegisterInfo *TRI) {
+static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallPtrSet<MachineInstr*, 4> &MemOps,
+ SmallSet<unsigned, 4> &MemRegs,
+ const TargetRegisterInfo *TRI) {
// Are there stores / loads / calls between them?
// FIXME: This is overly conservative. We should make use of alias information
// some day.
+ SmallSet<unsigned, 4> AddedRegPressure;
while (++I != E) {
+ if (MemOps.count(&*I))
+ continue;
const TargetInstrDesc &TID = I->getDesc();
if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
return false;
@@ -871,15 +1018,76 @@ static bool IsSafeToMove(bool isLd, unsigned Base,
// str r1, [r0]
// strh r5, [r0]
// str r4, [r0, #+4]
- if (TID.mayStore() && !MoveOps.count(&*I))
+ if (TID.mayStore())
return false;
}
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
MachineOperand &MO = I->getOperand(j);
- if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base))
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (MO.isDef() && TRI->regsOverlap(Reg, Base))
return false;
+ if (Reg != Base && !MemRegs.count(Reg))
+ AddedRegPressure.insert(Reg);
}
}
+
+ // Estimate register pressure increase due to the transformation.
+ if (MemRegs.size() <= 4)
+ // Ok if we are moving small number of instructions.
+ return true;
+ return AddedRegPressure.size() <= MemRegs.size() * 2;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
+ DebugLoc &dl,
+ unsigned &NewOpc, unsigned &EvenReg,
+ unsigned &OddReg, unsigned &BaseReg,
+ unsigned &OffReg, unsigned &Offset,
+ unsigned &PredReg,
+ ARMCC::CondCodes &Pred) {
+ // FIXME: FLDS / FSTS -> FLDD / FSTD
+ unsigned Opcode = Op0->getOpcode();
+ if (Opcode == ARM::LDR)
+ NewOpc = ARM::LDRD;
+ else if (Opcode == ARM::STR)
+ NewOpc = ARM::STRD;
+ else
+ return false;
+
+ // Make sure the base address satisfies the i64 ld / st alignment requirement.
+ if (!Op0->hasOneMemOperand() ||
+ !Op0->memoperands_begin()->getValue() ||
+ Op0->memoperands_begin()->isVolatile())
+ return false;
+
+ unsigned Align = Op0->memoperands_begin()->getAlignment();
+ unsigned ReqAlign = STI->hasV6Ops()
+ ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 need 8-byte align
+ if (Align < ReqAlign)
+ return false;
+
+ // Then make sure the immediate offset fits.
+ int OffImm = getMemoryOpOffset(Op0);
+ ARM_AM::AddrOpc AddSub = ARM_AM::add;
+ if (OffImm < 0) {
+ AddSub = ARM_AM::sub;
+ OffImm = - OffImm;
+ }
+ if (OffImm >= 256) // 8 bits
+ return false;
+ Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
+
+ EvenReg = Op0->getOperand(0).getReg();
+ OddReg = Op1->getOperand(0).getReg();
+ if (EvenReg == OddReg)
+ return false;
+ BaseReg = Op0->getOperand(1).getReg();
+ OffReg = Op0->getOperand(2).getReg();
+ Pred = getInstrPredicate(Op0, PredReg);
+ dl = Op0->getDebugLoc();
return true;
}
@@ -902,6 +1110,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
MachineInstr *FirstOp = 0;
MachineInstr *LastOp = 0;
int LastOffset = 0;
+ unsigned LastOpcode = 0;
unsigned LastBytes = 0;
unsigned NumMove = 0;
for (int i = Ops.size() - 1; i >= 0; --i) {
@@ -916,6 +1125,10 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
LastOp = Op;
}
+ unsigned Opcode = Op->getOpcode();
+ if (LastOpcode && Opcode != LastOpcode)
+ break;
+
int Offset = getMemoryOpOffset(Op);
unsigned Bytes = getLSMultipleTransferSize(Op);
if (LastBytes) {
@@ -924,34 +1137,80 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
}
LastOffset = Offset;
LastBytes = Bytes;
- if (++NumMove == 4)
+ LastOpcode = Opcode;
+ if (++NumMove == 8) // FIXME: Tune
break;
}
if (NumMove <= 1)
Ops.pop_back();
else {
- SmallPtrSet<MachineInstr*, 4> MoveOps;
- for (int i = NumMove-1; i >= 0; --i)
- MoveOps.insert(Ops[i]);
+ SmallPtrSet<MachineInstr*, 4> MemOps;
+ SmallSet<unsigned, 4> MemRegs;
+ for (int i = NumMove-1; i >= 0; --i) {
+ MemOps.insert(Ops[i]);
+ MemRegs.insert(Ops[i]->getOperand(0).getReg());
+ }
// Be conservative, if the instructions are too far apart, don't
// move them. We want to limit the increase of register pressure.
- bool DoMove = (LastLoc - FirstLoc) < NumMove*4;
+ bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
if (DoMove)
- DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI);
+ DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
+ MemOps, MemRegs, TRI);
if (!DoMove) {
for (unsigned i = 0; i != NumMove; ++i)
Ops.pop_back();
} else {
// This is the new location for the loads / stores.
MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
- while (InsertPos != MBB->end() && MoveOps.count(InsertPos))
+ while (InsertPos != MBB->end() && MemOps.count(InsertPos))
++InsertPos;
- for (unsigned i = 0; i != NumMove; ++i) {
- MachineInstr *Op = Ops.back();
+
+ // If we are moving a pair of loads / stores, see if it makes sense
+ // to try to allocate a pair of registers that can form register pairs.
+ MachineInstr *Op0 = Ops.back();
+ MachineInstr *Op1 = Ops[Ops.size()-2];
+ unsigned EvenReg = 0, OddReg = 0;
+ unsigned BaseReg = 0, OffReg = 0, PredReg = 0;
+ ARMCC::CondCodes Pred = ARMCC::AL;
+ unsigned NewOpc = 0;
+ unsigned Offset = 0;
+ DebugLoc dl;
+ if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
+ EvenReg, OddReg, BaseReg, OffReg,
+ Offset, PredReg, Pred)) {
+ Ops.pop_back();
Ops.pop_back();
- MBB->splice(InsertPos, MBB, Op);
+
+ // Form the pair instruction.
+ if (isLd) {
+ BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
+ .addReg(EvenReg, RegState::Define)
+ .addReg(OddReg, RegState::Define)
+ .addReg(BaseReg).addReg(0).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+ ++NumLDRDFormed;
+ } else {
+ BuildMI(*MBB, InsertPos, dl, TII->get(NewOpc))
+ .addReg(EvenReg)
+ .addReg(OddReg)
+ .addReg(BaseReg).addReg(0).addImm(Offset)
+ .addImm(Pred).addReg(PredReg);
+ ++NumSTRDFormed;
+ }
+ MBB->erase(Op0);
+ MBB->erase(Op1);
+
+ // Add register allocation hints to form register pairs.
+ MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
+ MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
+ } else {
+ for (unsigned i = 0; i != NumMove; ++i) {
+ MachineInstr *Op = Ops.back();
+ Ops.pop_back();
+ MBB->splice(InsertPos, MBB, Op);
+ }
}
NumLdStMoved += NumMove;
@@ -1039,7 +1298,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
}
if (StopHere) {
- // Found a duplicate (a base+offset combination that's seen earlier). Backtrack.
+ // Found a duplicate (a base+offset combination that's seen earlier).
+ // Backtrack.
--Loc;
break;
}
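
The profitability test used by IsSafeAndProfitableToMove reduces to two
numbers: how many distinct registers the moved memory ops define or use, and
how many extra registers become live across the group. A standalone
restatement of that heuristic (names hypothetical):

    #include <cstddef>

    // Moving up to four memory registers is always accepted; beyond that,
    // the estimated register-pressure increase must stay within 2x the
    // number of memory registers, mirroring the checks above.
    static bool isProfitableToMove(std::size_t NumMemRegs,
                                   std::size_t NumAddedRegPressure) {
      if (NumMemRegs <= 4)
        return true;
      return NumAddedRegPressure <= NumMemRegs * 2;
    }
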
diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp
index 199858f..bbc1300 100644
--- a/lib/Target/ARM/ARMRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMRegisterInfo.cpp
@@ -159,7 +159,7 @@ ARMRegisterInfo::ARMRegisterInfo(const TargetInstrInfo &tii,
const ARMSubtarget &sti)
: ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
TII(tii), STI(sti),
- FramePtr((STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
+ FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11) {
}
static inline
@@ -194,10 +194,6 @@ void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB,
.addReg(0).addImm(0).addImm(Pred).addReg(PredReg);
}
-const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const {
- return &ARM::GPRRegClass;
-}
-
/// isLowRegister - Returns true if the register is low register r0-r7.
///
bool ARMRegisterInfo::isLowRegister(unsigned Reg) const {
@@ -304,6 +300,191 @@ ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const {
return false;
}
+const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const {
+ return &ARM::GPRRegClass;
+}
+
+/// getAllocationOrder - Returns the register allocation order for a specified
+/// register class in the form of a pair of TargetRegisterClass iterators.
+std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const {
+ // Alternative register allocation orders when favoring even / odd registers
+ // of register pairs.
+
+ // No FP, R9 is available.
+ static const unsigned GPREven1[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd1[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R7, R9 is available.
+ static const unsigned GPREven2[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6,
+ ARM::R9, ARM::R11
+ };
+ static const unsigned GPROdd2[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6,
+ ARM::R8, ARM::R10
+ };
+
+ // FP is R11, R9 is available.
+ static const unsigned GPREven3[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7,
+ ARM::R9
+ };
+ static const unsigned GPROdd3[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7,
+ ARM::R8
+ };
+
+ // No FP, R9 is not available.
+ static const unsigned GPREven4[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd4[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R7, R9 is not available.
+ static const unsigned GPREven5[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R10,
+ ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8,
+ ARM::R11
+ };
+ static const unsigned GPROdd5[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R11,
+ ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8,
+ ARM::R10
+ };
+
+ // FP is R11, R9 is not available.
+ static const unsigned GPREven6[] = {
+ ARM::R0, ARM::R2, ARM::R4, ARM::R6,
+ ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8
+ };
+ static const unsigned GPROdd6[] = {
+ ARM::R1, ARM::R3, ARM::R5, ARM::R7,
+ ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8
+ };
+
+
+ if (HintType == ARMRI::RegPairEven) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+
+ if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven1,
+ GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven4,
+ GPREven4 + (sizeof(GPREven4)/sizeof(unsigned)));
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven2,
+ GPREven2 + (sizeof(GPREven2)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven5,
+ GPREven5 + (sizeof(GPREven5)/sizeof(unsigned)));
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPREven3,
+ GPREven3 + (sizeof(GPREven3)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPREven6,
+ GPREven6 + (sizeof(GPREven6)/sizeof(unsigned)));
+ }
+ } else if (HintType == ARMRI::RegPairOdd) {
+ if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
+ // It's no longer possible to fulfill this hint. Return the default
+ // allocation order.
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+
+ if (!STI.isTargetDarwin() && !hasFP(MF)) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd1,
+ GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd4,
+ GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned)));
+ } else if (FramePtr == ARM::R7) {
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd2,
+ GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd5,
+ GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned)));
+ } else { // FramePtr == ARM::R11
+ if (!STI.isR9Reserved())
+ return std::make_pair(GPROdd3,
+ GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned)));
+ else
+ return std::make_pair(GPROdd6,
+ GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned)));
+ }
+ }
+ return std::make_pair(RC->allocation_order_begin(MF),
+ RC->allocation_order_end(MF));
+}
+
+/// ResolveRegAllocHint - Resolves the specified register allocation hint
+/// to a physical register. Returns the physical register if it is successful.
+unsigned
+ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const {
+ if (Reg == 0 || !isPhysicalRegister(Reg))
+ return 0;
+ if (Type == 0)
+ return Reg;
+ else if (Type == (unsigned)ARMRI::RegPairOdd)
+ // Odd register.
+ return getRegisterPairOdd(Reg, MF);
+ else if (Type == (unsigned)ARMRI::RegPairEven)
+ // Even register.
+ return getRegisterPairEven(Reg, MF);
+ return 0;
+}
+
+void
+ARMRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const {
+ MachineRegisterInfo *MRI = &MF.getRegInfo();
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(Reg);
+ if ((Hint.first == (unsigned)ARMRI::RegPairOdd ||
+ Hint.first == (unsigned)ARMRI::RegPairEven) &&
+ Hint.second && TargetRegisterInfo::isVirtualRegister(Hint.second)) {
+    // If 'Reg' is one register of an even / odd pair and it has now been
+    // changed (e.g. coalesced) into a different register, the allocation
+    // hint of the other register of the pair must be updated to reflect
+    // the relationship change.
+ unsigned OtherReg = Hint.second;
+ Hint = MRI->getRegAllocationHint(OtherReg);
+ if (Hint.second == Reg)
+      // Make sure the pair has not already been divorced.
+ MRI->setRegAllocationHint(OtherReg, Hint.first, NewReg);
+ }
+}
+
bool
ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -1506,9 +1687,8 @@ unsigned ARMRegisterInfo::getRARegister() const {
unsigned ARMRegisterInfo::getFrameRegister(MachineFunction &MF) const {
if (STI.isTargetDarwin() || hasFP(MF))
- return (STI.useThumbBacktraces() || STI.isThumb()) ? ARM::R7 : ARM::R11;
- else
- return ARM::SP;
+ return FramePtr;
+ return ARM::SP;
}
unsigned ARMRegisterInfo::getEHExceptionRegister() const {
@@ -1525,4 +1705,152 @@ int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
}
+unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R1:
+ return ARM::R0;
+ case ARM::R3:
+ // FIXME!
+ return STI.isThumb() ? 0 : ARM::R2;
+ case ARM::R5:
+ return ARM::R4;
+ case ARM::R7:
+ return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6;
+ case ARM::R9:
+    return isReservedReg(MF, ARM::R9) ? 0 : ARM::R8;
+ case ARM::R11:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10;
+
+ case ARM::S1:
+ return ARM::S0;
+ case ARM::S3:
+ return ARM::S2;
+ case ARM::S5:
+ return ARM::S4;
+ case ARM::S7:
+ return ARM::S6;
+ case ARM::S9:
+ return ARM::S8;
+ case ARM::S11:
+ return ARM::S10;
+ case ARM::S13:
+ return ARM::S12;
+ case ARM::S15:
+ return ARM::S14;
+ case ARM::S17:
+ return ARM::S16;
+ case ARM::S19:
+ return ARM::S18;
+ case ARM::S21:
+ return ARM::S20;
+ case ARM::S23:
+ return ARM::S22;
+ case ARM::S25:
+ return ARM::S24;
+ case ARM::S27:
+ return ARM::S26;
+ case ARM::S29:
+ return ARM::S28;
+ case ARM::S31:
+ return ARM::S30;
+
+ case ARM::D1:
+ return ARM::D0;
+ case ARM::D3:
+ return ARM::D2;
+ case ARM::D5:
+ return ARM::D4;
+ case ARM::D7:
+ return ARM::D6;
+ case ARM::D9:
+ return ARM::D8;
+ case ARM::D11:
+ return ARM::D10;
+ case ARM::D13:
+ return ARM::D12;
+ case ARM::D15:
+ return ARM::D14;
+ }
+
+ return 0;
+}
+
+unsigned ARMRegisterInfo::getRegisterPairOdd(unsigned Reg,
+ const MachineFunction &MF) const {
+ switch (Reg) {
+ default: break;
+ // Return 0 if either register of the pair is a special register.
+ // So no R12, etc.
+ case ARM::R0:
+ return ARM::R1;
+ case ARM::R2:
+ // FIXME!
+ return STI.isThumb() ? 0 : ARM::R3;
+ case ARM::R4:
+ return ARM::R5;
+ case ARM::R6:
+ return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7;
+ case ARM::R8:
+    return isReservedReg(MF, ARM::R9) ? 0 : ARM::R9;
+ case ARM::R10:
+ return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11;
+
+ case ARM::S0:
+ return ARM::S1;
+ case ARM::S2:
+ return ARM::S3;
+ case ARM::S4:
+ return ARM::S5;
+ case ARM::S6:
+ return ARM::S7;
+ case ARM::S8:
+ return ARM::S9;
+ case ARM::S10:
+ return ARM::S11;
+ case ARM::S12:
+ return ARM::S13;
+ case ARM::S14:
+ return ARM::S15;
+ case ARM::S16:
+ return ARM::S17;
+ case ARM::S18:
+ return ARM::S19;
+ case ARM::S20:
+ return ARM::S21;
+ case ARM::S22:
+ return ARM::S23;
+ case ARM::S24:
+ return ARM::S25;
+ case ARM::S26:
+ return ARM::S27;
+ case ARM::S28:
+ return ARM::S29;
+ case ARM::S30:
+ return ARM::S31;
+
+ case ARM::D0:
+ return ARM::D1;
+ case ARM::D2:
+ return ARM::D3;
+ case ARM::D4:
+ return ARM::D5;
+ case ARM::D6:
+ return ARM::D7;
+ case ARM::D8:
+ return ARM::D9;
+ case ARM::D10:
+ return ARM::D11;
+ case ARM::D12:
+ return ARM::D13;
+ case ARM::D14:
+ return ARM::D15;
+ }
+
+ return 0;
+}
+
#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h
index e1d9efb..e8f4fd8 100644
--- a/lib/Target/ARM/ARMRegisterInfo.h
+++ b/lib/Target/ARM/ARMRegisterInfo.h
@@ -22,12 +22,17 @@ namespace llvm {
class TargetInstrInfo;
class Type;
+/// Register allocation hints.
+namespace ARMRI {
+ enum {
+ RegPairOdd = 1,
+ RegPairEven = 2
+ };
+}
+
struct ARMRegisterInfo : public ARMGenRegisterInfo {
const TargetInstrInfo &TII;
const ARMSubtarget &STI;
-private:
- /// FramePtr - ARM physical register used as frame ptr.
- unsigned FramePtr;
public:
ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI);
@@ -49,10 +54,6 @@ public:
/// if the register is a single precision VFP register.
static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP);
- /// getPointerRegClass - Return the register class to use to hold pointers.
- /// This is used for addressing modes.
- const TargetRegisterClass *getPointerRegClass() const;
-
/// Code Generation virtual methods...
const TargetRegisterClass *
getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const;
@@ -65,6 +66,19 @@ public:
bool isReservedReg(const MachineFunction &MF, unsigned Reg) const;
+ const TargetRegisterClass *getPointerRegClass() const;
+
+ std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
+ getAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const;
+
+ unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
+ const MachineFunction &MF) const;
+
+ void UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
+ MachineFunction &MF) const;
+
bool requiresRegisterScavenging(const MachineFunction &MF) const;
bool hasFP(const MachineFunction &MF) const;
@@ -95,6 +109,15 @@ public:
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
bool isLowRegister(unsigned Reg) const;
+
+private:
+ /// FramePtr - ARM physical register used as frame ptr.
+ unsigned FramePtr;
+
+ unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const;
+
+ unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const;
+
};
} // end namespace llvm
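
UpdateRegAllocHint keeps paired hints symmetric: when one register of an
even / odd pair is replaced (e.g. by coalescing), the partner's hint is
retargeted only if it still points back at the replaced register. The
invariant in miniature, over a plain map instead of MachineRegisterInfo (all
names are stand-ins):

    #include <map>
    #include <utility>

    // Hint = (type, partner); types 1 and 2 mark odd / even pair hints.
    typedef std::pair<unsigned, unsigned> Hint;
    static std::map<unsigned, Hint> HintMap;

    static void updateHint(unsigned Reg, unsigned NewReg) {
      Hint H = HintMap[Reg];
      if ((H.first == 1 || H.first == 2) && H.second != 0) {
        unsigned Other = H.second;
        Hint OH = HintMap[Other];
        if (OH.second == Reg)  // pair still intact, not already divorced
          HintMap[Other] = std::make_pair(OH.first, NewReg);
      }
    }
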
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index ebe7d58..d864079 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -134,7 +134,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
GPRClass::allocation_order_begin(const MachineFunction &MF) const {
const TargetMachine &TM = MF.getTarget();
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isTargetDarwin()) {
if (Subtarget.isR9Reserved())
return ARM_GPR_AO_4;
else
@@ -154,7 +154,7 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
GPRClass::iterator I;
- if (Subtarget.useThumbBacktraces()) {
+ if (Subtarget.isTargetDarwin()) {
if (Subtarget.isR9Reserved()) {
I = ARM_GPR_AO_4 + (sizeof(ARM_GPR_AO_4)/sizeof(unsigned));
} else {
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
new file mode 100644
index 0000000..75fa707
--- /dev/null
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -0,0 +1,35 @@
+//===- ARMSchedule.td - ARM Scheduling Definitions ---------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across ARM processors
+//
+def FU_iALU : FuncUnit; // Integer alu unit
+def FU_iLdSt : FuncUnit; // Integer load / store unit
+def FU_FpALU : FuncUnit; // FP alu unit
+def FU_FpLdSt : FuncUnit; // FP load / store unit
+def FU_Br : FuncUnit; // Branch unit
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for ARM
+//
+def IIC_iALU : InstrItinClass;
+def IIC_iLoad : InstrItinClass;
+def IIC_iStore : InstrItinClass;
+def IIC_fpALU : InstrItinClass;
+def IIC_fpLoad : InstrItinClass;
+def IIC_fpStore : InstrItinClass;
+def IIC_Br : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// Processor instruction itineraries.
+
+def GenericItineraries : ProcessorItineraries<[]>;
+
+include "ARMScheduleV6.td"
diff --git a/lib/Target/ARM/ARMScheduleV6.td b/lib/Target/ARM/ARMScheduleV6.td
new file mode 100644
index 0000000..596a57f
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleV6.td
@@ -0,0 +1,22 @@
+//===- ARMScheduleV6.td - ARM v6 Scheduling Definitions ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the ARM v6 processors.
+//
+//===----------------------------------------------------------------------===//
+
+def V6Itineraries : ProcessorItineraries<[
+ InstrItinData<IIC_iALU , [InstrStage<1, [FU_iALU]>]>,
+ InstrItinData<IIC_iLoad , [InstrStage<2, [FU_iLdSt]>]>,
+ InstrItinData<IIC_iStore , [InstrStage<1, [FU_iLdSt]>]>,
+ InstrItinData<IIC_fpALU , [InstrStage<6, [FU_FpALU]>]>,
+ InstrItinData<IIC_fpLoad , [InstrStage<2, [FU_FpLdSt]>]>,
+ InstrItinData<IIC_fpStore , [InstrStage<1, [FU_FpLdSt]>]>,
+ InstrItinData<IIC_Br , [InstrStage<3, [FU_Br]>]>
+]>;
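
Each V6 itinerary class above has a single stage, so its latency can be read
back as a flat class-to-cycles table. A sketch, assuming the stage cycle count
is the quantity of interest and indexing by IIC_* declaration order:

    // Cycle counts from V6Itineraries, in ARMSchedule.td declaration order:
    // iALU, iLoad, iStore, fpALU, fpLoad, fpStore, Br.
    static unsigned v6StageCycles(unsigned ItinClass) {
      static const unsigned Cycles[] = {1, 2, 1, 6, 2, 1, 3};
      return ItinClass < 7 ? Cycles[ItinClass] : 1;
    }
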
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index a978380..7ac7b49 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -24,7 +24,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
, ARMFPUType(None)
, IsThumb(isThumb)
, ThumbMode(Thumb1)
- , UseThumbBacktraces(false)
, IsR9Reserved(false)
, stackAlignment(4)
, CPUString("generic")
@@ -83,8 +82,6 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS,
if (isAAPCS_ABI())
stackAlignment = 8;
- if (isTargetDarwin()) {
- UseThumbBacktraces = true;
+ if (isTargetDarwin())
IsR9Reserved = true;
- }
}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 0704055..c3cc7ff 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -14,6 +14,7 @@
#ifndef ARMSUBTARGET_H
#define ARMSUBTARGET_H
+#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetSubtarget.h"
#include <string>
@@ -48,9 +49,6 @@ protected:
/// ThumbMode - Indicates supported Thumb version.
ThumbTypeEnum ThumbMode;
- /// UseThumbBacktraces - True if we use thumb style backtraces.
- bool UseThumbBacktraces;
-
  /// IsR9Reserved - True if R9 is not available as a general purpose register.
bool IsR9Reserved;
@@ -61,6 +59,9 @@ protected:
/// CPUString - String name of used CPU.
std::string CPUString;
+  /// Selected instruction itineraries (one entry per itinerary class).
+ InstrItineraryData InstrItins;
+
public:
enum {
isELF, isDarwin
@@ -106,14 +107,17 @@ protected:
bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
bool isThumb() const { return IsThumb; }
- bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); }
- bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
+ bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); }
+ bool hasThumb2() const { return IsThumb && (ThumbMode >= Thumb2); }
- bool useThumbBacktraces() const { return UseThumbBacktraces; }
bool isR9Reserved() const { return IsR9Reserved; }
const std::string & getCPUString() const { return CPUString; }
+  /// getInstrItineraryData - Return the instruction itineraries based on
+  /// subtarget selection.
+ const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
/// getStackAlignment - Returns the minimum alignment known to hold of the
/// stack frame on entry to the function and which must be maintained by every
/// function for this subtarget.
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp
index 4107dcc..42b8eae 100644
--- a/lib/Target/ARM/ARMTargetAsmInfo.cpp
+++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp
@@ -17,80 +17,42 @@
#include <cctype>
using namespace llvm;
-
const char *const llvm::arm_asm_table[] = {
- "{r0}", "r0",
- "{r1}", "r1",
- "{r2}", "r2",
- "{r3}", "r3",
- "{r4}", "r4",
- "{r5}", "r5",
- "{r6}", "r6",
- "{r7}", "r7",
- "{r8}", "r8",
- "{r9}", "r9",
- "{r10}", "r10",
- "{r11}", "r11",
- "{r12}", "r12",
- "{r13}", "r13",
- "{r14}", "r14",
- "{lr}", "lr",
- "{sp}", "sp",
- "{ip}", "ip",
- "{fp}", "fp",
- "{sl}", "sl",
- "{memory}", "memory",
- "{cc}", "cc",
- 0,0};
+ "{r0}", "r0",
+ "{r1}", "r1",
+ "{r2}", "r2",
+ "{r3}", "r3",
+ "{r4}", "r4",
+ "{r5}", "r5",
+ "{r6}", "r6",
+ "{r7}", "r7",
+ "{r8}", "r8",
+ "{r9}", "r9",
+ "{r10}", "r10",
+ "{r11}", "r11",
+ "{r12}", "r12",
+ "{r13}", "r13",
+ "{r14}", "r14",
+ "{lr}", "lr",
+ "{sp}", "sp",
+ "{ip}", "ip",
+ "{fp}", "fp",
+ "{sl}", "sl",
+ "{memory}", "memory",
+ "{cc}", "cc",
+ 0,0
+};
ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM):
ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
- GlobalPrefix = "_";
- PrivateGlobalPrefix = "L";
- LessPrivateGlobalPrefix = "l";
- StringConstantPrefix = "\1LC";
- BSSSection = 0; // no BSS section
ZeroDirective = "\t.space\t";
ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill
SetDirective = "\t.set\t";
- WeakRefDirective = "\t.weak_reference\t";
- WeakDefDirective = "\t.weak_definition ";
- HiddenDirective = "\t.private_extern\t";
ProtectedDirective = NULL;
- JumpTableDataSection = ".const";
- CStringSection = "\t.cstring";
HasDotTypeDotSizeDirective = false;
- HasSingleParameterDotFile = false;
- NeedsIndirectEncoding = true;
- if (TM.getRelocationModel() == Reloc::Static) {
- StaticCtorsSection = ".constructor";
- StaticDtorsSection = ".destructor";
- } else {
- StaticCtorsSection = ".mod_init_func";
- StaticDtorsSection = ".mod_term_func";
- }
-
- // In non-PIC modes, emit a special label before jump tables so that the
- // linker can perform more accurate dead code stripping.
- if (TM.getRelocationModel() != Reloc::PIC_) {
- // Emit a local label that is preserved until the linker runs.
- JumpTableSpecialLabelPrefix = "l";
- }
-
- NeedsSet = true;
- DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
- DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
- DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
- DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
- DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
- DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
- DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
- DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
- DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
- DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
- DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
+ SupportsDebugInformation = true;
}
ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
@@ -115,7 +77,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
DwarfLocSection = "\t.section\t.debug_loc,\"\",%progbits";
DwarfARangesSection = "\t.section\t.debug_aranges,\"\",%progbits";
DwarfRangesSection = "\t.section\t.debug_ranges,\"\",%progbits";
- DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits";
+ DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",%progbits";
if (Subtarget->isAAPCS_ABI()) {
StaticCtorsSection = "\t.section .init_array,\"aw\",%init_array";
@@ -124,6 +86,7 @@ ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
StaticCtorsSection = "\t.section .ctors,\"aw\",%progbits";
StaticDtorsSection = "\t.section .dtors,\"aw\",%progbits";
}
+ SupportsDebugInformation = true;
}
/// Count the number of comma-separated arguments.
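
arm_asm_table above is a flat array of {constraint, replacement} string pairs
ending in a 0,0 sentinel. A lookup over a table in that layout might be
sketched as follows (lookupConstraint is illustrative, not the TargetAsmInfo
API):

    #include <cstring>

    static const char *lookupConstraint(const char *const *Table,
                                        const char *Name) {
      for (unsigned i = 0; Table[i]; i += 2)  // stop at the 0,0 sentinel
        if (std::strcmp(Table[i], Name) == 0)
          return Table[i + 1];
      return 0;  // not found
    }

    // e.g. lookupConstraint(arm_asm_table, "{r0}") would yield "r0".
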
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h
index 9e6f856..683692f 100644
--- a/lib/Target/ARM/ARMTargetAsmInfo.h
+++ b/lib/Target/ARM/ARMTargetAsmInfo.h
@@ -26,8 +26,7 @@ namespace llvm {
template <class BaseTAI>
struct ARMTargetAsmInfo : public BaseTAI {
- explicit ARMTargetAsmInfo(const ARMTargetMachine &TM):
- BaseTAI(TM) {
+ explicit ARMTargetAsmInfo(const ARMTargetMachine &TM) : BaseTAI(TM) {
BaseTAI::AsmTransCBE = arm_asm_table;
BaseTAI::AlignmentIsInBytes = false;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 7033907..8006b9b 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -23,9 +23,6 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static cl::opt<bool>
-EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden,
- cl::desc("Enable pre-regalloc load store optimization pass"));
static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
cl::desc("Disable load store optimization pass"));
static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
@@ -42,6 +39,11 @@ int ARMTargetMachineModule = 0;
static RegisterTarget<ARMTargetMachine> X("arm", "ARM");
static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeARMTarget() { }
+}
+
// No assembler printer by default
ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0;
@@ -97,7 +99,8 @@ ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS,
InstrInfo(Subtarget),
FrameInfo(Subtarget),
JITInfo(),
- TLInfo(*this) {
+ TLInfo(*this),
+ InstrItins(Subtarget.getInstrItineraryData()) {
DefRelocModel = getRelocationModel();
}
@@ -149,8 +152,6 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
- if (!EnablePreLdStOpti)
- return false;
// FIXME: temporarily disabling load / store optimization pass for Thumb mode.
if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
PM.add(createARMLoadStoreOptimizationPass(true));
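
The empty InitializeARMTarget() added above exists only to be referenced from
llvm/InitializeAllTargets.h: pulling in the symbol forces the linker to keep
this object file, which in turn runs the static RegisterTarget constructors.
The idiom in miniature (all names hypothetical):

    // target_lib.cpp -- hypothetical target library.
    #include <cstdio>

    namespace {
      struct Registrar {
        Registrar() { std::puts("target registered"); }  // cf. RegisterTarget<>
      } R;  // constructed during static initialization of this object file
    }

    namespace llvm { void InitializeMyTarget() {} }  // empty anchor symbol

    // A client that calls llvm::InitializeMyTarget() references the anchor,
    // so the linker keeps target_lib.o and Registrar's constructor runs
    // before main().
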
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index 7192c1b..c4c8e6c 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -28,13 +28,14 @@ namespace llvm {
class Module;
class ARMTargetMachine : public LLVMTargetMachine {
- ARMSubtarget Subtarget;
- const TargetData DataLayout; // Calculates type size & alignment
- ARMInstrInfo InstrInfo;
- ARMFrameInfo FrameInfo;
- ARMJITInfo JITInfo;
- ARMTargetLowering TLInfo;
- Reloc::Model DefRelocModel; // Reloc model before it's overridden.
+ ARMSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ ARMInstrInfo InstrInfo;
+ ARMFrameInfo FrameInfo;
+ ARMJITInfo JITInfo;
+ ARMTargetLowering TLInfo;
+ InstrItineraryData InstrItins;
+ Reloc::Model DefRelocModel; // Reloc model before it's overridden.
protected:
// To avoid having target depend on the asmprinter stuff libraries, asmprinter
@@ -59,6 +60,9 @@ public:
virtual ARMTargetLowering *getTargetLowering() const {
return const_cast<ARMTargetLowering*>(&TLInfo);
}
+ virtual const InstrItineraryData getInstrItineraryData() const {
+ return InstrItins;
+ }
static void registerAsmPrinter(AsmPrinterCtorFn F) {
AsmPrinterCtor = F;
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index d908cf4..948a100 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -45,7 +45,6 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed");
namespace {
class VISIBILITY_HIDDEN ARMAsmPrinter : public AsmPrinter {
DwarfWriter *DW;
- MachineModuleInfo *MMI;
/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
/// make the right decision when printing asm code for different targets.
@@ -84,7 +83,7 @@ namespace {
explicit ARMAsmPrinter(raw_ostream &O, TargetMachine &TM,
const TargetAsmInfo *T, CodeGenOpt::Level OL,
bool V)
- : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(NULL), AFI(NULL), MCP(NULL),
+ : AsmPrinter(O, TM, T, OL, V), DW(0), AFI(NULL), MCP(NULL),
InCPMode(false) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
}
@@ -97,6 +96,7 @@ namespace {
const char *Modifier = 0);
void printSOImmOperand(const MachineInstr *MI, int opNum);
void printSOImm2PartOperand(const MachineInstr *MI, int opNum);
+ void printSOOperand(const MachineInstr *MI, int OpNum);
void printSORegOperand(const MachineInstr *MI, int opNum);
void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
@@ -396,6 +396,28 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) {
printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI);
}
+// For constant shifts, so_reg is a 3-operand unit corresponding to the
+// register forms of the A5.1 "Addressing Mode 1 - Data-processing operands"
+// forms. This includes:
+// REG 0 - e.g. R5
+// REG IMM, SH_OPC - e.g. R5, LSL #3
+void ARMAsmPrinter::printSOOperand(const MachineInstr *MI, int OpNum) {
+ const MachineOperand &MO1 = MI->getOperand(OpNum);
+ const MachineOperand &MO2 = MI->getOperand(OpNum+1);
+
+ unsigned Reg = MO1.getReg();
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ O << TM.getRegisterInfo()->getAsmName(Reg);
+
+  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+
+  // Print the shift opc.
+  O << ", "
+    << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
+    << " ";
+
+  O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+}
+
// so_reg is a 4-operand unit corresponding to register forms of the A5.1
// "Addressing Mode 1 - Data-processing operands" forms. This includes:
// REG 0 0 - e.g. R5
@@ -805,17 +827,11 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
bool ARMAsmPrinter::doInitialization(Module &M) {
bool Result = AsmPrinter::doInitialization(M);
-
- // Emit initial debug information.
- MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- assert(MMI);
DW = getAnalysisIfAvailable<DwarfWriter>();
- assert(DW && "Dwarf Writer is not available");
- DW->BeginModule(&M, MMI, O, this, TAI);
- // Darwin wants symbols to be quoted if they have complex names.
- if (Subtarget->isTargetDarwin())
- Mang->setUseQuotes(true);
+ // Thumb-2 instructions are supported only in unified assembler syntax mode.
+ if (Subtarget->hasThumb2())
+ O << "\t.syntax unified\n";
// Emit ARM Build Attributes
if (Subtarget->isTargetELF()) {
@@ -1115,3 +1131,9 @@ namespace {
}
} Registrator;
}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeARMAsmPrinter() { }
+}
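
The Registrator objects used throughout these asm printers store a
constructor function pointer in the target machine, so the target library
never links against the printer directly; the assert-plus-if in
addAssemblyEmitter then catches builds where the printer was left out. A
minimal sketch of the decoupling (all names hypothetical):

    #include <ostream>

    struct Pass { virtual ~Pass() {} };
    typedef Pass *(*PrinterCtorFn)(std::ostream &);

    struct MyTargetMachine {
      static PrinterCtorFn PrinterCtor;  // stays null unless printer links in
      static void registerPrinter(PrinterCtorFn F) { PrinterCtor = F; }
    };
    PrinterCtorFn MyTargetMachine::PrinterCtor = 0;

    static Pass *createMyPrinter(std::ostream &) { return new Pass(); }

    namespace {
      struct Register {
        Register() { MyTargetMachine::registerPrinter(createMyPrinter); }
      } Registrator;  // runs at static-init time when this file is linked
    }
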
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 7ed8ef6..1be1713 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -39,7 +39,7 @@ static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg,
AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) : TargetLowering(TM) {
// Set up the TargetLowering object.
- //I am having problems with shr n ubyte 1
+ //I am having problems with shr n i8 1
setShiftAmountType(MVT::i64);
setBooleanContents(ZeroOrOneBooleanContent);
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index 4c83054..cdd4fa4 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -21,16 +21,17 @@
using namespace llvm;
-/// AlphaTargetMachineModule - Note that this is used on hosts that cannot link
-/// in a library unless there are references into the library. In particular,
-/// it seems that it is not possible to get things to work on Win32 without
-/// this. Though it is unused, do not remove it.
-extern "C" int AlphaTargetMachineModule;
-int AlphaTargetMachineModule = 0;
-
// Register the targets
static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]");
+// No assembler printer by default
+AlphaTargetMachine::AsmPrinterCtorFn AlphaTargetMachine::AsmPrinterCtor = 0;
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeAlphaTarget() { }
+}
+
const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const {
return new AlphaTargetAsmInfo(*this);
}
@@ -92,23 +93,32 @@ bool AlphaTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
bool Verbose,
raw_ostream &Out) {
PM.add(createAlphaLLRPPass(*this));
- PM.add(createAlphaCodePrinterPass(Out, *this, OptLevel, Verbose));
+ // Output assembly language.
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
return false;
}
bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool DumpAsm, MachineCodeEmitter &MCE) {
PM.add(createAlphaCodeEmitterPass(*this, MCE));
- if (DumpAsm)
- PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
return false;
}
bool AlphaTargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool DumpAsm, JITCodeEmitter &JCE) {
PM.add(createAlphaJITCodeEmitterPass(*this, JCE));
- if (DumpAsm)
- PM.add(createAlphaCodePrinterPass(errs(), *this, OptLevel, true));
+ if (DumpAsm) {
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ }
return false;
}
bool AlphaTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 51224e8..946ca55 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -33,10 +33,18 @@ class AlphaTargetMachine : public LLVMTargetMachine {
AlphaJITInfo JITInfo;
AlphaSubtarget Subtarget;
AlphaTargetLowering TLInfo;
-
+
protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
+
+  // To avoid having the target depend on the asmprinter libraries, the
+  // asmprinter registers its constructor pointer here at startup time if it
+  // is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
public:
AlphaTargetMachine(const Module &M, const std::string &FS);
@@ -46,7 +54,7 @@ public:
virtual const AlphaRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
- virtual AlphaTargetLowering* getTargetLowering() const {
+ virtual AlphaTargetLowering* getTargetLowering() const {
return const_cast<AlphaTargetLowering*>(&TLInfo);
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -56,7 +64,7 @@ public:
static unsigned getJITMatchQuality();
static unsigned getModuleMatchQuality(const Module &M);
-
+
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
@@ -75,6 +83,10 @@ public:
CodeGenOpt::Level OptLevel,
bool DumpAsm,
JITCodeEmitter &JCE);
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
};
} // end namespace llvm
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 74b48ee6..7b73bb3 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -303,3 +303,17 @@ bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
O << ")";
return false;
}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeAlphaAsmPrinter() { }
+}
+
+namespace {
+ static struct Register {
+ Register() {
+ AlphaTargetMachine::registerAsmPrinter(createAlphaCodePrinterPass);
+ }
+ } Registrator;
+}
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 5814d27..c3554f6 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -59,6 +59,11 @@ int CBackendTargetMachineModule = 0;
// Register the target.
static RegisterTarget<CTargetMachine> X("c", "C backend");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeCBackendTarget() { }
+}
+
namespace {
/// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
/// any unnamed structure types that are used by the program, and merges
@@ -1449,6 +1454,17 @@ std::string CWriter::GetValueName(const Value *Operand) {
/// writeInstComputationInline - Emit the computation for the specified
/// instruction inline, with no destination provided.
void CWriter::writeInstComputationInline(Instruction &I) {
+ // We can't currently support integer types other than 1, 8, 16, 32, 64.
+ // Validate this.
+ const Type *Ty = I.getType();
+ if (Ty->isInteger() && (Ty!=Type::Int1Ty && Ty!=Type::Int8Ty &&
+ Ty!=Type::Int16Ty && Ty!=Type::Int32Ty && Ty!=Type::Int64Ty)) {
+ cerr << "The C backend does not currently support integer "
+ << "types of widths other than 1, 8, 16, 32, 64.\n";
+ cerr << "This is being tracked as PR 4158.\n";
+ abort();
+ }
+
// If this is a non-trivial bool computation, make sure to truncate down to
// a 1 bit value. This is important because we want "add i1 x, y" to return
// "0" when x and y are true, not "2" for example.
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index da1bf07..26a8ece 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -287,12 +287,11 @@ namespace {
/// LinuxAsmPrinter - SPU assembly printer, customized for Linux
class VISIBILITY_HIDDEN LinuxAsmPrinter : public SPUAsmPrinter {
DwarfWriter *DW;
- MachineModuleInfo *MMI;
public:
explicit LinuxAsmPrinter(raw_ostream &O, SPUTargetMachine &TM,
const TargetAsmInfo *T, CodeGenOpt::Level F,
bool V)
- : SPUAsmPrinter(O, TM, T, F, V), DW(0), MMI(0) {}
+ : SPUAsmPrinter(O, TM, T, F, V), DW(0) {}
virtual const char *getPassName() const {
return "STI CBEA SPU Assembly Printer";
@@ -490,12 +489,8 @@ LinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF)
bool LinuxAsmPrinter::doInitialization(Module &M) {
bool Result = AsmPrinter::doInitialization(M);
- SwitchToTextSection("\t.text");
- // Emit initial debug information.
DW = getAnalysisIfAvailable<DwarfWriter>();
- assert(DW && "Dwarf Writer is not available");
- MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- DW->BeginModule(&M, MMI, O, this, TAI);
+ SwitchToTextSection("\t.text");
return Result;
}
@@ -621,3 +616,17 @@ FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o,
bool verbose) {
return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeCellSPUAsmPrinter() { }
+}
+
+namespace {
+ static struct Register {
+ Register() {
+ SPUTargetMachine::registerAsmPrinter(createSPUAsmPrinterPass);
+ }
+ } Registrator;
+}
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index 864a914..b286443 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -249,6 +249,25 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::MUL, MVT::i32, Legal);
setOperationAction(ISD::MUL, MVT::i64, Legal);
+ // Expand double-width multiplication
+ // FIXME: It would probably be reasonable to support some of these operations
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::MULHU, MVT::i16, Expand);
+ setOperationAction(ISD::MULHS, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i8, Custom);
setOperationAction(ISD::ADD, MVT::i64, Legal);
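
The sixteen setOperationAction calls added above vary only in opcode and type;
inside the constructor they could equally be generated by two nested loops. A
hedged sketch of that compression (the explicit list is what the source
actually keeps):

    static const unsigned MulOps[] =
      { ISD::UMUL_LOHI, ISD::SMUL_LOHI, ISD::MULHU, ISD::MULHS };
    static const MVT MulVTs[] = { MVT::i8, MVT::i16, MVT::i32, MVT::i64 };
    for (unsigned i = 0; i != 4; ++i)
      for (unsigned j = 0; j != 4; ++j)
        setOperationAction(MulOps[i], MulVTs[j], Expand);
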
diff --git a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
index ff88ed8..2868ff7 100644
--- a/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUTargetAsmInfo.cpp
@@ -41,7 +41,6 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) :
SupportsDebugInformation = true;
NeedsSet = true;
- SupportsMacInfoSection = false;
DwarfAbbrevSection = "\t.section .debug_abbrev,\"\",@progbits";
DwarfInfoSection = "\t.section .debug_info,\"\",@progbits";
DwarfLineSection = "\t.section .debug_line,\"\",@progbits";
@@ -52,7 +51,7 @@ SPULinuxTargetAsmInfo::SPULinuxTargetAsmInfo(const SPUTargetMachine &TM) :
DwarfLocSection = "\t.section .debug_loc,\"\",@progbits";
DwarfARangesSection = "\t.section .debug_aranges,\"\",@progbits";
DwarfRangesSection = "\t.section .debug_ranges,\"\",@progbits";
- DwarfMacInfoSection = "\t.section .debug_macinfo,\"\",progbits";
+ DwarfMacroInfoSection = 0; // macro info not supported.
// Exception handling is not supported on CellSPU (think about it: you only
// have 256K for code+data. Would you support exception handling?)
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 7fa9022..c675ebb 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -23,20 +23,20 @@
using namespace llvm;
-/// CellSPUTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int CellSPUTargetMachineModule;
-int CellSPUTargetMachineModule = 0;
-
namespace {
// Register the targets
RegisterTarget<SPUTargetMachine>
CELLSPU("cellspu", "STI CBEA Cell SPU [experimental]");
}
+// No assembler printer by default
+SPUTargetMachine::AsmPrinterCtorFn SPUTargetMachine::AsmPrinterCtor = 0;
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeCellSPUTarget() { }
+}
+
const std::pair<unsigned, int> *
SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
NumEntries = 1;
@@ -93,6 +93,9 @@ bool SPUTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose,
raw_ostream &Out) {
- PM.add(createSPUAsmPrinterPass(Out, *this, OptLevel, Verbose));
+ // Output assembly language.
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
return false;
}
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index cd39203..d8fe300 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -35,10 +35,18 @@ class SPUTargetMachine : public LLVMTargetMachine {
SPUFrameInfo FrameInfo;
SPUTargetLowering TLInfo;
InstrItineraryData InstrItins;
-
+
protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
+
+  // To avoid having the target depend on the asmprinter libraries, the
+  // asmprinter registers its constructor pointer here at startup time if it
+  // is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ SPUTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
public:
SPUTargetMachine(const Module &M, const std::string &FS);
@@ -78,7 +86,7 @@ public:
return &DataLayout;
}
- virtual const InstrItineraryData getInstrItineraryData() const {
+ virtual const InstrItineraryData getInstrItineraryData() const {
return InstrItins;
}
@@ -88,6 +96,10 @@ public:
virtual bool addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose, raw_ostream &Out);
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
};
} // end namespace llvm
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 04a6829..1feea96 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -82,6 +82,11 @@ int CppBackendTargetMachineModule = 0;
// Register the target.
static RegisterTarget<CPPTargetMachine> X("cpp", "C++ backend");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeCppBackendTarget() { }
+}
+
namespace {
typedef std::vector<const Type*> TypeList;
typedef std::map<const Type*,std::string> TypeMap;
diff --git a/lib/Target/DarwinTargetAsmInfo.cpp b/lib/Target/DarwinTargetAsmInfo.cpp
index 05d2351..d7d675a 100644
--- a/lib/Target/DarwinTargetAsmInfo.cpp
+++ b/lib/Target/DarwinTargetAsmInfo.cpp
@@ -50,6 +50,53 @@ DarwinTargetAsmInfo::DarwinTargetAsmInfo(const TargetMachine &TM)
ConstDataSection = getUnnamedSection(".const_data", SectionFlags::None);
DataCoalSection = getNamedSection("\t__DATA,__datacoal_nt,coalesced",
SectionFlags::Writeable);
+
+
+ // Common settings for all Darwin targets.
+ // Syntax:
+ GlobalPrefix = "_";
+ PrivateGlobalPrefix = "L";
+ LessPrivateGlobalPrefix = "l"; // Marker for some ObjC metadata
+ StringConstantPrefix = "\1LC";
+ NeedsSet = true;
+ NeedsIndirectEncoding = true;
+ AllowQuotesInName = true;
+ HasSingleParameterDotFile = false;
+
+ // In non-PIC modes, emit a special label before jump tables so that the
+ // linker can perform more accurate dead code stripping. We do not check the
+ // relocation model here since it can be overridden later.
+ JumpTableSpecialLabelPrefix = "l";
+
+ // Directives:
+ WeakDefDirective = "\t.weak_definition ";
+ WeakRefDirective = "\t.weak_reference ";
+ HiddenDirective = "\t.private_extern ";
+
+ // Sections:
+ CStringSection = "\t.cstring";
+ JumpTableDataSection = "\t.const\n";
+ BSSSection = 0;
+
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorsSection = ".constructor";
+ StaticDtorsSection = ".destructor";
+ } else {
+ StaticCtorsSection = ".mod_init_func";
+ StaticDtorsSection = ".mod_term_func";
+ }
+
+ DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
+ DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
+ DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
+ DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
+ DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
+ DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+ DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
+ DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
+ DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
+ DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
+ DwarfMacroInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
}
/// emitUsedDirectiveFor - On Darwin, internally linked data beginning with
diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
index fc54e23..662c667 100644
--- a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
+++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
@@ -374,3 +374,18 @@ FunctionPass *llvm::createIA64CodePrinterPass(raw_ostream &o,
bool verbose) {
return new IA64AsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
}
+
+namespace {
+ static struct Register {
+ Register() {
+ IA64TargetMachine::registerAsmPrinter(createIA64CodePrinterPass);
+ }
+ } Registrator;
+}
+
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeIA64AsmPrinter() { }
+}
diff --git a/lib/Target/IA64/IA64ISelLowering.cpp b/lib/Target/IA64/IA64ISelLowering.cpp
index 34a0686..c545b9c 100644
--- a/lib/Target/IA64/IA64ISelLowering.cpp
+++ b/lib/Target/IA64/IA64ISelLowering.cpp
@@ -107,6 +107,10 @@ IA64TargetLowering::IA64TargetLowering(TargetMachine &TM)
// VASTART needs to be custom lowered to use the VarArgsFrameIndex
setOperationAction(ISD::VAARG , MVT::Other, Custom);
setOperationAction(ISD::VASTART , MVT::Other, Custom);
+
+ // FIXME: These should be legal
+ setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
// Use the default implementation.
setOperationAction(ISD::VACOPY , MVT::Other, Expand);
diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp
index 878a00a..0b93ee5 100644
--- a/lib/Target/IA64/IA64TargetMachine.cpp
+++ b/lib/Target/IA64/IA64TargetMachine.cpp
@@ -19,16 +19,18 @@
#include "llvm/Target/TargetMachineRegistry.h"
using namespace llvm;
-/// IA64TargetMachineModule - Note that this is used on hosts that cannot link
-/// in a library unless there are references into the library. In particular,
-/// it seems that it is not possible to get things to work on Win32 without
-/// this. Though it is unused, do not remove it.
-extern "C" int IA64TargetMachineModule;
-int IA64TargetMachineModule = 0;
-
-static RegisterTarget<IA64TargetMachine> X("ia64",
+// Register the target
+static RegisterTarget<IA64TargetMachine> X("ia64",
"IA-64 (Itanium) [experimental]");
+// No assembler printer by default
+IA64TargetMachine::AsmPrinterCtorFn IA64TargetMachine::AsmPrinterCtor = 0;
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeIA64Target() { }
+}
+
const TargetAsmInfo *IA64TargetMachine::createTargetAsmInfo() const {
return new IA64TargetAsmInfo(*this);
}
@@ -88,7 +90,10 @@ bool IA64TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose,
raw_ostream &Out) {
- PM.add(createIA64CodePrinterPass(Out, *this, OptLevel, Verbose));
+ // Output assembly language.
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
return false;
}
diff --git a/lib/Target/IA64/IA64TargetMachine.h b/lib/Target/IA64/IA64TargetMachine.h
index 29d625c..a64da9f 100644
--- a/lib/Target/IA64/IA64TargetMachine.h
+++ b/lib/Target/IA64/IA64TargetMachine.h
@@ -30,24 +30,32 @@ class IA64TargetMachine : public LLVMTargetMachine {
TargetFrameInfo FrameInfo;
//IA64JITInfo JITInfo;
IA64TargetLowering TLInfo;
-
+
protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
+  // To avoid having the target depend on the asmprinter libraries, the
+  // asmprinter registers its constructor pointer here at startup time if it
+  // is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ IA64TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
public:
IA64TargetMachine(const Module &M, const std::string &FS);
virtual const IA64InstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const { return &FrameInfo; }
virtual const IA64Subtarget *getSubtargetImpl() const { return &Subtarget; }
- virtual IA64TargetLowering *getTargetLowering() const {
+ virtual IA64TargetLowering *getTargetLowering() const {
return const_cast<IA64TargetLowering*>(&TLInfo);
}
virtual const IA64RegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
}
virtual const TargetData *getTargetData() const { return &DataLayout; }
-
+
static unsigned getModuleMatchQuality(const Module &M);
// Pass Pipeline Configuration
@@ -56,6 +64,10 @@ public:
virtual bool addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose, raw_ostream &Out);
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
};
} // End llvm namespace
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 37e5b1e..0aff14f 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -55,6 +55,11 @@ int MSILTargetMachineModule = 0;
static RegisterTarget<MSILTarget> X("msil", "MSIL backend");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeMSILTarget() { }
+}
+
bool MSILModule::runOnModule(Module &M) {
ModulePtr = &M;
TD = &getAnalysis<TargetData>();
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 7886946..0f5244d 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -35,6 +35,11 @@ int MSP430TargetMachineModule = 0;
static RegisterTarget<MSP430TargetMachine>
X("msp430", "MSP430 [experimental]");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeMSP430Target() { }
+}
+
MSP430TargetMachine::MSP430TargetMachine(const Module &M,
const std::string &FS) :
Subtarget(*this, M, FS),
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index dfb6238..077ec96 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -578,3 +578,17 @@ doFinalization(Module &M)
return AsmPrinter::doFinalization(M);
}
+
+namespace {
+ static struct Register {
+ Register() {
+ MipsTargetMachine::registerAsmPrinter(createMipsCodePrinterPass);
+ }
+ } Registrator;
+}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeMipsAsmPrinter() { }
+}
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 4517cfc..42afceb 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -95,6 +95,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
setOperationAction(ISD::JumpTable, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::SELECT, MVT::f32, Custom);
+ setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
@@ -122,6 +123,7 @@ MipsTargetLowering(MipsTargetMachine &TM): TargetLowering(TM)
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
// We don't have line number support yet.
setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index ef524e3..83b9b62 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -31,6 +31,14 @@ int MipsTargetMachineModule = 0;
static RegisterTarget<MipsTargetMachine> X("mips", "Mips");
static RegisterTarget<MipselTargetMachine> Y("mipsel", "Mipsel");
+MipsTargetMachine::AsmPrinterCtorFn MipsTargetMachine::AsmPrinterCtor = 0;
+
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeMipsTarget() { }
+}
+
const TargetAsmInfo *MipsTargetMachine::
createTargetAsmInfo() const
{
@@ -125,9 +133,9 @@ addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel)
// true if AssemblyEmitter is supported
bool MipsTargetMachine::
addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
- bool Verbose, raw_ostream &Out)
-{
+ bool Verbose, raw_ostream &Out) {
// Output assembly language.
- PM.add(createMipsCodePrinterPass(Out, *this, OptLevel, Verbose));
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
return false;
}
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index a9e1df2..85fafad 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -33,10 +33,23 @@ namespace llvm {
protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
-
+ protected:
+ // To avoid having the target depend on the asmprinter libraries, the
+ // asmprinter registers its pass-constructor pointer here at startup
+ // time if it is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ MipsTargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
public:
MipsTargetMachine(const Module &M, const std::string &FS, bool isLittle);
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
+
virtual const MipsInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
virtual const TargetFrameInfo *getFrameInfo() const
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp
index f9a8801..ca1089b 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp
@@ -48,27 +48,10 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const Function *F = MF.getFunction();
CurrentFnName = Mang->getValueName(F);
- // Iterate over the first basic block instructions to find if it has a
- // DebugLoc. If so emit .file directive. Instructions such as movlw do not
- // have valid DebugLoc, so need to iterate over instructions.
- MachineFunction::const_iterator I = MF.begin();
- for (MachineBasicBlock::const_iterator MBBI = I->begin(), E = I->end();
- MBBI != E; MBBI++) {
- const DebugLoc DLoc = MBBI->getDebugLoc();
- if (!DLoc.isUnknown()) {
- GlobalVariable *CU = MF.getDebugLocTuple(DLoc).CompileUnit;
- unsigned line = MF.getDebugLocTuple(DLoc).Line;
- DbgInfo.EmitFileDirective(CU);
- DbgInfo.SetFunctBeginLine(line);
- break;
- }
- }
-
// Emit the function frame (args and temps).
EmitFunctionFrame(MF);
- // Emit function begin debug directive.
- DbgInfo.EmitFunctBeginDI(F);
+ DbgInfo.BeginFunction(MF);
// Emit the autos section of function.
EmitAutos(CurrentFnName);
@@ -89,9 +72,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit function start label.
O << CurrentFnName << ":\n";
- // For emitting line directives, we need to keep track of the current
- // source line. When it changes then only emit the line directive.
- unsigned CurLine = 0;
+ DebugLoc CurDL;
O << "\n";
// Print out code for the function.
for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
@@ -109,12 +90,9 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the line directive if source line changed.
const DebugLoc DL = II->getDebugLoc();
- if (!DL.isUnknown()) {
- unsigned line = MF.getDebugLocTuple(DL).Line;
- if (line != CurLine) {
- O << "\t.line " << line << "\n";
- CurLine = line;
- }
+ if (!DL.isUnknown() && DL != CurDL) {
+ DbgInfo.ChangeDebugLoc(MF, DL);
+ CurDL = DL;
}
// Print the assembly for the instruction.
@@ -123,7 +101,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
// Emit function end debug directives.
- DbgInfo.EmitFunctEndDI(F, CurLine);
+ DbgInfo.EndFunction(MF);
return false; // we didn't modify anything.
}
@@ -226,7 +204,7 @@ bool PIC16AsmPrinter::doInitialization (Module &M) {
I->setSection(TAI->SectionForGlobal(I)->getName());
}
- DbgInfo.Init(M);
+ DbgInfo.BeginModule(M);
EmitFunctionDecls(M);
EmitUndefinedVars(M);
EmitDefinedVars(M);
@@ -313,8 +291,7 @@ void PIC16AsmPrinter::EmitRomData (Module &M)
bool PIC16AsmPrinter::doFinalization(Module &M) {
printLibcallDecls();
EmitRemainingAutos();
- DbgInfo.EmitVarDebugInfo(M);
- DbgInfo.EmitEOF();
+ DbgInfo.EndModule(M);
O << "\n\t" << "END\n";
bool Result = AsmPrinter::doFinalization(M);
return Result;
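The rewritten runOnMachineFunction above replaces the hand-rolled CurLine counter with a cached DebugLoc and delegates directive emission to DbgInfo.ChangeDebugLoc, which still emits .line only when the source line actually changes. A stripped-down sketch of that emit-on-change logic, using hypothetical simplified types rather than the PIC16 classes:

    #include <iostream>

    // Emit a .line directive only when the instruction's source line
    // differs from the last one emitted; mirrors SwitchToLine's guard.
    struct DbgInfo {
      unsigned CurLine;
      DbgInfo() : CurLine(0) {}
      void ChangeDebugLoc(unsigned Line) {
        if (Line == CurLine) return;
        std::cout << "\t.line " << Line << "\n";
        CurLine = Line;
      }
    };

    int main() {
      DbgInfo Dbg;
      // Per-instruction source lines for an imaginary function body.
      unsigned Lines[] = { 10, 10, 11, 11, 12 };
      for (unsigned i = 0; i != sizeof(Lines) / sizeof(Lines[0]); ++i) {
        Dbg.ChangeDebugLoc(Lines[i]);
        std::cout << "\tmovlw ...\n";  // the instruction itself
      }
      return 0;
    }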
diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h
index 8bdcf72..3ec5659 100644
--- a/lib/Target/PIC16/PIC16AsmPrinter.h
+++ b/lib/Target/PIC16/PIC16AsmPrinter.h
@@ -32,7 +32,7 @@ namespace llvm {
explicit PIC16AsmPrinter(raw_ostream &O, PIC16TargetMachine &TM,
const TargetAsmInfo *T, CodeGenOpt::Level OL,
bool V)
- : AsmPrinter(O, TM, T, OL, V), DbgInfo(O,T) {
+ : AsmPrinter(O, TM, T, OL, V), DbgInfo(O, T) {
PTLI = TM.getTargetLowering();
PTAI = static_cast<const PIC16TargetAsmInfo *> (T);
}
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index d7ebea7..27551cd 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -14,91 +14,23 @@
#include "PIC16.h"
#include "PIC16DebugInfo.h"
#include "llvm/GlobalVariable.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/DebugLoc.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
- bool &HasAux, int Aux[],
- std::string &TypeName) {
- if (Ty.isBasicType(Ty.getTag())) {
- std::string Name = "";
- Ty.getName(Name);
- unsigned short BaseTy = GetTypeDebugNumber(Name);
- TypeNo = TypeNo << PIC16Dbg::S_BASIC;
- TypeNo = TypeNo | (0xffff & BaseTy);
- }
- else if (Ty.isDerivedType(Ty.getTag())) {
- switch(Ty.getTag())
- {
- case dwarf::DW_TAG_pointer_type:
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- TypeNo = TypeNo | PIC16Dbg::DT_PTR;
- break;
- default:
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- }
- DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom();
- PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
- }
- else if (Ty.isCompositeType(Ty.getTag())) {
- switch (Ty.getTag()) {
- case dwarf::DW_TAG_array_type: {
- DICompositeType CTy = DICompositeType(Ty.getGV());
- DIArray Elements = CTy.getTypeArray();
- unsigned short size = 1;
- unsigned short Dimension[4]={0,0,0,0};
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
- DIDescriptor Element = Elements.getElement(i);
- if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- TypeNo = TypeNo | PIC16Dbg::DT_ARY;
- DISubrange SubRange = DISubrange(Element.getGV());
- Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
- // Each dimension is represented by 2 bytes starting at byte 9.
- Aux[8+i*2+0] = Dimension[i];
- Aux[8+i*2+1] = Dimension[i] >> 8;
- size = size * Dimension[i];
- }
- }
- HasAux = true;
- // In auxillary entry for array, 7th and 8th byte represent array size.
- Aux[6] = size & 0xff;
- Aux[7] = size >> 8;
- DIType BaseType = CTy.getTypeDerivedFrom();
- PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName);
-
- break;
- }
- case dwarf:: DW_TAG_union_type:
- case dwarf::DW_TAG_structure_type: {
- DICompositeType CTy = DICompositeType(Ty.getGV());
- TypeNo = TypeNo << PIC16Dbg::S_BASIC;
- if (Ty.getTag() == dwarf::DW_TAG_structure_type)
- TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
- else
- TypeNo = TypeNo | PIC16Dbg::T_UNION;
- CTy.getName(TypeName);
- // UniqueSuffix is .number where number is obtained from
- // llvm.dbg.composite<number>.
- std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18);
- TypeName += UniqueSuffix;
- unsigned short size = CTy.getSizeInBits()/8;
- // 7th and 8th byte represent size.
- HasAux = true;
- Aux[6] = size & 0xff;
- Aux[7] = size >> 8;
- break;
- }
- case dwarf::DW_TAG_enumeration_type: {
- TypeNo = TypeNo << PIC16Dbg::S_BASIC;
- TypeNo = TypeNo | PIC16Dbg::T_ENUM;
- break;
- }
- default:
- TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
- }
- }
+/// PopulateDebugInfo - Populate the TypeNo, Aux[] and TagName from Ty.
+///
+void PIC16DbgInfo::PopulateDebugInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TagName) {
+ if (Ty.isBasicType(Ty.getTag()))
+ PopulateBasicTypeInfo (Ty, TypeNo);
+ else if (Ty.isDerivedType(Ty.getTag()))
+ PopulateDerivedTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
+ else if (Ty.isCompositeType(Ty.getTag()))
+ PopulateCompositeTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
else {
TypeNo = PIC16Dbg::T_NULL;
HasAux = false;
@@ -106,7 +38,127 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo,
return;
}
+/// PopulateBasicTypeInfo - Populate TypeNo for a basic type from Ty.
+///
+void PIC16DbgInfo::PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo) {
+ std::string Name = "";
+ Ty.getName(Name);
+ unsigned short BaseTy = GetTypeDebugNumber(Name);
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ TypeNo = TypeNo | (0xffff & BaseTy);
+}
+
+/// PopulateDerivedTypeInfo - Populate TypeNo, Aux[], TagName for derived type
+/// from Ty. Derived types are mostly pointers.
+///
+void PIC16DbgInfo::PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TagName) {
+
+ switch(Ty.getTag())
+ {
+ case dwarf::DW_TAG_pointer_type:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ TypeNo = TypeNo | PIC16Dbg::DT_PTR;
+ break;
+ default:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ }
+
+ // We also need to encode the information about the base type of the
+ // pointer in TypeNo.
+ DIType BaseType = DIDerivedType(Ty.getGV()).getTypeDerivedFrom();
+ PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
+}
+
+/// PopulateArrayTypeInfo - Populate TypeNo, Aux[] for array from Ty.
+void PIC16DbgInfo::PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TagName) {
+ DICompositeType CTy = DICompositeType(Ty.getGV());
+ DIArray Elements = CTy.getTypeArray();
+ unsigned short size = 1;
+ unsigned short Dimension[4]={0,0,0,0};
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) {
+ DIDescriptor Element = Elements.getElement(i);
+ if (Element.getTag() == dwarf::DW_TAG_subrange_type) {
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ TypeNo = TypeNo | PIC16Dbg::DT_ARY;
+ DISubrange SubRange = DISubrange(Element.getGV());
+ Dimension[i] = SubRange.getHi() - SubRange.getLo() + 1;
+ // Each dimension is represented by 2 bytes starting at byte 9.
+ Aux[8+i*2+0] = Dimension[i];
+ Aux[8+i*2+1] = Dimension[i] >> 8;
+ size = size * Dimension[i];
+ }
+ }
+ HasAux = true;
+ // In the auxiliary entry for an array, bytes 7 and 8 hold the array size.
+ Aux[6] = size & 0xff;
+ Aux[7] = size >> 8;
+ DIType BaseType = CTy.getTypeDerivedFrom();
+ PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TagName);
+}
+
+/// PopulateStructOrUnionTypeInfo - Populate TypeNo, Aux[], TagName for
+/// structure or union.
+///
+void PIC16DbgInfo::PopulateStructOrUnionTypeInfo (DIType Ty,
+ unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TagName) {
+ DICompositeType CTy = DICompositeType(Ty.getGV());
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ if (Ty.getTag() == dwarf::DW_TAG_structure_type)
+ TypeNo = TypeNo | PIC16Dbg::T_STRUCT;
+ else
+ TypeNo = TypeNo | PIC16Dbg::T_UNION;
+ CTy.getName(TagName);
+ // UniqueSuffix is .number where number is obtained from
+ // llvm.dbg.composite<number>.
+ std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18);
+ TagName += UniqueSuffix;
+ unsigned short size = CTy.getSizeInBits()/8;
+ // The 7th and 8th bytes hold the size.
+ HasAux = true;
+ Aux[6] = size & 0xff;
+ Aux[7] = size >> 8;
+}
+
+/// PopulateEnumTypeInfo - Populate TypeNo for enum from Ty.
+void PIC16DbgInfo::PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo) {
+ TypeNo = TypeNo << PIC16Dbg::S_BASIC;
+ TypeNo = TypeNo | PIC16Dbg::T_ENUM;
+}
+
+/// PopulateCompositeTypeInfo - Populate TypeNo, Aux[] and TagName for
+/// composite types from Ty.
+///
+void PIC16DbgInfo::PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TagName) {
+ switch (Ty.getTag()) {
+ case dwarf::DW_TAG_array_type: {
+ PopulateArrayTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
+ break;
+ }
+ case dwarf::DW_TAG_union_type:
+ case dwarf::DW_TAG_structure_type: {
+ PopulateStructOrUnionTypeInfo (Ty, TypeNo, HasAux, Aux, TagName);
+ break;
+ }
+ case dwarf::DW_TAG_enumeration_type: {
+ PopulateEnumTypeInfo (Ty, TypeNo);
+ break;
+ }
+ default:
+ TypeNo = TypeNo << PIC16Dbg::S_DERIVED;
+ }
+}
+
+/// GetTypeDebugNumber - Get debug type number for given type.
+///
unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) {
if (type == "char")
return PIC16Dbg::T_CHAR;
@@ -127,8 +179,10 @@ unsigned PIC16DbgInfo::GetTypeDebugNumber(std::string &type) {
else
return 0;
}
-
-short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) {
+
+/// getStorageClass - Get the storage class for the given debug variable.
+///
+short PIC16DbgInfo::getStorageClass(DIGlobalVariable DIGV) {
short ClassNo;
if (PAN::isLocalName(DIGV.getGlobal()->getName())) {
// Generating C_AUTO here fails due to error in linker. Change it once
@@ -142,12 +196,126 @@ short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) {
return ClassNo;
}
-void PIC16DbgInfo::Init(Module &M) {
- // Do all debug related initializations here.
- EmitFileDirective(M);
+/// BeginModule - Emit necessary debug info to start a Module and do other
+/// required initializations.
+void PIC16DbgInfo::BeginModule(Module &M) {
+ // Emit file directive for module.
+ GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
+ if (CU) {
+ EmitDebugDirectives = true;
+ SwitchToCU(CU);
+ }
+
+ // Emit debug info for decls of composite types.
EmitCompositeTypeDecls(M);
}
+/// Helper to find the first valid debug location in a function.
+///
+static const DebugLoc GetDebugLocForFunction(const MachineFunction &MF) {
+ DebugLoc DL;
+ for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end();
+ II != E; ++II) {
+ DL = II->getDebugLoc();
+ if (!DL.isUnknown())
+ return DL;
+ }
+ }
+ return DL;
+}
+
+/// BeginFunction - Emit necessary debug info to start a function.
+///
+void PIC16DbgInfo::BeginFunction(const MachineFunction &MF) {
+ if (! EmitDebugDirectives) return;
+
+ // Retrieve the first valid debug location and process it.
+ const DebugLoc &DL = GetDebugLocForFunction(MF);
+ ChangeDebugLoc(MF, DL, true);
+
+ EmitFunctBeginDI(MF.getFunction());
+
+ // Set the current line to 0 so a .line directive is generated after .bf.
+ CurLine = 0;
+}
+
+/// ChangeDebugLoc - Take necessary steps when DebugLoc changes.
+/// CurFile and CurLine may change as a result of this.
+///
+void PIC16DbgInfo::ChangeDebugLoc(const MachineFunction &MF,
+ const DebugLoc &DL, bool IsInBeginFunction) {
+ if (! EmitDebugDirectives) return;
+ assert (! DL.isUnknown() && "can't change to invalid debug loc");
+
+ GlobalVariable *CU = MF.getDebugLocTuple(DL).CompileUnit;
+ unsigned line = MF.getDebugLocTuple(DL).Line;
+
+ SwitchToCU(CU);
+ SwitchToLine(line, IsInBeginFunction);
+}
+
+/// SwitchToLine - Emit line directive for a new line.
+///
+void PIC16DbgInfo::SwitchToLine(unsigned Line, bool IsInBeginFunction) {
+ if (CurLine == Line) return;
+ if (!IsInBeginFunction) O << "\n\t.line " << Line << "\n";
+ CurLine = Line;
+}
+
+/// EndFunction - Emit .ef for end of function.
+///
+void PIC16DbgInfo::EndFunction(const MachineFunction &MF) {
+ if (! EmitDebugDirectives) return;
+ EmitFunctEndDI(MF.getFunction(), CurLine);
+}
+
+/// EndModule - Emit .eof for end of module.
+///
+void PIC16DbgInfo::EndModule(Module &M) {
+ if (! EmitDebugDirectives) return;
+ EmitVarDebugInfo(M);
+ if (CurFile != "") O << "\n\t.eof";
+}
+
+/// EmitCompositeTypeElements - Emit debug information for members of a
+/// composite type.
+///
+void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
+ std::string UniqueSuffix) {
+ unsigned long Value = 0;
+ DIArray Elements = CTy.getTypeArray();
+ for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
+ DIDescriptor Element = Elements.getElement(i);
+ unsigned short TypeNo = 0;
+ bool HasAux = false;
+ int ElementAux[PIC16Dbg::AuxSize] = { 0 };
+ std::string TagName = "";
+ std::string ElementName;
+ GlobalVariable *GV = Element.getGV();
+ DIDerivedType DITy(GV);
+ DITy.getName(ElementName);
+ unsigned short ElementSize = DITy.getSizeInBits()/8;
+ // Get the mangled name for this structure/union element.
+ std::string MangMemName = ElementName + UniqueSuffix;
+ PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName);
+ short Class;
+ if (CTy.getTag() == dwarf::DW_TAG_union_type)
+ Class = PIC16Dbg::C_MOU;
+ else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
+ Class = PIC16Dbg::C_MOS;
+ EmitSymbol(MangMemName, Class, TypeNo, Value);
+ if (CTy.getTag() == dwarf::DW_TAG_structure_type)
+ Value += ElementSize;
+ if (HasAux)
+ EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TagName);
+ }
+}
+
+/// EmitCompositeTypeDecls - Emit composite type declarations like structure
+/// and union declarations.
+///
void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
for(iplist<GlobalVariable>::iterator I = M.getGlobalList().begin(),
E = M.getGlobalList().end(); I != E; I++) {
@@ -178,33 +346,10 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
// Emit auxiliary debug information for structure/union tag.
EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize);
- unsigned long Value = 0;
- DIArray Elements = CTy.getTypeArray();
- for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) {
- DIDescriptor Element = Elements.getElement(i);
- unsigned short TypeNo = 0;
- bool HasAux = false;
- int ElementAux[PIC16Dbg::AuxSize] = { 0 };
- std::string TypeName = "";
- std::string ElementName;
- GlobalVariable *GV = Element.getGV();
- DIDerivedType DITy(GV);
- DITy.getName(ElementName);
- unsigned short ElementSize = DITy.getSizeInBits()/8;
- // Get mangleddd name for this structure/union element.
- std::string MangMemName = ElementName + UniqueSuffix;
- PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TypeName);
- short Class;
- if( CTy.getTag() == dwarf::DW_TAG_union_type)
- Class = PIC16Dbg::C_MOU;
- else if (CTy.getTag() == dwarf::DW_TAG_structure_type)
- Class = PIC16Dbg::C_MOS;
- EmitSymbol(MangMemName, Class, TypeNo, Value);
- if (CTy.getTag() == dwarf::DW_TAG_structure_type)
- Value += ElementSize;
- if (HasAux)
- EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TypeName);
- }
+
+ // Emit members.
+ EmitCompositeTypeElements (CTy, UniqueSuffix);
+
// Emit mangled Symbol for end of structure/union.
std::string EOSSymbol = ".eos" + UniqueSuffix;
EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS);
@@ -214,6 +359,8 @@ void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) {
}
}
+/// EmitFunctBeginDI - Emit .bf for function.
+///
void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
std::string FunctName = F->getName();
if (EmitDebugDirectives) {
@@ -221,16 +368,20 @@ void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) {
std::string BlockBeginSym = ".bb." + FunctName;
int BFAux[PIC16Dbg::AuxSize] = {0};
- BFAux[4] = FunctBeginLine;
- BFAux[5] = FunctBeginLine >> 8;
+ BFAux[4] = CurLine;
+ BFAux[5] = CurLine >> 8;
+
// Emit debug directives for beginning of function.
EmitSymbol(FunctBeginSym, PIC16Dbg::C_FCN);
EmitAuxEntry(FunctBeginSym, BFAux, PIC16Dbg::AuxSize);
+
EmitSymbol(BlockBeginSym, PIC16Dbg::C_BLOCK);
EmitAuxEntry(BlockBeginSym, BFAux, PIC16Dbg::AuxSize);
}
}
+/// EmitFunctEndDI - Emit .ef for function end.
+///
void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
std::string FunctName = F->getName();
if (EmitDebugDirectives) {
@@ -241,8 +392,8 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
EmitSymbol(BlockEndSym, PIC16Dbg::C_BLOCK);
int EFAux[PIC16Dbg::AuxSize] = {0};
// 5th and 6th byte stand for line number.
- EFAux[4] = Line;
- EFAux[5] = Line >> 8;
+ EFAux[4] = CurLine;
+ EFAux[5] = CurLine >> 8;
EmitAuxEntry(BlockEndSym, EFAux, PIC16Dbg::AuxSize);
EmitSymbol(FunctEndSym, PIC16Dbg::C_FCN);
EmitAuxEntry(FunctEndSym, EFAux, PIC16Dbg::AuxSize);
@@ -251,15 +402,18 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) {
/// EmitAuxEntry - Emit Auxiliary debug information.
///
-void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num,
- std::string tag) {
+void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
+ std::string TagName) {
O << "\n\t.dim " << VarName << ", 1" ;
- if (tag != "")
- O << ", " << tag;
- for (int i = 0; i<num; i++)
+ // TagName is emitted in case of structure/union objects.
+ if (TagName != "")
+ O << ", " << TagName;
+ for (int i = 0; i<Num; i++)
O << "," << Aux[i];
}
+/// EmitSymbol - Emit .def for a symbol. Value is offset for the member.
+///
void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
Type, unsigned long Value) {
O << "\n\t" << ".def "<< Name << ", type = " << Type << ", class = "
@@ -268,6 +422,8 @@ void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short
O << ", value = " << Value;
}
+/// EmitVarDebugInfo - Emit debug information for all variables.
+///
void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.global_variables");
if (!Root)
@@ -283,47 +439,45 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) {
unsigned short TypeNo = 0;
bool HasAux = false;
int Aux[PIC16Dbg::AuxSize] = { 0 };
- std::string TypeName = "";
+ std::string TagName = "";
std::string VarName = TAI->getGlobalPrefix()+DIGV.getGlobal()->getName();
- PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TypeName);
+ PopulateDebugInfo(Ty, TypeNo, HasAux, Aux, TagName);
// Emit debug info only if type information is available.
if (TypeNo != PIC16Dbg::T_NULL) {
O << "\n\t.type " << VarName << ", " << TypeNo;
- short ClassNo = getClass(DIGV);
+ short ClassNo = getStorageClass(DIGV);
O << "\n\t.class " << VarName << ", " << ClassNo;
if (HasAux)
- EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TypeName);
+ EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TagName);
}
}
}
O << "\n";
}
-void PIC16DbgInfo::EmitFileDirective(Module &M) {
- GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit");
- if (CU) {
- EmitDebugDirectives = true;
- EmitFileDirective(CU, false);
- }
-}
+/// SwitchToCU - Switch to a new compilation unit.
+///
+void PIC16DbgInfo::SwitchToCU(GlobalVariable *CU) {
+ // Get the file path from CU.
+ DICompileUnit cu(CU);
+ std::string DirName, FileName;
+ std::string FilePath = cu.getDirectory(DirName) + "/" +
+ cu.getFilename(FileName);
-void PIC16DbgInfo::EmitFileDirective(GlobalVariable *CU, bool EmitEof) {
- std::string Dir, FN;
- DICompileUnit DIUnit(CU);
- std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN);
- if ( File != CurFile ) {
- if (EmitEof)
- EmitEOF();
- O << "\n\t.file\t\"" << File << "\"\n" ;
- CurFile = File;
- }
+ // Nothing to do if the source file is still the same.
+ if ( FilePath == CurFile ) return;
+
+ // Otherwise, close the current file and start a new one.
+ if (CurFile != "") O << "\n\t.eof";
+ O << "\n\t.file\t\"" << FilePath << "\"\n" ;
+ CurFile = FilePath;
+ CurLine = 0;
}
+/// EmitEOF - Emit .eof for end of file.
+///
void PIC16DbgInfo::EmitEOF() {
if (CurFile != "")
O << "\n\t.EOF";
}
-void PIC16DbgInfo::SetFunctBeginLine(unsigned line) {
- FunctBeginLine = line;
-}
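The Populate* helpers split out above all build TypeNo the same way: each derived level shifts the accumulated value left by a field width and ORs in a tag, so after the recursion the outermost derived tag sits in the high bits and the basic type in the low bits. A worked sketch of the encoding for a pointer-to-char, using illustrative field widths and tag values rather than PIC16Dbg's exact constants:

    #include <cstdio>

    // Illustrative field widths and tags; PIC16Dbg's real constants differ.
    enum { S_BASIC = 4, S_DERIVED = 2 };  // bits shifted per level
    enum { T_CHAR = 2, DT_PTR = 1 };      // a basic tag and a derived tag

    int main() {
      unsigned short TypeNo = 0;
      // PopulateDerivedTypeInfo for the pointer level:
      TypeNo = (unsigned short)((TypeNo << S_DERIVED) | DT_PTR);
      // ...which then recurses; PopulateBasicTypeInfo for the pointee:
      TypeNo = (unsigned short)((TypeNo << S_BASIC) | T_CHAR);
      // DT_PTR now sits above T_CHAR: 0x12 with these widths.
      std::printf("char* -> TypeNo 0x%x\n", TypeNo);
      return 0;
    }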
diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h
index 9d50380..d126d85 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.h
+++ b/lib/Target/PIC16/PIC16DebugInfo.h
@@ -20,6 +20,8 @@
#include <map>
namespace llvm {
+ class MachineFunction;
+ class DebugLoc;
namespace PIC16Dbg {
enum VarType {
T_NULL,
@@ -94,33 +96,64 @@ namespace llvm {
raw_ostream &O;
const TargetAsmInfo *TAI;
std::string CurFile;
+ unsigned CurLine;
+
// EmitDebugDirectives is set if debug information is available. Default
// value for it is false.
bool EmitDebugDirectives;
- unsigned FunctBeginLine;
+
public:
PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) {
- CurFile = "";
+ CurFile = "";
+ CurLine = 0;
EmitDebugDirectives = false;
}
- void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux,
+
+ void BeginModule (Module &M);
+ void BeginFunction (const MachineFunction &MF);
+ void ChangeDebugLoc (const MachineFunction &MF, const DebugLoc &DL,
+ bool IsInBeginFunction = false);
+ void EndFunction (const MachineFunction &MF);
+ void EndModule (Module &M);
+
+
+ private:
+ void SwitchToCU (GlobalVariable *CU);
+ void SwitchToLine (unsigned Line, bool IsInBeginFunction = false);
+
+ void PopulateDebugInfo (DIType Ty, unsigned short &TypeNo, bool &HasAux,
int Aux[], std::string &TypeName);
- unsigned GetTypeDebugNumber(std::string &type);
- short getClass(DIGlobalVariable DIGV);
+ void PopulateBasicTypeInfo (DIType Ty, unsigned short &TypeNo);
+ void PopulateDerivedTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TypeName);
+
+ void PopulateCompositeTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TypeName);
+ void PopulateArrayTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TypeName);
+
+ void PopulateStructOrUnionTypeInfo (DIType Ty, unsigned short &TypeNo,
+ bool &HasAux, int Aux[],
+ std::string &TypeName);
+ void PopulateEnumTypeInfo (DIType Ty, unsigned short &TypeNo);
+
+ unsigned GetTypeDebugNumber(std::string &Type);
+ short getStorageClass(DIGlobalVariable DIGV);
void EmitFunctBeginDI(const Function *F);
- void Init(Module &M);
void EmitCompositeTypeDecls(Module &M);
+ void EmitCompositeTypeElements (DICompositeType CTy,
+ std::string UniqueSuffix);
void EmitFunctEndDI(const Function *F, unsigned Line);
void EmitAuxEntry(const std::string VarName, int Aux[],
- int num = PIC16Dbg::AuxSize, std::string tag = "");
+ int num = PIC16Dbg::AuxSize, std::string TagName = "");
inline void EmitSymbol(std::string Name, short Class,
unsigned short Type = PIC16Dbg::T_NULL,
unsigned long Value = 0);
void EmitVarDebugInfo(Module &M);
- void EmitFileDirective(Module &M);
- void EmitFileDirective(GlobalVariable *CU, bool EmitEof = true);
void EmitEOF();
- void SetFunctBeginLine(unsigned line);
};
} // end namespace llvm;
#endif
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index ba465f3..f113a48 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -31,42 +31,72 @@ static const char *getIntrinsicName(unsigned opcode) {
std::string Basename;
switch(opcode) {
default: assert (0 && "do not know intrinsic name");
+ // Arithmetic Right shift for integer types.
case PIC16ISD::SRA_I8: Basename = "sra.i8"; break;
case RTLIB::SRA_I16: Basename = "sra.i16"; break;
case RTLIB::SRA_I32: Basename = "sra.i32"; break;
+ // Left shift for integer types.
case PIC16ISD::SLL_I8: Basename = "sll.i8"; break;
case RTLIB::SHL_I16: Basename = "sll.i16"; break;
case RTLIB::SHL_I32: Basename = "sll.i32"; break;
+ // Logical Right Shift for integer types.
case PIC16ISD::SRL_I8: Basename = "srl.i8"; break;
case RTLIB::SRL_I16: Basename = "srl.i16"; break;
case RTLIB::SRL_I32: Basename = "srl.i32"; break;
+ // Multiply for integer types.
case PIC16ISD::MUL_I8: Basename = "mul.i8"; break;
case RTLIB::MUL_I16: Basename = "mul.i16"; break;
case RTLIB::MUL_I32: Basename = "mul.i32"; break;
+ // Signed division for integers.
case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break;
case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break;
+
+ // Unsigned division for integers.
case RTLIB::UDIV_I16: Basename = "udiv.i16"; break;
case RTLIB::UDIV_I32: Basename = "udiv.i32"; break;
+ // Signed modulus (remainder) for integers.
case RTLIB::SREM_I16: Basename = "srem.i16"; break;
case RTLIB::SREM_I32: Basename = "srem.i32"; break;
+
+ // Unsigned modulus (remainder) for integers.
case RTLIB::UREM_I16: Basename = "urem.i16"; break;
case RTLIB::UREM_I32: Basename = "urem.i32"; break;
- case RTLIB::FPTOSINT_F32_I32:
- Basename = "f32_to_si32"; break;
- case RTLIB::SINTTOFP_I32_F32:
- Basename = "si32_to_f32"; break;
+ //////////////////////
+ // LIBCALLS FOR FLOATS
+ //////////////////////
+
+ // Float to signed integers.
+ case RTLIB::FPTOSINT_F32_I8: Basename = "f32_to_si32"; break;
+ case RTLIB::FPTOSINT_F32_I16: Basename = "f32_to_si32"; break;
+ case RTLIB::FPTOSINT_F32_I32: Basename = "f32_to_si32"; break;
+
+ // Signed integers to float. char and int are first sign extended to i32
+ // before being converted to float, so an I8_F32 or I16_F32 isn't required.
+ case RTLIB::SINTTOFP_I32_F32: Basename = "si32_to_f32"; break;
+
+ // Float to Unsigned conversions.
+ // Signed conversion can be used for unsigned conversion as well.
+ // In signed and unsigned versions only the interpretation of the
+ // MSB is different. Bit representation remains the same.
+ case RTLIB::FPTOUINT_F32_I8: Basename = "f32_to_si32"; break;
+ case RTLIB::FPTOUINT_F32_I16: Basename = "f32_to_si32"; break;
+ case RTLIB::FPTOUINT_F32_I32: Basename = "f32_to_si32"; break;
+
+ // Unsigned to Float conversions. char and int are first zero extended
+ // before being converted to float.
+ case RTLIB::UINTTOFP_I32_F32: Basename = "ui32_to_f32"; break;
+ // Floating point add, sub, mul, div.
case RTLIB::ADD_F32: Basename = "add.f32"; break;
case RTLIB::SUB_F32: Basename = "sub.f32"; break;
case RTLIB::MUL_F32: Basename = "mul.f32"; break;
case RTLIB::DIV_F32: Basename = "div.f32"; break;
-
}
std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@@ -83,7 +113,7 @@ static const char *getIntrinsicName(unsigned opcode) {
// PIC16TargetLowering Constructor.
PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
: TargetLowering(TM), TmpSize(0) {
-
+
Subtarget = &TM.getSubtarget<PIC16Subtarget>();
addRegisterClass(MVT::i8, PIC16::GPRRegisterClass);
@@ -114,6 +144,7 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
// Signed division lib call names
setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16));
setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32));
+
// Unsigned division lib call names
setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16));
setLibcallName(RTLIB::UDIV_I32, getIntrinsicName(RTLIB::UDIV_I32));
@@ -121,15 +152,36 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
// Signed remainder lib call names
setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16));
setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32));
+
// Unsigned remainder lib call names
setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16));
setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32));
- // Floating point operations
+ // Floating point to signed int conversions.
+ setLibcallName(RTLIB::FPTOSINT_F32_I8,
+ getIntrinsicName(RTLIB::FPTOSINT_F32_I8));
+ setLibcallName(RTLIB::FPTOSINT_F32_I16,
+ getIntrinsicName(RTLIB::FPTOSINT_F32_I16));
setLibcallName(RTLIB::FPTOSINT_F32_I32,
getIntrinsicName(RTLIB::FPTOSINT_F32_I32));
+
+ // Signed int to floats.
setLibcallName(RTLIB::SINTTOFP_I32_F32,
getIntrinsicName(RTLIB::SINTTOFP_I32_F32));
+
+ // Floating points to unsigned ints.
+ setLibcallName(RTLIB::FPTOUINT_F32_I8,
+ getIntrinsicName(RTLIB::FPTOUINT_F32_I8));
+ setLibcallName(RTLIB::FPTOUINT_F32_I16,
+ getIntrinsicName(RTLIB::FPTOUINT_F32_I16));
+ setLibcallName(RTLIB::FPTOUINT_F32_I32,
+ getIntrinsicName(RTLIB::FPTOUINT_F32_I32));
+
+ // Unsigned int to floats.
+ setLibcallName(RTLIB::UINTTOFP_I32_F32,
+ getIntrinsicName(RTLIB::UINTTOFP_I32_F32));
+
+ // Floating point add, sub, mul, div.
setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32));
setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32));
setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32));
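The new FPTOUINT_* entries deliberately reuse the signed routine f32_to_si32; as the comment in getIntrinsicName argues, the produced bit pattern is the same and only the caller's interpretation of the MSB differs, which is sound for values representable in the signed range. A small standalone check of that claim:

    #include <cstdint>
    #include <cstdio>

    // For values in the signed range, the signed and unsigned conversions
    // produce the same bits; only the interpretation of the MSB differs.
    int main() {
      float F = 12345.0f;
      int32_t S = (int32_t)F;    // what f32_to_si32 computes
      uint32_t U = (uint32_t)S;  // same bits, read as unsigned
      std::printf("signed = %d, unsigned = %u, match = %s\n",
                  S, U, U == 12345u ? "yes" : "no");
      return 0;
    }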
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index bda6326..d4f46a4 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -37,6 +37,11 @@ X("pic16", "PIC16 14-bit [experimental].");
static RegisterTarget<CooperTargetMachine>
Y("cooper", "PIC16 Cooper [experimental].");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializePIC16Target() { }
+}
+
// PIC16TargetMachine - Traditional PIC16 Machine.
PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS,
bool Cooper)
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index 7723982..c7bfb6d 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -646,19 +646,10 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
bool PPCLinuxAsmPrinter::doInitialization(Module &M) {
bool Result = AsmPrinter::doInitialization(M);
-
- // Emit initial debug information.
+ DW = getAnalysisIfAvailable<DwarfWriter>();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
assert(MMI);
- DW = getAnalysisIfAvailable<DwarfWriter>();
- assert(DW && "DwarfWriter is not available");
- DW->BeginModule(&M, MMI, O, this, TAI);
-
- // GNU as handles section names wrapped in quotes
- Mang->setUseQuotes(true);
-
SwitchToSection(TAI->getTextSection());
-
return Result;
}
@@ -875,18 +866,9 @@ bool PPCDarwinAsmPrinter::doInitialization(Module &M) {
O << "\t.machine " << CPUDirectives[Directive] << '\n';
bool Result = AsmPrinter::doInitialization(M);
-
- // Emit initial debug information.
- // We need this for Personality functions.
- // AsmPrinter::doInitialization should have done this analysis.
+ DW = getAnalysisIfAvailable<DwarfWriter>();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
assert(MMI);
- DW = getAnalysisIfAvailable<DwarfWriter>();
- assert(DW && "DwarfWriter is not available");
- DW->BeginModule(&M, MMI, O, this, TAI);
-
- // Darwin wants symbols to be quoted if they have complex names.
- Mang->setUseQuotes(true);
// Prime text sections so they are adjacent. This reduces the likelihood a
// large data or debug section causes a branch to exceed 16M limit.
@@ -1202,3 +1184,9 @@ namespace {
extern "C" int PowerPCAsmPrinterForceLink;
int PowerPCAsmPrinterForceLink = 0;
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializePowerPCAsmPrinter() { }
+}
diff --git a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
index c69e591..ebffd69 100644
--- a/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
+++ b/lib/Target/PowerPC/PPCTargetAsmInfo.cpp
@@ -19,59 +19,19 @@
using namespace llvm;
using namespace llvm::dwarf;
-PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM):
+PPCDarwinTargetAsmInfo::PPCDarwinTargetAsmInfo(const PPCTargetMachine &TM) :
PPCTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
PCSymbol = ".";
CommentString = ";";
- GlobalPrefix = "_";
- PrivateGlobalPrefix = "L";
- LessPrivateGlobalPrefix = "l";
- StringConstantPrefix = "\1LC";
ConstantPoolSection = "\t.const\t";
- JumpTableDataSection = ".const";
- CStringSection = "\t.cstring";
- if (TM.getRelocationModel() == Reloc::Static) {
- StaticCtorsSection = ".constructor";
- StaticDtorsSection = ".destructor";
- } else {
- StaticCtorsSection = ".mod_init_func";
- StaticDtorsSection = ".mod_term_func";
- }
- HasSingleParameterDotFile = false;
- SwitchToSectionDirective = "\t.section ";
UsedDirective = "\t.no_dead_strip\t";
- WeakDefDirective = "\t.weak_definition ";
- WeakRefDirective = "\t.weak_reference ";
- HiddenDirective = "\t.private_extern ";
SupportsExceptionHandling = true;
- NeedsIndirectEncoding = true;
- NeedsSet = true;
- BSSSection = 0;
DwarfEHFrameSection =
- ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
+ ".section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support";
DwarfExceptionSection = ".section __DATA,__gcc_except_tab";
GlobalEHDirective = "\t.globl\t";
SupportsWeakOmittedEHFrame = false;
-
- DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
- DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
- DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
- DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
- DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
- DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
- DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
- DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
- DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
- DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
- DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
-
- // In non-PIC modes, emit a special label before jump tables so that the
- // linker can perform more accurate dead code stripping.
- if (TM.getRelocationModel() != Reloc::PIC_) {
- // Emit a local label that is preserved until the linker runs.
- JumpTableSpecialLabelPrefix = "l";
- }
}
/// PreferredEHDataFormat - This hook allows the target to select data
@@ -131,7 +91,7 @@ PPCLinuxTargetAsmInfo::PPCLinuxTargetAsmInfo(const PPCTargetMachine &TM) :
DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
- DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+ DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
PCSymbol = ".";
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index ef3f0fc..3e89885 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -35,6 +35,11 @@ X("ppc32", "PowerPC 32");
static RegisterTarget<PPC64TargetMachine>
Y("ppc64", "PowerPC 64");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializePowerPCTarget() { }
+}
+
// No assembler printer by default
PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0;
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index 61707f5..6a2fdca 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "Sparc.h"
#include "SparcInstrInfo.h"
+#include "SparcTargetMachine.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
@@ -24,8 +25,6 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Target/TargetAsmInfo.h"
-#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
@@ -88,6 +87,7 @@ FunctionPass *llvm::createSparcCodePrinterPass(raw_ostream &o,
return new SparcAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
}
+
/// runOnMachineFunction - This uses the printInstruction()
/// method to print assembly for each instruction.
///
@@ -353,3 +353,17 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
+
+namespace {
+ static struct Register {
+ Register() {
+ SparcTargetMachine::registerAsmPrinter(createSparcCodePrinterPass);
+ }
+ } Registrator;
+}
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
+namespace llvm {
+ void InitializeSparcAsmPrinter() { }
+}
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index eda0309..fd0f124 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -18,17 +18,18 @@
#include "llvm/Target/TargetMachineRegistry.h"
using namespace llvm;
-/// SparcTargetMachineModule - Note that this is used on hosts that
-/// cannot link in a library unless there are references into the
-/// library. In particular, it seems that it is not possible to get
-/// things to work on Win32 without this. Though it is unused, do not
-/// remove it.
-extern "C" int SparcTargetMachineModule;
-int SparcTargetMachineModule = 0;
-
// Register the target.
static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC");
+// No assembler printer by default
+SparcTargetMachine::AsmPrinterCtorFn SparcTargetMachine::AsmPrinterCtor = 0;
+
+
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeSparcTarget() { }
+}
+
const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const {
// FIXME: Handle Solaris subtarget someday :)
return new SparcELFTargetAsmInfo(*this);
@@ -89,6 +90,8 @@ bool SparcTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
bool Verbose,
raw_ostream &Out) {
// Output assembly language.
- PM.add(createSparcCodePrinterPass(Out, *this, OptLevel, Verbose));
+ assert(AsmPrinterCtor && "AsmPrinter was not linked in");
+ if (AsmPrinterCtor)
+ PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
return false;
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index 40b44f2..8afcc73 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -35,6 +35,14 @@ class SparcTargetMachine : public LLVMTargetMachine {
protected:
virtual const TargetAsmInfo *createTargetAsmInfo() const;
+ // To avoid having the target depend on the asmprinter libraries, the
+ // asmprinter registers its pass-constructor pointer here at startup time
+ // if it is linked in.
+ typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
+ TargetMachine &tm,
+ CodeGenOpt::Level OptLevel,
+ bool verbose);
+ static AsmPrinterCtorFn AsmPrinterCtor;
+
public:
SparcTargetMachine(const Module &M, const std::string &FS);
@@ -56,6 +64,10 @@ public:
virtual bool addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
bool Verbose, raw_ostream &Out);
+
+ static void registerAsmPrinter(AsmPrinterCtorFn F) {
+ AsmPrinterCtor = F;
+ }
};
} // end namespace llvm
diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp
index 6a2de6f..3f5f1bd 100644
--- a/lib/Target/TargetAsmInfo.cpp
+++ b/lib/Target/TargetAsmInfo.cpp
@@ -24,10 +24,10 @@
#include "llvm/Support/Dwarf.h"
#include <cctype>
#include <cstring>
-
using namespace llvm;
-void TargetAsmInfo::fillDefaultValues() {
+TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm)
+: TM(tm) {
BSSSection = "\t.bss";
BSSSection_ = 0;
ReadOnlySection = 0;
@@ -58,6 +58,7 @@ void TargetAsmInfo::fillDefaultValues() {
InlineAsmEnd = "#NO_APP";
AssemblerDialect = 0;
StringConstantPrefix = ".str";
+ AllowQuotesInName = false;
ZeroDirective = "\t.zero\t";
ZeroDirectiveSuffix = 0;
AsciiDirective = "\t.ascii\t";
@@ -102,7 +103,6 @@ void TargetAsmInfo::fillDefaultValues() {
SupportsExceptionHandling = false;
DwarfRequiresFrameSection = true;
DwarfUsesInlineInfoSection = false;
- SupportsMacInfoSection = true;
NonLocalEHFrameLabel = false;
GlobalEHDirective = 0;
SupportsWeakOmittedEHFrame = true;
@@ -118,7 +118,7 @@ void TargetAsmInfo::fillDefaultValues() {
DwarfLocSection = ".debug_loc";
DwarfARangesSection = ".debug_aranges";
DwarfRangesSection = ".debug_ranges";
- DwarfMacInfoSection = ".debug_macinfo";
+ DwarfMacroInfoSection = ".debug_macinfo";
DwarfEHFrameSection = ".eh_frame";
DwarfExceptionSection = ".gcc_except_table";
AsmTransCBE = 0;
@@ -126,11 +126,6 @@ void TargetAsmInfo::fillDefaultValues() {
DataSection = getUnnamedSection("\t.data", SectionFlags::Writeable);
}
-TargetAsmInfo::TargetAsmInfo(const TargetMachine &tm)
- : TM(tm) {
- fillDefaultValues();
-}
-
TargetAsmInfo::~TargetAsmInfo() {
}
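The TargetAsmInfo change retires the separate fillDefaultValues() helper and initializes the defaults directly in the base constructor, so construction is single-phase and subclasses such as PPCLinuxTargetAsmInfo simply overwrite members afterwards. A sketch of the resulting shape, with a heavily trimmed member list:

    #include <string>

    struct TargetMachine {};

    // Defaults now live in the base constructor itself; no second phase.
    struct TargetAsmInfo {
      const TargetMachine& TM;
      std::string BSSSection;
      bool AllowQuotesInName;
      explicit TargetAsmInfo(const TargetMachine& tm)
        : TM(tm), BSSSection("\t.bss"), AllowQuotesInName(false) {}
      virtual ~TargetAsmInfo() {}
    };

    // A subclass just overwrites the members it cares about afterwards.
    struct PPCLinuxTargetAsmInfo : TargetAsmInfo {
      explicit PPCLinuxTargetAsmInfo(const TargetMachine& tm)
        : TargetAsmInfo(tm) {
        BSSSection = "\t.section\t.bss";
      }
    };

    int main() {
      TargetMachine TM;
      PPCLinuxTargetAsmInfo Info(TM);
      return Info.AllowQuotesInName ? 1 : 0;
    }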
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
index dbd03d8..368bcaa 100644
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -2,6 +2,7 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
add_partially_linked_object(LLVMX86AsmPrinter
X86ATTAsmPrinter.cpp
+ X86ATTInstPrinter.cpp
X86AsmPrinter.cpp
X86IntelAsmPrinter.cpp
)
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
index 8afe2ea..60ed4f0 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
@@ -26,8 +26,10 @@
#include "llvm/Type.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Mangler.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmInfo.h"
@@ -36,6 +38,9 @@ using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
+static cl::opt<bool> NewAsmPrinter("experimental-asm-printer",
+ cl::Hidden);
+
static std::string getPICLabelString(unsigned FnNum,
const TargetAsmInfo *TAI,
const X86Subtarget* Subtarget) {
@@ -266,7 +271,7 @@ bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
O << "\t.size\t" << CurrentFnName << ", .-" << CurrentFnName << '\n';
// Emit post-function debug information.
- if (TAI->doesSupportDebugInformation())
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
DW->EndFunction(&MF);
// Print out jump tables referenced by the function.
@@ -291,6 +296,136 @@ static inline bool shouldPrintStub(TargetMachine &TM, const X86Subtarget* ST) {
return ST->isPICStyleStub() && TM.getRelocationModel() != Reloc::Static;
}
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value. These print slightly differently, for
+/// example, a $ is not emitted.
+void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: assert(0 && "Unknown pcrel immediate operand");
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm);
+ return;
+
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+ decorateName(Name, GV);
+
+ bool needCloseParen = false;
+ if (Name[0] == '$') {
+ // The name begins with a dollar-sign. In order to avoid having it look
+ // like an integer immediate to the assembler, enclose it in parens.
+ O << '(';
+ needCloseParen = true;
+ }
+
+ if (shouldPrintStub(TM, Subtarget)) {
+ // Link-once, declaration, or Weakly-linked global variables need
+ // non-lazily-resolved stubs
+ if (GV->isDeclaration() || GV->isWeakForLinker()) {
+ // Dynamically-resolved functions need a stub for the function.
+ if (isa<Function>(GV)) {
+ // Function stubs are no longer needed for Mac OS X 10.5 and up.
+ if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) {
+ O << Name;
+ } else {
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ }
+ } else if (GV->hasHiddenVisibility()) {
+ if (!GV->isDeclaration() && !GV->hasCommonLinkage())
+ // Definition is not definitely in the current translation unit.
+ O << Name;
+ else {
+ HiddenGVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ } else {
+ GVStubs.insert(Name);
+ printSuffixedName(Name, "$non_lazy_ptr");
+ }
+ } else {
+ if (GV->hasDLLImportLinkage())
+ O << "__imp_";
+ O << Name;
+ }
+ } else {
+ if (GV->hasDLLImportLinkage()) {
+ O << "__imp_";
+ }
+ O << Name;
+
+ if (shouldPrintPLT(TM, Subtarget)) {
+ // Assemble call via PLT for externally visible symbols
+ if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
+ !GV->hasLocalLinkage())
+ O << "@PLT";
+ }
+ if (Subtarget->isTargetCygMing() && GV->isDeclaration())
+ // Save function name for later type emission
+ FnStubs.insert(Name);
+ }
+
+ if (GV->hasExternalWeakLinkage())
+ ExtWeakSymbols.insert(GV);
+
+ printOffset(MO.getOffset());
+
+ if (needCloseParen)
+ O << ')';
+ return;
+ }
+
+ case MachineOperand::MO_ExternalSymbol: {
+ bool needCloseParen = false;
+ std::string Name(TAI->getGlobalPrefix());
+ Name += MO.getSymbolName();
+ // Print function stub suffix unless it's Mac OS X 10.5 and up.
+ if (shouldPrintStub(TM, Subtarget) &&
+ !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) {
+ FnStubs.insert(Name);
+ printSuffixedName(Name, "$stub");
+ return;
+ }
+
+ if (Name[0] == '$') {
+ // The name begins with a dollar-sign. In order to avoid having it look
+ // like an integer immediate to the assembler, enclose it in parens.
+ O << '(';
+ needCloseParen = true;
+ }
+
+ O << Name;
+
+ if (shouldPrintPLT(TM, Subtarget)) {
+ std::string GOTName(TAI->getGlobalPrefix());
+ GOTName+="_GLOBAL_OFFSET_TABLE_";
+ if (Name == GOTName)
+ // HACK! Emit extra offset to PC during printing GOT offset to
+ // compensate for the size of popl instruction. The resulting code
+ // should look like:
+ // call .piclabel
+ // piclabel:
+ // popl %some_register
+ // addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
+ O << " + [.-"
+ << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
+
+ O << "@PLT";
+ }
+
+ if (needCloseParen)
+ O << ')';
+
+ return;
+ }
+ }
+}
+
void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
const char *Modifier, bool NotRIPRel) {
const MachineOperand &MO = MI->getOperand(OpNo);
@@ -312,14 +447,10 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
case MachineOperand::MO_Immediate:
if (!Modifier || (strcmp(Modifier, "debug") &&
- strcmp(Modifier, "mem") &&
- strcmp(Modifier, "call")))
+ strcmp(Modifier, "mem")))
O << '$';
O << MO.getImm();
return;
- case MachineOperand::MO_MachineBasicBlock:
- printBasicBlockLabel(MO.getMBB(), false, false, VerboseAsm);
- return;
case MachineOperand::MO_JumpTableIndex: {
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
if (!isMemOp) O << '$';
@@ -359,8 +490,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
return;
}
case MachineOperand::MO_GlobalAddress: {
- bool isCallOp = Modifier && !strcmp(Modifier, "call");
- bool isMemOp = Modifier && !strcmp(Modifier, "mem");
+ bool isMemOp = Modifier && !strcmp(Modifier, "mem");
bool needCloseParen = false;
const GlobalValue *GV = MO.getGlobal();
@@ -369,7 +499,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// If GV is an alias then use the aliasee for determining
// thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
- GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
}
bool isThreadLocal = GVar && GVar->isThreadLocal();
@@ -377,7 +507,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
std::string Name = Mang->getValueName(GV);
decorateName(Name, GV);
- if (!isMemOp && !isCallOp)
+ if (!isMemOp)
O << '$';
else if (Name[0] == '$') {
// The name begins with a dollar-sign. In order to avoid having it look
@@ -391,15 +521,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// non-lazily-resolved stubs
if (GV->isDeclaration() || GV->isWeakForLinker()) {
// Dynamically-resolved functions need a stub for the function.
- if (isCallOp && isa<Function>(GV)) {
- // Function stubs are no longer needed for Mac OS X 10.5 and up.
- if (Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9) {
- O << Name;
- } else {
- FnStubs.insert(Name);
- printSuffixedName(Name, "$stub");
- }
- } else if (GV->hasHiddenVisibility()) {
+ if (GV->hasHiddenVisibility()) {
if (!GV->isDeclaration() && !GV->hasCommonLinkage())
// Definition is not definitely in the current translation unit.
O << Name;
@@ -417,25 +539,12 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
O << Name;
}
- if (!isCallOp && TM.getRelocationModel() == Reloc::PIC_)
+ if (TM.getRelocationModel() == Reloc::PIC_)
O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget);
} else {
- if (GV->hasDLLImportLinkage()) {
+ if (GV->hasDLLImportLinkage())
O << "__imp_";
- }
O << Name;
-
- if (isCallOp) {
- if (shouldPrintPLT(TM, Subtarget)) {
- // Assemble call via PLT for externally visible symbols
- if (!GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() &&
- !GV->hasLocalLinkage())
- O << "@PLT";
- }
- if (Subtarget->isTargetCygMing() && GV->isDeclaration())
- // Save function name for later type emission
- FnStubs.insert(Name);
- }
}
if (GV->hasExternalWeakLinkage())
@@ -443,6 +552,10 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
printOffset(MO.getOffset());
+ if (needCloseParen)
+ O << ')';
+
+ bool isRIPRelative = false;
if (isThreadLocal) {
TLSModel::Model model = getTLSModel(GVar, TM.getRelocationModel());
switch (model) {
@@ -456,7 +569,8 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
case TLSModel::InitialExec:
if (Subtarget->is64Bit()) {
assert (!NotRIPRel);
- O << "@GOTTPOFF(%rip)";
+ O << "@GOTTPOFF";
+ isRIPRelative = true;
} else {
O << "@INDNTPOFF";
}
@@ -476,43 +590,33 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
O << "@GOT";
else
O << "@GOTOFF";
- } else if (Subtarget->isPICStyleRIPRel() && !NotRIPRel) {
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ !NotRIPRel) {
if (TM.getRelocationModel() != Reloc::Static) {
if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
O << "@GOTPCREL";
-
- if (needCloseParen) {
- needCloseParen = false;
- O << ')';
- }
}
-
- // Use rip when possible to reduce code size, except when
- // index or base register are also part of the address. e.g.
- // foo(%rip)(%rcx,%rax,4) is not legal
- O << "(%rip)";
+
+ isRIPRelative = true;
}
}
- if (needCloseParen)
- O << ')';
-
+ // Use rip when possible to reduce code size, except when an index or
+ // base register is also part of the address, e.g.
+ // foo(%rip)(%rcx,%rax,4) is not legal.
+ if (isRIPRelative)
+ O << "(%rip)";
+
return;
}
case MachineOperand::MO_ExternalSymbol: {
- bool isCallOp = Modifier && !strcmp(Modifier, "call");
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
bool needCloseParen = false;
std::string Name(TAI->getGlobalPrefix());
Name += MO.getSymbolName();
+
// Print function stub suffix unless it's Mac OS X 10.5 and up.
- if (isCallOp && shouldPrintStub(TM, Subtarget) &&
- !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) {
- FnStubs.insert(Name);
- printSuffixedName(Name, "$stub");
- return;
- }
- if (!isMemOp && !isCallOp)
+ if (!isMemOp)
O << '$';
else if (Name[0] == '$') {
// The name begins with a dollar-sign. In order to avoid having it look
@@ -536,17 +640,13 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
// addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
O << " + [.-"
<< getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
-
- if (isCallOp)
- O << "@PLT";
}
if (needCloseParen)
O << ')';
- if (!isCallOp && Subtarget->isPICStyleRIPRel())
+ if (Subtarget->isPICStyleRIPRel())
O << "(%rip)";
-
return;
}
default:
@@ -673,8 +773,7 @@ void X86ATTAsmPrinter::printPICJumpTableEntry(const MachineJumpTableInfo *MJTI,
printBasicBlockLabel(MBB, false, false, false);
}
-bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO,
- const char Mode) {
+bool X86ATTAsmPrinter::printAsmMRegister(const MachineOperand &MO, char Mode) {
unsigned Reg = MO.getReg();
switch (Mode) {
default: return true; // Unknown mode.
@@ -758,38 +857,85 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
+static void lower_lea64_32mem(MCInst *MI, unsigned OpNo) {
+ // Convert the registers in the addr mode to their 64-bit super-registers.
+ for (unsigned i = 0; i != 4; ++i) {
+ if (!MI->getOperand(OpNo+i).isReg()) continue;
+
+ unsigned Reg = MI->getOperand(OpNo+i).getReg();
+ if (Reg == 0) continue;
+
+ MI->getOperand(OpNo+i).setReg(getX86SubSuperRegister(Reg, MVT::i64));
+ }
+}
+
/// printMachineInstruction -- Print out a single X86 LLVM instruction MI in
/// AT&T syntax to the current output stream.
///
void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
++EmittedInsts;
+ if (NewAsmPrinter) {
+ if (MI->getOpcode() == TargetInstrInfo::INLINEASM) {
+ O << "\t";
+ printInlineAsm(MI);
+ return;
+ } else if (MI->isLabel()) {
+ printLabel(MI);
+ return;
+ } else if (MI->getOpcode() == TargetInstrInfo::DECLARE) {
+ printDeclare(MI);
+ return;
+ } else if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF) {
+ printImplicitDef(MI);
+ return;
+ }
+
+ O << "NEW: ";
+ MCInst TmpInst;
+
+ TmpInst.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+
+ MCOperand MCOp;
+ if (MO.isReg()) {
+ MCOp.MakeReg(MO.getReg());
+ } else if (MO.isImm()) {
+ MCOp.MakeImm(MO.getImm());
+ } else if (MO.isMBB()) {
+ MCOp.MakeMBBLabel(getFunctionNumber(), MO.getMBB()->getNumber());
+ } else {
+ assert(0 && "Unimp");
+ }
+
+ TmpInst.addOperand(MCOp);
+ }
+
+ switch (TmpInst.getOpcode()) {
+ case X86::LEA64_32r:
+ // Handle the 'subreg rewriting' for the lea64_32mem operand.
+ lower_lea64_32mem(&TmpInst, 1);
+ break;
+ }
+
+ // FIXME: Convert TmpInst.
+ printInstruction(&TmpInst);
+ O << "OLD: ";
+ }
+
// Call the autogenerated instruction printer routines.
printInstruction(MI);
}
/// doInitialization
bool X86ATTAsmPrinter::doInitialization(Module &M) {
-
- bool Result = AsmPrinter::doInitialization(M);
-
- if (TAI->doesSupportDebugInformation()) {
- // Let PassManager know we need debug information and relay
- // the MachineModuleInfo address on to DwarfWriter.
- // AsmPrinter::doInitialization did this analysis.
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- DW = getAnalysisIfAvailable<DwarfWriter>();
- DW->BeginModule(&M, MMI, O, this, TAI);
- }
-
- // Darwin wants symbols to be quoted if they have complex names.
- if (Subtarget->isTargetDarwin())
- Mang->setUseQuotes(true);
-
- return Result;
+ return AsmPrinter::doInitialization(M);
}
-
void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
const TargetData *TD = TM.getTargetData();
@@ -1040,8 +1186,8 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) {
}
// Emit final debug information.
- DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
- DW->EndModule();
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+ DW->EndModule();
// Funny Darwin hack: This flag tells the linker that no global symbols
// contain code that falls through to other global symbols (e.g. the obvious
@@ -1060,12 +1206,12 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) {
}
// Emit final debug information.
- DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
- DW->EndModule();
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+ DW->EndModule();
} else if (Subtarget->isTargetELF()) {
// Emit final debug information.
- DwarfWriter *DW = getAnalysisIfAvailable<DwarfWriter>();
- DW->EndModule();
+ if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+ DW->EndModule();
}
return AsmPrinter::doFinalization(M);
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
index 5b40e73..68a6bc8 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
@@ -27,16 +27,16 @@
namespace llvm {
class MachineJumpTableInfo;
+class MCInst;
class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
- DwarfWriter *DW;
MachineModuleInfo *MMI;
const X86Subtarget *Subtarget;
public:
explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
const TargetAsmInfo *T, CodeGenOpt::Level OL,
bool V)
- : AsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) {
+ : AsmPrinter(O, TM, T, OL, V), MMI(0) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
}
@@ -63,10 +63,62 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
/// machine instruction was sufficiently described to print it, otherwise it
/// returns false.
bool printInstruction(const MachineInstr *MI);
+
+
+ // New MCInst printing stuff.
+ bool printInstruction(const MCInst *MI);
+
+ void printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier = 0, bool NotRIPRel = false);
+ void printMemReference(const MCInst *MI, unsigned Op);
+ void printLeaMemReference(const MCInst *MI, unsigned Op);
+ void printSSECC(const MCInst *MI, unsigned Op);
+ void printPICLabel(const MCInst *MI, unsigned Op);
+ void print_pcrel_imm(const MCInst *MI, unsigned OpNo);
+
+ void printi8mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi16mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi32mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi64mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printi128mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf32mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf64mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf80mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printf128mem(const MCInst *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
+ void printlea32mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+ void printlea64_32mem(const MCInst *MI, unsigned OpNo) {
+ printLeaMemReference(MI, OpNo);
+ }
+
+
// These methods are used by the tablegen'erated instruction printer.
void printOperand(const MachineInstr *MI, unsigned OpNo,
const char *Modifier = 0, bool NotRIPRel = false);
+ void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
void printi8mem(const MachineInstr *MI, unsigned OpNo) {
printMemReference(MI, OpNo);
}
@@ -104,7 +156,7 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
printLeaMemReference(MI, OpNo, "subreg64");
}
- bool printAsmMRegister(const MachineOperand &MO, const char Mode);
+ bool printAsmMRegister(const MachineOperand &MO, char Mode);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode);
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
new file mode 100644
index 0000000..9d50edc
--- /dev/null
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -0,0 +1,143 @@
+//===-- X86ATTInstPrinter.cpp - AT&T assembly instruction printing --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file includes code for rendering MCInst instances as AT&T-style
+// assembly.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "llvm/MC/MCInst.h"
+#include "X86ATTAsmPrinter.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+// Include the auto-generated portion of the assembly writer.
+#define MachineInstr MCInst
+#define NO_ASM_WRITER_BOILERPLATE
+#include "X86GenAsmWriter.inc"
+#undef MachineInstr
+
+void X86ATTAsmPrinter::printSSECC(const MCInst *MI, unsigned Op) {
+ switch (MI->getOperand(Op).getImm()) {
+ default: assert(0 && "Invalid ssecc argument!");
+ case 0: O << "eq"; break;
+ case 1: O << "lt"; break;
+ case 2: O << "le"; break;
+ case 3: O << "unord"; break;
+ case 4: O << "neq"; break;
+ case 5: O << "nlt"; break;
+ case 6: O << "nle"; break;
+ case 7: O << "ord"; break;
+ }
+}
+
+
+void X86ATTAsmPrinter::printPICLabel(const MCInst *MI, unsigned Op) {
+ assert(0 &&
+ "This is only used for MOVPC32r, should lower before asm printing!");
+}
+
+
+/// print_pcrel_imm - This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value. These print slightly differently, for
+/// example, a $ is not emitted.
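+/// For example, an immediate operand prints as "call 42" rather than
+/// "call $42", and a block label prints as e.g. "LBB1_2" (assuming an 'L'
+/// private-global prefix).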
+void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+
+ if (Op.isImm())
+ O << Op.getImm();
+ else if (Op.isMBBLabel())
+ // FIXME: Keep in sync with printBasicBlockLabel. printBasicBlockLabel
+ // should eventually call into this code, not the other way around.
+ O << TAI->getPrivateGlobalPrefix() << "BB" << Op.getMBBLabelFunction()
+ << '_' << Op.getMBBLabelBlock();
+ else
+ assert(0 && "Unknown pcrel immediate operand");
+}
+
+
+void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const char *Modifier, bool NotRIPRel) {
+ assert(Modifier == 0 && "Modifiers should not be used");
+
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ O << '%';
+ unsigned Reg = Op.getReg();
+#if 0
+ if (Modifier && strncmp(Modifier, "subreg", strlen("subreg")) == 0) {
+ MVT VT = (strcmp(Modifier+6,"64") == 0) ?
+ MVT::i64 : ((strcmp(Modifier+6, "32") == 0) ? MVT::i32 :
+ ((strcmp(Modifier+6,"16") == 0) ? MVT::i16 : MVT::i8));
+ Reg = getX86SubSuperRegister(Reg, VT);
+ }
+#endif
+ O << TRI->getAsmName(Reg);
+ return;
+ } else if (Op.isImm()) {
+ //if (!Modifier || (strcmp(Modifier, "debug") && strcmp(Modifier, "mem")))
+ O << '$';
+ O << Op.getImm();
+ return;
+ }
+
+ O << "<<UNKNOWN OPERAND KIND>>";
+}
+
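+// Prints the AT&T memory form "disp(base,index,scale)", e.g.
+// "16(%rdi,%rcx,8)", "(%rax)", or a bare "42" when no registers are present.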
+void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
+ bool NotRIPRel = false;
+
+ const MCOperand &BaseReg = MI->getOperand(Op);
+ const MCOperand &IndexReg = MI->getOperand(Op+2);
+ const MCOperand &DispSpec = MI->getOperand(Op+3);
+
+ NotRIPRel |= IndexReg.getReg() || BaseReg.getReg();
+ if (DispSpec.isImm()) {
+ int64_t DispVal = DispSpec.getImm();
+ if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
+ O << DispVal;
+ } else {
+ abort();
+ //assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
+ // DispSpec.isJTI() || DispSpec.isSymbol());
+ //printOperand(MI, Op+3, "mem", NotRIPRel);
+ }
+
+ if (IndexReg.getReg() || BaseReg.getReg()) {
+ // There are cases where we can end up with ESP/RSP in the indexreg slot;
+ // assemblers reject *SP as an index, so such operands should have had their
+ // base/index registers swapped before they reach the printer.
+ // FIXME: REMOVE THIS.
+ assert(IndexReg.getReg() != X86::ESP && IndexReg.getReg() != X86::RSP);
+
+ O << '(';
+ if (BaseReg.getReg())
+ printOperand(MI, Op);
+
+ if (IndexReg.getReg()) {
+ O << ',';
+ printOperand(MI, Op+2);
+ unsigned ScaleVal = MI->getOperand(Op+1).getImm();
+ if (ScaleVal != 1)
+ O << ',' << ScaleVal;
+ }
+ O << ')';
+ }
+}
+
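+// Like printLeaMemReference, but also emits a segment override when one is
+// present, e.g. "%gs:16(%rax)".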
+void X86ATTAsmPrinter::printMemReference(const MCInst *MI, unsigned Op) {
+ const MCOperand &Segment = MI->getOperand(Op+4);
+ if (Segment.getReg()) {
+ printOperand(MI, Op+4);
+ O << ':';
+ }
+ printLeaMemReference(MI, Op);
+}
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index c874849..a39203b 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -29,13 +29,11 @@ FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o,
bool verbose) {
const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>();
- if (Subtarget->isFlavorIntel()) {
+ if (Subtarget->isFlavorIntel())
return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(),
OptLevel, verbose);
- } else {
- return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(),
- OptLevel, verbose);
- }
+ return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(),
+ OptLevel, verbose);
}
namespace {
@@ -48,3 +46,9 @@ namespace {
extern "C" int X86AsmPrinterForceLink;
int X86AsmPrinterForceLink = 0;
+
+// Force static initialization when called from
+// llvm/InitializeAllAsmPrinters.h
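+// (InitializeAllAsmPrinters() simply calls this empty function; referencing
+// it is enough to pull this object file into a statically linked tool.)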
+namespace llvm {
+ void InitializeX86AsmPrinter() { }
+}
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
index 6599349..ceae7be 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
@@ -223,9 +223,6 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
case MachineOperand::MO_Immediate:
O << MO.getImm();
return;
- case MachineOperand::MO_MachineBasicBlock:
- printBasicBlockLabel(MO.getMBB());
- return;
case MachineOperand::MO_JumpTableIndex: {
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
if (!isMemOp) O << "OFFSET ";
@@ -243,14 +240,13 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
return;
}
case MachineOperand::MO_GlobalAddress: {
- bool isCallOp = Modifier && !strcmp(Modifier, "call");
bool isMemOp = Modifier && !strcmp(Modifier, "mem");
GlobalValue *GV = MO.getGlobal();
std::string Name = Mang->getValueName(GV);
decorateName(Name, GV);
- if (!isMemOp && !isCallOp) O << "OFFSET ";
+ if (!isMemOp) O << "OFFSET ";
if (GV->hasDLLImportLinkage()) {
// FIXME: This should be fixed with full support of stdcall & fastcall
// CC's
@@ -261,8 +257,6 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
return;
}
case MachineOperand::MO_ExternalSymbol: {
- bool isCallOp = Modifier && !strcmp(Modifier, "call");
- if (!isCallOp) O << "OFFSET ";
O << TAI->getGlobalPrefix() << MO.getSymbolName();
return;
}
@@ -271,6 +265,39 @@ void X86IntelAsmPrinter::printOp(const MachineOperand &MO,
}
}
+void X86IntelAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo){
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ switch (MO.getType()) {
+ default: assert(0 && "Unknown pcrel immediate operand");
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ return;
+ case MachineOperand::MO_MachineBasicBlock:
+ printBasicBlockLabel(MO.getMBB());
+ return;
+
+ case MachineOperand::MO_GlobalAddress: {
+ GlobalValue *GV = MO.getGlobal();
+ std::string Name = Mang->getValueName(GV);
+ decorateName(Name, GV);
+
+ if (GV->hasDLLImportLinkage()) {
+ // FIXME: This should be fixed with full support of stdcall & fastcall
+ // CC's
+ O << "__imp_";
+ }
+ O << Name;
+ printOffset(MO.getOffset());
+ return;
+ }
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << TAI->getGlobalPrefix() << MO.getSymbolName();
+ return;
+ }
+}
+
+
void X86IntelAsmPrinter::printLeaMemReference(const MachineInstr *MI,
unsigned Op,
const char *Modifier) {
@@ -339,8 +366,8 @@ void X86IntelAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
}
void X86IntelAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
- O << "\"L" << getFunctionNumber() << "$pb\"\n";
- O << "\"L" << getFunctionNumber() << "$pb\":";
+ O << "L" << getFunctionNumber() << "$pb\n";
+ O << "L" << getFunctionNumber() << "$pb:";
}
bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
@@ -362,7 +389,7 @@ bool X86IntelAsmPrinter::printAsmMRegister(const MachineOperand &MO,
break;
}
- O << '%' << TRI->getName(Reg);
+ O << TRI->getName(Reg);
return false;
}
@@ -414,7 +441,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
Mang->markCharUnacceptable('.');
- O << "\t.686\n\t.model flat\n\n";
+ O << "\t.686\n\t.MMX\n\t.XMM\n\t.model flat\n\n";
// Emit declarations for external functions.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
@@ -422,7 +449,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
std::string Name = Mang->getValueName(I);
decorateName(Name, I);
- O << "\textern " ;
+ O << "\tEXTERN " ;
if (I->hasDLLImportLinkage()) {
O << "__imp_";
}
@@ -436,7 +463,7 @@ bool X86IntelAsmPrinter::doInitialization(Module &M) {
if (I->isDeclaration()) {
std::string Name = Mang->getValueName(I);
- O << "\textern " ;
+ O << "\tEXTERN " ;
if (I->hasDLLImportLinkage()) {
O << "__imp_";
}
@@ -471,14 +498,14 @@ bool X86IntelAsmPrinter::doFinalization(Module &M) {
case GlobalValue::WeakAnyLinkage:
case GlobalValue::WeakODRLinkage:
SwitchToDataSection("");
- O << name << "?\tsegment common 'COMMON'\n";
+ O << name << "?\tSEGMENT PARA common 'COMMON'\n";
bCustomSegment = true;
// FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
// are also available.
break;
case GlobalValue::AppendingLinkage:
SwitchToDataSection("");
- O << name << "?\tsegment public 'DATA'\n";
+ O << name << "?\tSEGMENT PARA public 'DATA'\n";
bCustomSegment = true;
// FIXME: the default alignment is 16 bytes, but 1, 2, 4, and 256
// are also available.
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
index 9520d98..04f2595 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
@@ -52,6 +52,9 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
printOp(MO, Modifier);
}
}
+
+ void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
+
void printi8mem(const MachineInstr *MI, unsigned OpNo) {
O << "BYTE PTR ";
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 3796aac..4464878 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -1858,8 +1858,23 @@ Ideal output:
setne %al
ret
-We could do this transformation in instcombine, but it's only clearly
-beneficial on platforms with a test instruction.
+This should definitely be done in instcombine, canonicalizing the range
+condition into a != condition. We get this IR:
+
+define i32 @a(i32 %x) nounwind readnone {
+entry:
+ %0 = and i32 %x, 127 ; <i32> [#uses=1]
+ %1 = icmp ugt i32 %0, 31 ; <i1> [#uses=1]
+ %2 = zext i1 %1 to i32 ; <i32> [#uses=1]
+ ret i32 %2
+}
+
+Instcombine prefers to strength-reduce relational comparisons to equality
+comparisons when possible; this should be another case of that. It could be
+handled pretty easily in InstCombiner::visitICmpInstWithInstAndIntCst, but it
+looks like that method should really be redesigned to use ComputeMaskedBits
+and friends first.
+
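+As a sanity check (an illustrative standalone program, not part of this
+commit): only bits 5 and 6 of the masked value can push it above 31, so the
+range condition is equivalent to (x & 96) != 0:
+
+#include <cassert>
+int main() {
+  for (unsigned x = 0; x != 256; ++x)
+    assert(((x & 127) > 31) == ((x & 96) != 0));
+  return 0;
+}
+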
//===---------------------------------------------------------------------===//
Testcase:
@@ -1880,20 +1895,40 @@ Ideal output:
Testcase:
int x(int a) { return (a & 0x80) ? 0x100 : 0; }
+int y(int a) { return (a & 0x80) * 2; }
-Current output:
+Current:
testl $128, 4(%esp)
setne %al
movzbl %al, %eax
shll $8, %eax
ret
-Ideal output:
+Better:
movl 4(%esp), %eax
addl %eax, %eax
andl $256, %eax
ret
-We generally want to fold shifted tests of a single bit into a shift+and on x86.
+This is another general instcombine transformation that is profitable on all
+targets. In LLVM IR, these functions look like this:
+
+define i32 @x(i32 %a) nounwind readnone {
+entry:
+ %0 = and i32 %a, 128
+ %1 = icmp eq i32 %0, 0
+ %iftmp.0.0 = select i1 %1, i32 0, i32 256
+ ret i32 %iftmp.0.0
+}
+
+define i32 @y(i32 %a) nounwind readnone {
+entry:
+ %0 = shl i32 %a, 1
+ %1 = and i32 %0, 256
+ ret i32 %1
+}
+
+Replacing an icmp+select with a shift should always be considered profitable in
+instcombine.
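+
+The arithmetic is easy to check: the shl moves bit 7 (0x80) into bit 8
+(0x100), and the 'and' with 256 keeps exactly that bit, so the result is
+0x100 when bit 7 of %a is set and 0 otherwise, which is precisely what the
+icmp+select computes.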
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 0f2fbcc..ed4eb44 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -991,8 +991,13 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
case X86::FpSET_ST0_32:
case X86::FpSET_ST0_64:
case X86::FpSET_ST0_80:
- assert((StackTop == 1 || StackTop == 2)
- && "Stack should have one or two element on it to return!");
+ // FpSET_ST0_80 is generated by copyRegToReg for both function return
+ // and inline assembly with the "st" constraint. In the latter case,
+ // it is possible for FP0 to be alive after this instruction.
+ if (!MI->killsRegister(X86::FP0)) {
+ // Duplicate ST0
+ duplicateToTop(0, 0, I);
+ }
--StackTop; // "Forget" we have something on the top of stack!
break;
case X86::FpSET_ST1_32:
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index b003efd..9cedafc 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -167,6 +167,8 @@ namespace {
SDValue &Segment);
bool SelectLEAAddr(SDValue Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp);
+ bool SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index, SDValue &Disp);
bool SelectScalarSSELoad(SDValue Op, SDValue Pred,
SDValue N, SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
@@ -1293,6 +1295,32 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op, SDValue N,
return false;
}
+/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
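+/// For the i32 case this returns base=0, scale=1, index=EBX and the symbol
+/// as displacement, which TLS_addr32 prints roughly as
+/// "leal x@TLSGD(,%ebx,1), %eax" (illustrative, general-dynamic model).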
+bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
+ SDValue &Scale, SDValue &Index,
+ SDValue &Disp) {
+ assert(Op.getOpcode() == X86ISD::TLSADDR);
+ assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
+ const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
+
+ X86ISelAddressMode AM;
+ AM.GV = GA->getGlobal();
+ AM.Disp += GA->getOffset();
+ AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
+
+ if (N.getValueType() == MVT::i32) {
+ AM.Scale = 1;
+ AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
+ } else {
+ AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
+ }
+
+ SDValue Segment;
+ getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
+ return true;
+}
+
+
bool X86DAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 36e3ab2..8d0ea66 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -788,8 +788,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::USUBO, MVT::i64, Custom);
setOperationAction(ISD::SMULO, MVT::i32, Custom);
setOperationAction(ISD::SMULO, MVT::i64, Custom);
- setOperationAction(ISD::UMULO, MVT::i32, Custom);
- setOperationAction(ISD::UMULO, MVT::i64, Custom);
if (!Subtarget->is64Bit()) {
// These libcalls are not available in 32-bit.
@@ -4439,9 +4437,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
// emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
// exec)
- SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
- GA->getValueType(0),
- GA->getOffset());
+ SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
+ GA->getOffset());
SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
if (model == TLSModel::InitialExec)
@@ -8474,6 +8471,14 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
return;
+ case 'K':
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
+ if ((int8_t)C->getSExtValue() == C->getSExtValue()) {
+ Result = DAG.getTargetConstant(C->getZExtValue(), Op.getValueType());
+ break;
+ }
+ }
+ return;
case 'N':
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
if (C->getZExtValue() <= 255) {
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index dc15e4a..063913f 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -19,6 +19,14 @@
// 64-bits but only 32 bits are significant.
def i64i32imm : Operand<i64>;
+
+// 64-bits but only 32 bits are significant, and those bits are treated as being
+// pc relative.
+def i64i32imm_pcrel : Operand<i64> {
+ let PrintMethod = "print_pcrel_imm";
+}
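+// (print_pcrel_imm is the printer hook added in X86ATTAsmPrinter and
+// X86IntelAsmPrinter; with it, e.g. CALL64pcrel32 below prints "call foo"
+// rather than "call $foo".)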
+
+
// 64-bits but only 8 bits are significant.
def i64i8imm : Operand<i64>;
@@ -29,6 +37,7 @@ def lea64mem : Operand<i64> {
def lea64_32mem : Operand<i32> {
let PrintMethod = "printlea64_32mem";
+ let AsmOperandLowerMethod = "lower_lea64_32mem";
let MIOperandInfo = (ops GR32, i8imm, GR32, i32imm);
}
@@ -39,6 +48,9 @@ def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
[add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper],
[]>;
+def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
+
//===----------------------------------------------------------------------===//
// Pattern fragments.
//
@@ -113,9 +125,9 @@ let isCall = 1 in
// NOTE: this pattern doesn't match "X86call imm", because we do not know
// that the offset between an arbitrary immediate and the call will fit in
// the 32-bit pcrel field that we have.
- def CALL64pcrel32 : I<0xE8, RawFrm,
- (outs), (ins i64i32imm:$dst, variable_ops),
- "call\t${dst:call}", []>,
+ def CALL64pcrel32 : Ii32<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call\t$dst", []>,
Requires<[In64BitMode]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
"call\t{*}$dst", [(X86call GR64:$dst)]>;
@@ -177,6 +189,15 @@ def PUSH64r : I<0x50, AddRegFrm,
(outs), (ins GR64:$reg), "push{q}\t$reg", []>;
}
+let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{q}\t$imm", []>;
+def PUSH64i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+ "push{q}\t$imm", []>;
+}
+
let Defs = [RSP, EFLAGS], Uses = [RSP], mayLoad = 1 in
def POPFQ : I<0x9D, RawFrm, (outs), (ins), "popf", []>, REX_W;
let Defs = [RSP], Uses = [RSP, EFLAGS], mayStore = 1 in
@@ -1312,13 +1333,13 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [RSP] in
-def TLS_addr64 : I<0, Pseudo, (outs), (ins i64imm:$sym),
+def TLS_addr64 : I<0, Pseudo, (outs), (ins lea64mem:$sym),
".byte\t0x66; "
- "leaq\t${sym:mem}(%rip), %rdi; "
+ "leaq\t$sym(%rip), %rdi; "
".word\t0x6666; "
"rex64; "
"call\t__tls_get_addr@PLT",
- [(X86tlsaddr tglobaltlsaddr:$sym)]>,
+ [(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
let AddedComplexity = 5 in
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 50ae417..2d8f55f 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -163,6 +163,11 @@ def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
// X86 Operand Definitions.
//
+def i32imm_pcrel : Operand<i32> {
+ let PrintMethod = "print_pcrel_imm";
+}
+
+
// *mem - Operand definitions for the funky X86 addressing mode operands.
//
class X86MemOperand<string printMethod> : Operand<iPTR> {
@@ -206,8 +211,10 @@ def i16i8imm : Operand<i16>;
// 32-bits but only 8 bits are significant.
def i32i8imm : Operand<i32>;
-// Branch targets have OtherVT type.
-def brtarget : Operand<OtherVT>;
+// Branch targets have OtherVT type and print as pc-relative values.
+def brtarget : Operand<OtherVT> {
+ let PrintMethod = "print_pcrel_imm";
+}
//===----------------------------------------------------------------------===//
// X86 Complex Pattern Definitions.
@@ -217,6 +224,8 @@ def brtarget : Operand<OtherVT>;
def addr : ComplexPattern<iPTR, 5, "SelectAddr", [], []>;
def lea32addr : ComplexPattern<i32, 4, "SelectLEAAddr",
[add, sub, mul, shl, or, frameindex], []>;
+def tls32addr : ComplexPattern<i32, 4, "SelectTLSADDRAddr",
+ [tglobaltlsaddr], []>;
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
@@ -561,8 +570,9 @@ let isCall = 1 in
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [ESP] in {
- def CALLpcrel32 : Ii32<0xE8, RawFrm, (outs), (ins i32imm:$dst,variable_ops),
- "call\t${dst:call}", []>;
+ def CALLpcrel32 : Ii32<0xE8, RawFrm,
+ (outs), (ins i32imm_pcrel:$dst,variable_ops),
+ "call\t$dst", []>;
def CALL32r : I<0xFF, MRM2r, (outs), (ins GR32:$dst, variable_ops),
"call\t{*}$dst", [(X86call GR32:$dst)]>;
def CALL32m : I<0xFF, MRM2m, (outs), (ins i32mem:$dst, variable_ops),
@@ -587,7 +597,7 @@ def TCRETURNri : I<0, Pseudo, (outs), (ins GR32:$dst, i32imm:$offset, variable_o
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
- def TAILJMPd : IBr<0xE9, (ins i32imm:$dst), "jmp\t${dst:call} # TAILCALL",
+ def TAILJMPd : IBr<0xE9, (ins i32imm_pcrel:$dst), "jmp\t$dst # TAILCALL",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst), "jmp{l}\t{*}$dst # TAILCALL",
@@ -611,6 +621,15 @@ let mayStore = 1 in
def PUSH32r : I<0x50, AddRegFrm, (outs), (ins GR32:$reg), "push{l}\t$reg",[]>;
}
+let Defs = [ESP], Uses = [ESP], neverHasSideEffects = 1, mayStore = 1 in {
+def PUSH32i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+ "push{l}\t$imm", []>;
+def PUSH32i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
+ "push{l}\t$imm", []>;
+def PUSH32i32 : Ii32<0x68, RawFrm, (outs), (ins i32imm:$imm),
+ "push{l}\t$imm", []>;
+}
+
let Defs = [ESP, EFLAGS], Uses = [ESP], mayLoad = 1, neverHasSideEffects=1 in
def POPFD : I<0x9D, RawFrm, (outs), (ins), "popf", []>;
let Defs = [ESP], Uses = [ESP, EFLAGS], mayStore = 1, neverHasSideEffects=1 in
@@ -1726,13 +1745,13 @@ let isTwoAddress = 0 in {
let Defs = [EFLAGS] in {
let Uses = [CL] in {
def SHL8rCL : I<0xD2, MRM4r, (outs GR8 :$dst), (ins GR8 :$src),
- "shl{b}\t{%cl, $dst|$dst, %CL}",
+ "shl{b}\t{%cl, $dst|$dst, CL}",
[(set GR8:$dst, (shl GR8:$src, CL))]>;
def SHL16rCL : I<0xD3, MRM4r, (outs GR16:$dst), (ins GR16:$src),
- "shl{w}\t{%cl, $dst|$dst, %CL}",
+ "shl{w}\t{%cl, $dst|$dst, CL}",
[(set GR16:$dst, (shl GR16:$src, CL))]>, OpSize;
def SHL32rCL : I<0xD3, MRM4r, (outs GR32:$dst), (ins GR32:$src),
- "shl{l}\t{%cl, $dst|$dst, %CL}",
+ "shl{l}\t{%cl, $dst|$dst, CL}",
[(set GR32:$dst, (shl GR32:$src, CL))]>;
} // Uses = [CL]
@@ -1753,13 +1772,13 @@ def SHL32ri : Ii8<0xC1, MRM4r, (outs GR32:$dst), (ins GR32:$src1, i8imm:$src2),
let isTwoAddress = 0 in {
let Uses = [CL] in {
def SHL8mCL : I<0xD2, MRM4m, (outs), (ins i8mem :$dst),
- "shl{b}\t{%cl, $dst|$dst, %CL}",
+ "shl{b}\t{%cl, $dst|$dst, CL}",
[(store (shl (loadi8 addr:$dst), CL), addr:$dst)]>;
def SHL16mCL : I<0xD3, MRM4m, (outs), (ins i16mem:$dst),
- "shl{w}\t{%cl, $dst|$dst, %CL}",
+ "shl{w}\t{%cl, $dst|$dst, CL}",
[(store (shl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
def SHL32mCL : I<0xD3, MRM4m, (outs), (ins i32mem:$dst),
- "shl{l}\t{%cl, $dst|$dst, %CL}",
+ "shl{l}\t{%cl, $dst|$dst, CL}",
[(store (shl (loadi32 addr:$dst), CL), addr:$dst)]>;
}
def SHL8mi : Ii8<0xC0, MRM4m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1788,13 +1807,13 @@ let isTwoAddress = 0 in {
let Uses = [CL] in {
def SHR8rCL : I<0xD2, MRM5r, (outs GR8 :$dst), (ins GR8 :$src),
- "shr{b}\t{%cl, $dst|$dst, %CL}",
+ "shr{b}\t{%cl, $dst|$dst, CL}",
[(set GR8:$dst, (srl GR8:$src, CL))]>;
def SHR16rCL : I<0xD3, MRM5r, (outs GR16:$dst), (ins GR16:$src),
- "shr{w}\t{%cl, $dst|$dst, %CL}",
+ "shr{w}\t{%cl, $dst|$dst, CL}",
[(set GR16:$dst, (srl GR16:$src, CL))]>, OpSize;
def SHR32rCL : I<0xD3, MRM5r, (outs GR32:$dst), (ins GR32:$src),
- "shr{l}\t{%cl, $dst|$dst, %CL}",
+ "shr{l}\t{%cl, $dst|$dst, CL}",
[(set GR32:$dst, (srl GR32:$src, CL))]>;
}
@@ -1822,14 +1841,14 @@ def SHR32r1 : I<0xD1, MRM5r, (outs GR32:$dst), (ins GR32:$src1),
let isTwoAddress = 0 in {
let Uses = [CL] in {
def SHR8mCL : I<0xD2, MRM5m, (outs), (ins i8mem :$dst),
- "shr{b}\t{%cl, $dst|$dst, %CL}",
+ "shr{b}\t{%cl, $dst|$dst, CL}",
[(store (srl (loadi8 addr:$dst), CL), addr:$dst)]>;
def SHR16mCL : I<0xD3, MRM5m, (outs), (ins i16mem:$dst),
- "shr{w}\t{%cl, $dst|$dst, %CL}",
+ "shr{w}\t{%cl, $dst|$dst, CL}",
[(store (srl (loadi16 addr:$dst), CL), addr:$dst)]>,
OpSize;
def SHR32mCL : I<0xD3, MRM5m, (outs), (ins i32mem:$dst),
- "shr{l}\t{%cl, $dst|$dst, %CL}",
+ "shr{l}\t{%cl, $dst|$dst, CL}",
[(store (srl (loadi32 addr:$dst), CL), addr:$dst)]>;
}
def SHR8mi : Ii8<0xC0, MRM5m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1857,13 +1876,13 @@ let isTwoAddress = 0 in {
let Uses = [CL] in {
def SAR8rCL : I<0xD2, MRM7r, (outs GR8 :$dst), (ins GR8 :$src),
- "sar{b}\t{%cl, $dst|$dst, %CL}",
+ "sar{b}\t{%cl, $dst|$dst, CL}",
[(set GR8:$dst, (sra GR8:$src, CL))]>;
def SAR16rCL : I<0xD3, MRM7r, (outs GR16:$dst), (ins GR16:$src),
- "sar{w}\t{%cl, $dst|$dst, %CL}",
+ "sar{w}\t{%cl, $dst|$dst, CL}",
[(set GR16:$dst, (sra GR16:$src, CL))]>, OpSize;
def SAR32rCL : I<0xD3, MRM7r, (outs GR32:$dst), (ins GR32:$src),
- "sar{l}\t{%cl, $dst|$dst, %CL}",
+ "sar{l}\t{%cl, $dst|$dst, CL}",
[(set GR32:$dst, (sra GR32:$src, CL))]>;
}
@@ -1892,13 +1911,13 @@ def SAR32r1 : I<0xD1, MRM7r, (outs GR32:$dst), (ins GR32:$src1),
let isTwoAddress = 0 in {
let Uses = [CL] in {
def SAR8mCL : I<0xD2, MRM7m, (outs), (ins i8mem :$dst),
- "sar{b}\t{%cl, $dst|$dst, %CL}",
+ "sar{b}\t{%cl, $dst|$dst, CL}",
[(store (sra (loadi8 addr:$dst), CL), addr:$dst)]>;
def SAR16mCL : I<0xD3, MRM7m, (outs), (ins i16mem:$dst),
- "sar{w}\t{%cl, $dst|$dst, %CL}",
+ "sar{w}\t{%cl, $dst|$dst, CL}",
[(store (sra (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
def SAR32mCL : I<0xD3, MRM7m, (outs), (ins i32mem:$dst),
- "sar{l}\t{%cl, $dst|$dst, %CL}",
+ "sar{l}\t{%cl, $dst|$dst, CL}",
[(store (sra (loadi32 addr:$dst), CL), addr:$dst)]>;
}
def SAR8mi : Ii8<0xC0, MRM7m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1929,13 +1948,13 @@ let isTwoAddress = 0 in {
// FIXME: provide shorter instructions when imm8 == 1
let Uses = [CL] in {
def ROL8rCL : I<0xD2, MRM0r, (outs GR8 :$dst), (ins GR8 :$src),
- "rol{b}\t{%cl, $dst|$dst, %CL}",
+ "rol{b}\t{%cl, $dst|$dst, CL}",
[(set GR8:$dst, (rotl GR8:$src, CL))]>;
def ROL16rCL : I<0xD3, MRM0r, (outs GR16:$dst), (ins GR16:$src),
- "rol{w}\t{%cl, $dst|$dst, %CL}",
+ "rol{w}\t{%cl, $dst|$dst, CL}",
[(set GR16:$dst, (rotl GR16:$src, CL))]>, OpSize;
def ROL32rCL : I<0xD3, MRM0r, (outs GR32:$dst), (ins GR32:$src),
- "rol{l}\t{%cl, $dst|$dst, %CL}",
+ "rol{l}\t{%cl, $dst|$dst, CL}",
[(set GR32:$dst, (rotl GR32:$src, CL))]>;
}
@@ -1963,13 +1982,13 @@ def ROL32r1 : I<0xD1, MRM0r, (outs GR32:$dst), (ins GR32:$src1),
let isTwoAddress = 0 in {
let Uses = [CL] in {
def ROL8mCL : I<0xD2, MRM0m, (outs), (ins i8mem :$dst),
- "rol{b}\t{%cl, $dst|$dst, %CL}",
+ "rol{b}\t{%cl, $dst|$dst, CL}",
[(store (rotl (loadi8 addr:$dst), CL), addr:$dst)]>;
def ROL16mCL : I<0xD3, MRM0m, (outs), (ins i16mem:$dst),
- "rol{w}\t{%cl, $dst|$dst, %CL}",
+ "rol{w}\t{%cl, $dst|$dst, CL}",
[(store (rotl (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
def ROL32mCL : I<0xD3, MRM0m, (outs), (ins i32mem:$dst),
- "rol{l}\t{%cl, $dst|$dst, %CL}",
+ "rol{l}\t{%cl, $dst|$dst, CL}",
[(store (rotl (loadi32 addr:$dst), CL), addr:$dst)]>;
}
def ROL8mi : Ii8<0xC0, MRM0m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -1998,13 +2017,13 @@ let isTwoAddress = 0 in {
let Uses = [CL] in {
def ROR8rCL : I<0xD2, MRM1r, (outs GR8 :$dst), (ins GR8 :$src),
- "ror{b}\t{%cl, $dst|$dst, %CL}",
+ "ror{b}\t{%cl, $dst|$dst, CL}",
[(set GR8:$dst, (rotr GR8:$src, CL))]>;
def ROR16rCL : I<0xD3, MRM1r, (outs GR16:$dst), (ins GR16:$src),
- "ror{w}\t{%cl, $dst|$dst, %CL}",
+ "ror{w}\t{%cl, $dst|$dst, CL}",
[(set GR16:$dst, (rotr GR16:$src, CL))]>, OpSize;
def ROR32rCL : I<0xD3, MRM1r, (outs GR32:$dst), (ins GR32:$src),
- "ror{l}\t{%cl, $dst|$dst, %CL}",
+ "ror{l}\t{%cl, $dst|$dst, CL}",
[(set GR32:$dst, (rotr GR32:$src, CL))]>;
}
@@ -2032,13 +2051,13 @@ def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
let isTwoAddress = 0 in {
let Uses = [CL] in {
def ROR8mCL : I<0xD2, MRM1m, (outs), (ins i8mem :$dst),
- "ror{b}\t{%cl, $dst|$dst, %CL}",
+ "ror{b}\t{%cl, $dst|$dst, CL}",
[(store (rotr (loadi8 addr:$dst), CL), addr:$dst)]>;
def ROR16mCL : I<0xD3, MRM1m, (outs), (ins i16mem:$dst),
- "ror{w}\t{%cl, $dst|$dst, %CL}",
+ "ror{w}\t{%cl, $dst|$dst, CL}",
[(store (rotr (loadi16 addr:$dst), CL), addr:$dst)]>, OpSize;
def ROR32mCL : I<0xD3, MRM1m, (outs), (ins i32mem:$dst),
- "ror{l}\t{%cl, $dst|$dst, %CL}",
+ "ror{l}\t{%cl, $dst|$dst, CL}",
[(store (rotr (loadi32 addr:$dst), CL), addr:$dst)]>;
}
def ROR8mi : Ii8<0xC0, MRM1m, (outs), (ins i8mem :$dst, i8imm:$src),
@@ -2070,17 +2089,17 @@ let isTwoAddress = 0 in {
// Double shift instructions (generalizations of rotate)
let Uses = [CL] in {
def SHLD32rrCL : I<0xA5, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(set GR32:$dst, (X86shld GR32:$src1, GR32:$src2, CL))]>, TB;
def SHRD32rrCL : I<0xAD, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(set GR32:$dst, (X86shrd GR32:$src1, GR32:$src2, CL))]>, TB;
def SHLD16rrCL : I<0xA5, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(set GR16:$dst, (X86shld GR16:$src1, GR16:$src2, CL))]>,
TB, OpSize;
def SHRD16rrCL : I<0xAD, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(set GR16:$dst, (X86shrd GR16:$src1, GR16:$src2, CL))]>,
TB, OpSize;
}
@@ -2115,11 +2134,11 @@ def SHRD16rri8 : Ii8<0xAC, MRMDestReg,
let isTwoAddress = 0 in {
let Uses = [CL] in {
def SHLD32mrCL : I<0xA5, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "shld{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shld{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shld (loadi32 addr:$dst), GR32:$src2, CL),
addr:$dst)]>, TB;
def SHRD32mrCL : I<0xAD, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
- "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shrd{l}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shrd (loadi32 addr:$dst), GR32:$src2, CL),
addr:$dst)]>, TB;
}
@@ -2138,11 +2157,11 @@ let isTwoAddress = 0 in {
let Uses = [CL] in {
def SHLD16mrCL : I<0xA5, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "shld{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shld{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shld (loadi16 addr:$dst), GR16:$src2, CL),
addr:$dst)]>, TB, OpSize;
def SHRD16mrCL : I<0xAD, MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
- "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, %CL}",
+ "shrd{w}\t{%cl, $src2, $dst|$dst, $src2, CL}",
[(store (X86shrd (loadi16 addr:$dst), GR16:$src2, CL),
addr:$dst)]>, TB, OpSize;
}
@@ -3095,11 +3114,11 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
- Uses = [ESP, EBX] in
-def TLS_addr32 : I<0, Pseudo, (outs), (ins i32imm:$sym),
- "leal\t${sym:mem}(,%ebx,1), %eax; "
+ Uses = [ESP] in
+def TLS_addr32 : I<0, Pseudo, (outs), (ins lea32mem:$sym),
+ "leal\t$sym, %eax; "
"call\t___tls_get_addr@PLT",
- [(X86tlsaddr tglobaltlsaddr:$sym)]>,
+ [(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
let AddedComplexity = 5 in
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b44c7a6..5d6ef36 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -3027,6 +3027,12 @@ def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
}
+// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
+// fall back to this for SSE1)
+def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
+ (SHUFPSrri VR128:$src2, VR128:$src1,
+ (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>;
+
// Set lowest element and zero upper elements.
let AddedComplexity = 15 in
def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 6c0074e..a2f319f 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -662,6 +662,10 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
TargetFrameInfo::StackGrowsUp ?
TD->getPointerSize() : -TD->getPointerSize());
+ MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+
if (StackSize) {
// Show update of SP.
if (hasFP(MF)) {
@@ -676,7 +680,7 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
}
} else {
- //FIXME: Verify & implement for FP
+ // FIXME: Verify & implement for FP
MachineLocation SPDst(StackPtr);
MachineLocation SPSrc(StackPtr, stackGrowth);
Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
@@ -711,10 +715,6 @@ void X86RegisterInfo::emitFrameMoves(MachineFunction &MF,
MachineLocation FPSrc(FramePtr);
Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
}
-
- MachineLocation FPDst(hasFP(MF) ? FramePtr : StackPtr);
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
}
@@ -729,8 +729,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
!Fn->doesNotThrow() ||
UnwindTablesMandatory;
- DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
- DebugLoc::getUnknownLoc());
+ DebugLoc DL;
// Prepare for frame info.
unsigned FrameLabelId = 0;
@@ -822,13 +821,6 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
}
- unsigned ReadyLabelId = 0;
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer is ready.
- ReadyLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
- }
-
// Skip the callee-saved push instructions.
while (MBBI != MBB.end() &&
(MBBI->getOpcode() == X86::PUSH32r ||
@@ -891,8 +883,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
}
- if (needsFrameMoves)
+ if (needsFrameMoves) {
+ // Mark effective beginning of when frame pointer is ready.
+ unsigned ReadyLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId);
emitFrameMoves(MF, FrameLabelId, ReadyLabelId);
+ }
}
void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 46476f2..694b0eb 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -173,7 +173,7 @@ public:
bool isPICStyleGOT() const { return PICStyle == PICStyles::GOT; }
bool isPICStyleStub() const { return PICStyle == PICStyles::Stub; }
bool isPICStyleRIPRel() const { return PICStyle == PICStyles::RIPRel; }
- bool isPICStyleWinPIC() const { return PICStyle == PICStyles:: WinPIC; }
+ bool isPICStyleWinPIC() const { return PICStyle == PICStyles::WinPIC; }
/// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard.
unsigned getDarwinVers() const { return DarwinVers; }
diff --git a/lib/Target/X86/X86TargetAsmInfo.cpp b/lib/Target/X86/X86TargetAsmInfo.cpp
index 5dda5f4..f49ca15 100644
--- a/lib/Target/X86/X86TargetAsmInfo.cpp
+++ b/lib/Target/X86/X86TargetAsmInfo.cpp
@@ -44,40 +44,25 @@ X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM):
AlignmentIsInBytes = false;
TextAlignFillValue = 0x90;
- GlobalPrefix = "_";
+
+
if (!is64Bit)
Data64bitsDirective = 0; // we can't emit a 64-bit unit
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
- PrivateGlobalPrefix = "L"; // Marker for constant pool idxs
- LessPrivateGlobalPrefix = "l"; // Marker for some ObjC metadata
- BSSSection = 0; // no BSS section.
ZeroFillDirective = "\t.zerofill\t"; // Uses .zerofill
if (TM.getRelocationModel() != Reloc::Static)
ConstantPoolSection = "\t.const_data";
else
ConstantPoolSection = "\t.const\n";
- JumpTableDataSection = "\t.const\n";
- CStringSection = "\t.cstring";
- // FIXME: Why don't always use this section?
- if (is64Bit) {
+ // FIXME: Why don't we always use this section?
+ if (is64Bit)
SixteenByteConstantSection = getUnnamedSection("\t.literal16\n",
SectionFlags::Mergeable);
- }
LCOMMDirective = "\t.lcomm\t";
- SwitchToSectionDirective = "\t.section ";
- StringConstantPrefix = "\1LC";
// Leopard and above support aligned common symbols.
COMMDirectiveTakesAlignment = (Subtarget->getDarwinVers() >= 9);
HasDotTypeDotSizeDirective = false;
- HasSingleParameterDotFile = false;
NonLocalEHFrameLabel = true;
- if (TM.getRelocationModel() == Reloc::Static) {
- StaticCtorsSection = ".constructor";
- StaticDtorsSection = ".destructor";
- } else {
- StaticCtorsSection = ".mod_init_func";
- StaticDtorsSection = ".mod_term_func";
- }
if (is64Bit) {
PersonalityPrefix = "";
PersonalitySuffix = "+4@GOTPCREL";
@@ -85,40 +70,18 @@ X86DarwinTargetAsmInfo::X86DarwinTargetAsmInfo(const X86TargetMachine &TM):
PersonalityPrefix = "L";
PersonalitySuffix = "$non_lazy_ptr";
}
- NeedsIndirectEncoding = true;
InlineAsmStart = "## InlineAsm Start";
InlineAsmEnd = "## InlineAsm End";
CommentString = "##";
SetDirective = "\t.set";
PCSymbol = ".";
UsedDirective = "\t.no_dead_strip\t";
- WeakDefDirective = "\t.weak_definition ";
- WeakRefDirective = "\t.weak_reference ";
- HiddenDirective = "\t.private_extern ";
ProtectedDirective = "\t.globl\t";
- // In non-PIC modes, emit a special label before jump tables so that the
- // linker can perform more accurate dead code stripping.
- if (TM.getRelocationModel() != Reloc::PIC_) {
- // Emit a local label that is preserved until the linker runs.
- JumpTableSpecialLabelPrefix = "l";
- }
-
SupportsDebugInformation = true;
- NeedsSet = true;
- DwarfAbbrevSection = ".section __DWARF,__debug_abbrev,regular,debug";
- DwarfInfoSection = ".section __DWARF,__debug_info,regular,debug";
- DwarfLineSection = ".section __DWARF,__debug_line,regular,debug";
- DwarfFrameSection = ".section __DWARF,__debug_frame,regular,debug";
- DwarfPubNamesSection = ".section __DWARF,__debug_pubnames,regular,debug";
- DwarfPubTypesSection = ".section __DWARF,__debug_pubtypes,regular,debug";
+
DwarfDebugInlineSection = ".section __DWARF,__debug_inlined,regular,debug";
DwarfUsesInlineInfoSection = true;
- DwarfStrSection = ".section __DWARF,__debug_str,regular,debug";
- DwarfLocSection = ".section __DWARF,__debug_loc,regular,debug";
- DwarfARangesSection = ".section __DWARF,__debug_aranges,regular,debug";
- DwarfRangesSection = ".section __DWARF,__debug_ranges,regular,debug";
- DwarfMacInfoSection = ".section __DWARF,__debug_macinfo,regular,debug";
// Exceptions handling
SupportsExceptionHandling = true;
@@ -176,7 +139,7 @@ X86ELFTargetAsmInfo::X86ELFTargetAsmInfo(const X86TargetMachine &TM):
DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
- DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+ DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
// Exceptions handling
SupportsExceptionHandling = true;
@@ -259,7 +222,7 @@ X86COFFTargetAsmInfo::X86COFFTargetAsmInfo(const X86TargetMachine &TM):
DwarfLocSection = "\t.section\t.debug_loc,\"dr\"";
DwarfARangesSection = "\t.section\t.debug_aranges,\"dr\"";
DwarfRangesSection = "\t.section\t.debug_ranges,\"dr\"";
- DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"dr\"";
+ DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"dr\"";
}
unsigned
@@ -340,8 +303,11 @@ X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM):
GlobalPrefix = "_";
CommentString = ";";
+ InlineAsmStart = "; InlineAsm Start";
+ InlineAsmEnd = "; InlineAsm End";
+
PrivateGlobalPrefix = "$";
- AlignDirective = "\talign\t";
+ AlignDirective = "\tALIGN\t";
ZeroDirective = "\tdb\t";
ZeroDirectiveSuffix = " dup(0)";
AsciiDirective = "\tdb\t";
@@ -353,13 +319,15 @@ X86WinTargetAsmInfo::X86WinTargetAsmInfo(const X86TargetMachine &TM):
HasDotTypeDotSizeDirective = false;
HasSingleParameterDotFile = false;
+ AlignmentIsInBytes = true;
+
TextSection = getUnnamedSection("_text", SectionFlags::Code);
DataSection = getUnnamedSection("_data", SectionFlags::Writeable);
JumpTableDataSection = NULL;
SwitchToSectionDirective = "";
- TextSectionStartSuffix = "\tsegment 'CODE'";
- DataSectionStartSuffix = "\tsegment 'DATA'";
+ TextSectionStartSuffix = "\tSEGMENT PARA 'CODE'";
+ DataSectionStartSuffix = "\tSEGMENT PARA 'DATA'";
SectionEndDirectiveSuffix = "\tends\n";
}
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index dfb055f..53c46c3 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -36,6 +36,11 @@ X("x86", "32-bit X86: Pentium-Pro and above");
static RegisterTarget<X86_64TargetMachine>
Y("x86-64", "64-bit X86: EM64T and AMD64");
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeX86Target() { }
+}
+
// No assembler printer by default
X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0;
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index c9a6d8a..ed4c101 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -428,6 +428,7 @@ void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
bool XCoreAsmPrinter::doInitialization(Module &M) {
bool Result = AsmPrinter::doInitialization(M);
+ DW = getAnalysisIfAvailable<DwarfWriter>();
if (!FileDirective.empty()) {
emitFileDirective(FileDirective);
@@ -449,11 +450,6 @@ bool XCoreAsmPrinter::doInitialization(Module &M) {
}
}
- // Emit initial debug information.
- DW = getAnalysisIfAvailable<DwarfWriter>();
- assert(DW && "Dwarf Writer is not available");
- DW->BeginModule(&M, getAnalysisIfAvailable<MachineModuleInfo>(),
- O, this, TAI);
return Result;
}
diff --git a/lib/Target/XCore/XCoreTargetAsmInfo.cpp b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
index 5513762..59ad624 100644
--- a/lib/Target/XCore/XCoreTargetAsmInfo.cpp
+++ b/lib/Target/XCore/XCoreTargetAsmInfo.cpp
@@ -24,6 +24,7 @@ using namespace llvm;
XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM)
: ELFTargetAsmInfo(TM),
Subtarget(TM.getSubtargetImpl()) {
+ SupportsDebugInformation = true;
TextSection = getUnnamedSection("\t.text", SectionFlags::Code);
DataSection = getNamedSection("\t.dp.data", SectionFlags::Writeable |
SectionFlags::Small);
@@ -64,7 +65,7 @@ XCoreTargetAsmInfo::XCoreTargetAsmInfo(const XCoreTargetMachine &TM)
DwarfLocSection = "\t.section\t.debug_loc,\"\",@progbits";
DwarfARangesSection = "\t.section\t.debug_aranges,\"\",@progbits";
DwarfRangesSection = "\t.section\t.debug_ranges,\"\",@progbits";
- DwarfMacInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
+ DwarfMacroInfoSection = "\t.section\t.debug_macinfo,\"\",@progbits";
}
const Section*
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 5437c57..cfd3cd3 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -31,6 +31,11 @@ namespace {
RegisterTarget<XCoreTargetMachine> X("xcore", "XCore");
}
+// Force static initialization when called from llvm/InitializeAllTargets.h
+namespace llvm {
+ void InitializeXCoreTarget() { }
+}
+
const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const {
return new XCoreTargetAsmInfo(*this);
}
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 4b85e13..1438b48 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -16,6 +16,7 @@ add_llvm_library(LLVMipo
LoopExtractor.cpp
LowerSetJmp.cpp
MergeFunctions.cpp
+ PartialInlining.cpp
PartialSpecialization.cpp
PruneEH.cpp
RaiseAllocations.cpp
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 9a1b294..cbf3a1d 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1667,11 +1667,14 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
//
// NOTE: It doesn't make sense to promote non single-value types since we
// are just replacing static memory to stack memory.
+ //
+ // If the global is in a different address space, don't bring it to the stack.
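+ // (An alloca always lives in address space 0, so e.g. an addrspace(1)
+ // global has no stack equivalent.)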
if (!GS.HasMultipleAccessingFunctions &&
GS.AccessingFunction && !GS.HasNonInstructionUser &&
GV->getType()->getElementType()->isSingleValueType() &&
GS.AccessingFunction->getName() == "main" &&
- GS.AccessingFunction->hasExternalLinkage()) {
+ GS.AccessingFunction->hasExternalLinkage() &&
+ GV->getType()->getAddressSpace() == 0) {
DOUT << "LOCALIZING GLOBAL: " << *GV;
Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
const Type* ElemTy = GV->getType()->getElementType();
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index b3a25540..0b975ae 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -20,10 +20,13 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/FunctionUtils.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/CFG.h"
using namespace llvm;
+STATISTIC(NumPartialInlined, "Number of functions partially inlined");
+
namespace {
struct VISIBILITY_HIDDEN PartialInliner : public ModulePass {
virtual void getAnalysisUsage(AnalysisUsage &AU) const { }
@@ -132,6 +135,8 @@ Function* PartialInliner::unswitchFunction(Function* F) {
duplicateFunction->replaceAllUsesWith(F);
duplicateFunction->eraseFromParent();
+ ++NumPartialInlined;
+
return extractedFunction;
}
diff --git a/lib/Transforms/IPO/RaiseAllocations.cpp b/lib/Transforms/IPO/RaiseAllocations.cpp
index a81bbdb..8c97b5d 100644
--- a/lib/Transforms/IPO/RaiseAllocations.cpp
+++ b/lib/Transforms/IPO/RaiseAllocations.cpp
@@ -82,14 +82,14 @@ void RaiseAllocations::doInitialization(Module &M) {
// Check to see if we got the expected malloc
if (TyWeHave != Malloc1Type) {
- // Check to see if the prototype is wrong, giving us sbyte*(uint) * malloc
+ // Check to see if the prototype is wrong, giving us i8*(i32) * malloc
// This handles the common declaration of: 'void *malloc(unsigned);'
const FunctionType *Malloc2Type =
FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
std::vector<const Type*>(1, Type::Int32Ty), false);
if (TyWeHave != Malloc2Type) {
// Check to see if the prototype is missing, giving us
- // sbyte*(...) * malloc
+ // i8*(...) * malloc
// This handles the common declaration of: 'void *malloc();'
const FunctionType *Malloc3Type =
FunctionType::get(PointerType::getUnqual(Type::Int8Ty),
diff --git a/lib/Transforms/Instrumentation/RSProfiling.cpp b/lib/Transforms/Instrumentation/RSProfiling.cpp
index c6cf4df..b110f4e 100644
--- a/lib/Transforms/Instrumentation/RSProfiling.cpp
+++ b/lib/Transforms/Instrumentation/RSProfiling.cpp
@@ -108,9 +108,9 @@ namespace {
class VISIBILITY_HIDDEN GlobalRandomCounter : public Chooser {
GlobalVariable* Counter;
Value* ResetValue;
- const Type* T;
+ const IntegerType* T;
public:
- GlobalRandomCounter(Module& M, const Type* t, uint64_t resetval);
+ GlobalRandomCounter(Module& M, const IntegerType* t, uint64_t resetval);
virtual ~GlobalRandomCounter();
virtual void PrepFunction(Function* F);
virtual void ProcessChoicePoint(BasicBlock* bb);
@@ -121,9 +121,9 @@ namespace {
GlobalVariable* Counter;
Value* ResetValue;
AllocaInst* AI;
- const Type* T;
+ const IntegerType* T;
public:
- GlobalRandomCounterOpt(Module& M, const Type* t, uint64_t resetval);
+ GlobalRandomCounterOpt(Module& M, const IntegerType* t, uint64_t resetval);
virtual ~GlobalRandomCounterOpt();
virtual void PrepFunction(Function* F);
virtual void ProcessChoicePoint(BasicBlock* bb);
@@ -193,7 +193,7 @@ static void getBackEdges(Function& F, T& BackEdges);
// Methods of choosing when to profile
///////////////////////////////////////
-GlobalRandomCounter::GlobalRandomCounter(Module& M, const Type* t,
+GlobalRandomCounter::GlobalRandomCounter(Module& M, const IntegerType* t,
uint64_t resetval) : T(t) {
ConstantInt* Init = ConstantInt::get(T, resetval);
ResetValue = Init;
@@ -229,7 +229,7 @@ void GlobalRandomCounter::ProcessChoicePoint(BasicBlock* bb) {
ReplacePhiPred(oldnext, bb, resetblock);
}
-GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const Type* t,
+GlobalRandomCounterOpt::GlobalRandomCounterOpt(Module& M, const IntegerType* t,
uint64_t resetval)
: AI(0), T(t) {
ConstantInt* Init = ConstantInt::get(T, resetval);
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 42978e7..e9bee64 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -401,8 +401,8 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
-/// copy (e.g. it's casting from one pointer type to another, int->uint, or
-/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
+/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
+/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 673d38b..f4a9898 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -37,6 +37,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <cstdio>
using namespace llvm;
@@ -48,7 +49,7 @@ STATISTIC(NumPRELoad, "Number of loads PRE'd");
static cl::opt<bool> EnablePRE("enable-pre",
cl::init(true), cl::Hidden);
-cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
+static cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true));
//===----------------------------------------------------------------------===//
// ValueTable Class
@@ -952,8 +953,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].second.isClobber())
+ if (Deps.size() == 1 && Deps[0].second.isClobber()) {
+ DEBUG(
+ DOUT << "GVN: non-local load ";
+ WriteAsOperand(*DOUT.stream(), LI);
+ DOUT << " is clobbered by " << *Deps[0].second.getInst();
+ );
return false;
+ }
// Filter out useless results (non-locals, etc). Keep track of the blocks
// where we have a value available in repl, also keep track of whether we see
@@ -1069,6 +1076,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
BasicBlock *TmpBB = LoadBB;
bool isSinglePred = false;
+ bool allSingleSucc = true;
while (TmpBB->getSinglePredecessor()) {
isSinglePred = true;
TmpBB = TmpBB->getSinglePredecessor();
@@ -1078,6 +1086,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
return false;
if (Blockers.count(TmpBB))
return false;
+ if (TmpBB->getTerminator()->getNumSuccessors() != 1)
+ allSingleSucc = false;
}
assert(TmpBB);
@@ -1154,7 +1164,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
<< UnavailablePred->getName() << "': " << *LI);
return false;
}
-
+
+ // Make sure it is valid to move this load here. We have to watch out for:
+ // @1 = getelementptr (i8* p, ...
+ // test p and branch if == 0
+ // load @1
+ // It is valid to have the getelementptr before the test, even if p can be 0,
+ // as getelementptr only does address arithmetic.
+ // If we are not pushing the value through any multiple-successor blocks
+ // we do not have this case. Otherwise, check that the load is safe to
+ // put anywhere; this can be improved, but should be conservatively safe.
+ if (!allSingleSucc &&
+ !isSafeToLoadUnconditionally(LoadPtr, UnavailablePred->getTerminator()))
+ return false;
+
// Okay, we can eliminate this load by inserting a reload in the predecessor
// and using PHI construction to get the value in the other predecessors, do
// it.
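
The hazard this new guard (via the isSafeToLoadUnconditionally declaration pulled in by the Local.h include above) protects against, restated as C for illustration: forming the address early is harmless, but executing the load early is not. Note that C formally disallows even the pointer arithmetic on a null pointer, whereas LLVM's getelementptr never traps, which is exactly the distinction the comment draws:

    #include <cstdio>

    int f(int *p) {
      int *q = p + 1;     // address arithmetic only -- the getelementptr
      if (p == 0)
        return 0;         // multi-successor block: this edge must not load
      return *q;          // hoisting this load above the test would fault
    }

    int main() {
      int a[2] = {1, 2};
      std::printf("%d %d\n", f(a), f(0));    // prints "2 0"
    }
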
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 38b1198..326fb38 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -17,7 +17,10 @@
// which starts at zero and steps by one.
// 2. The canonical induction variable is guaranteed to be the first PHI node
// in the loop header block.
-// 3. Any pointer arithmetic recurrences are raised to use array subscripts.
+// 3. The canonical induction variable is guaranteed to be in a wide enough
+// type so that IV expressions need not be (directly) zero-extended or
+// sign-extended.
+// 4. Any pointer arithmetic recurrences are raised to use array subscripts.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
@@ -296,11 +299,11 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
// If this instruction is dead now, delete it.
RecursivelyDeleteTriviallyDeadInstructions(Inst);
- // See if this is a single-entry LCSSA PHI node. If so, we can (and
- // have to) remove
- // the PHI entirely. This is safe, because the NewVal won't be variant
+ // If we're inserting code into the exit block rather than the
+ // preheader, we can (and have to) remove the PHI entirely.
+ // This is safe, because the NewVal won't be variant
// in the loop, so we don't need an LCSSA phi node anymore.
- if (NumPreds == 1) {
+ if (ExitBlocks.size() == 1) {
PN->replaceAllUsesWith(ExitVal);
RecursivelyDeleteTriviallyDeadInstructions(PN);
break;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 5465e4a..5bd17e0 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -390,7 +390,7 @@ namespace {
Value *EvaluateInDifferentType(Value *V, const Type *Ty, bool isSigned);
- bool CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+ bool CanEvaluateInDifferentType(Value *V, const Type *Ty,
unsigned CastOpc, int &NumCastsRemoved);
unsigned GetOrEnforceKnownAlignment(Value *V,
unsigned PrefAlign = 0);
@@ -654,30 +654,12 @@ static unsigned getOpcode(const Value *V) {
}
/// AddOne - Add one to a ConstantInt
-static ConstantInt *AddOne(ConstantInt *C) {
- APInt Val(C->getValue());
- return ConstantInt::get(++Val);
+static Constant *AddOne(Constant *C) {
+ return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1));
}
/// SubOne - Subtract one from a ConstantInt
-static ConstantInt *SubOne(ConstantInt *C) {
- APInt Val(C->getValue());
- return ConstantInt::get(--Val);
-}
-/// Add - Add two ConstantInts together
-static ConstantInt *Add(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() + C2->getValue());
-}
-/// And - Bitwise AND two ConstantInts together
-static ConstantInt *And(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() & C2->getValue());
-}
-/// Subtract - Subtract one ConstantInt from another
-static ConstantInt *Subtract(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() - C2->getValue());
-}
-/// Multiply - Multiply two ConstantInts together
-static ConstantInt *Multiply(ConstantInt *C1, ConstantInt *C2) {
- return ConstantInt::get(C1->getValue() * C2->getValue());
+static Constant *SubOne(ConstantInt *C) {
+ return ConstantExpr::getSub(C, ConstantInt::get(C->getType(), 1));
}
/// MultiplyOverflows - True if the multiply cannot be expressed in an int
/// this size.
@@ -774,7 +756,7 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
/// SimplifyDemandedBits knows about. See if the instruction has any
/// properties that allow us to simplify its operands.
bool InstCombiner::SimplifyDemandedInstructionBits(Instruction &Inst) {
- unsigned BitWidth = cast<IntegerType>(Inst.getType())->getBitWidth();
+ unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
APInt DemandedMask(APInt::getAllOnesValue(BitWidth));
@@ -830,13 +812,13 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
const Type *VTy = V->getType();
assert((TD || !isa<PointerType>(VTy)) &&
"SimplifyDemandedBits needs to know bit widths!");
- assert((!TD || TD->getTypeSizeInBits(VTy) == BitWidth) &&
- (!isa<IntegerType>(VTy) ||
- VTy->getPrimitiveSizeInBits() == BitWidth) &&
+ assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
+ (!VTy->isIntOrIntVector() ||
+ VTy->getScalarSizeInBits() == BitWidth) &&
KnownZero.getBitWidth() == BitWidth &&
KnownOne.getBitWidth() == BitWidth &&
- "Value *V, DemandedMask, KnownZero and KnownOne \
- must have same BitWidth");
+ "Value *V, DemandedMask, KnownZero and KnownOne "
+ "must have same BitWidth");
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
// We know all of the bits for a constant!
KnownOne = CI->getValue() & DemandedMask;
@@ -1089,7 +1071,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
RHSKnownZero &= LHSKnownZero;
break;
case Instruction::Trunc: {
- unsigned truncBf = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned truncBf = I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask.zext(truncBf);
RHSKnownZero.zext(truncBf);
RHSKnownOne.zext(truncBf);
@@ -1112,7 +1094,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
break;
case Instruction::ZExt: {
// Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
DemandedMask.trunc(SrcBitWidth);
RHSKnownZero.trunc(SrcBitWidth);
@@ -1130,7 +1112,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
case Instruction::SExt: {
// Compute the bits in the result that are not present in the input.
- unsigned SrcBitWidth =I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned SrcBitWidth =I->getOperand(0)->getType()->getScalarSizeInBits();
APInt InputDemandedBits = DemandedMask &
APInt::getLowBitsSet(BitWidth, SrcBitWidth);
@@ -1354,7 +1336,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) {
APInt RA = Rem->getValue().abs();
if (RA.isPowerOf2()) {
- if (DemandedMask.ule(RA)) // srem won't affect demanded bits
+ if (DemandedMask.ult(RA)) // srem won't affect demanded bits
return I->getOperand(0);
APInt LowBits = RA - 1;
@@ -2087,7 +2069,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// See if SimplifyDemandedBits can simplify this. This handles stuff like
// (X & 254)+1 -> (X&254)|1
- if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+ if (SimplifyDemandedInstructionBits(I))
return &I;
// zext(i1) - 1 -> select i1, 0, -1
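
The rewrite named in the comment above, (X & 254)+1 -> (X & 254)|1, is sound because the low bit of the masked value is known zero, so the add can never carry; an exhaustive check in plain C++:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned x = 0; x < 256; ++x) {
        uint8_t masked = uint8_t(x) & 254;   // low bit known zero
        assert(uint8_t(masked + 1) == uint8_t(masked | 1));
      }
    }
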
@@ -2107,7 +2089,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Value *XorLHS = 0;
if (isa<ConstantInt>(RHSC) &&
match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) {
- uint32_t TySizeBits = I.getType()->getPrimitiveSizeInBits();
+ uint32_t TySizeBits = I.getType()->getScalarSizeInBits();
const APInt& RHSVal = cast<ConstantInt>(RHSC)->getValue();
uint32_t Size = TySizeBits / 2;
@@ -2197,7 +2179,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// X*C1 + X*C2 --> X * (C1+C2)
ConstantInt *C1;
if (X == dyn_castFoldableMul(RHS, C1))
- return BinaryOperator::CreateMul(X, Add(C1, C2));
+ return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2));
}
// X + X*C --> X * (C+1)
@@ -2262,7 +2244,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// (X & FF00) + xx00 -> (X+xx00) & FF00
if (LHS->hasOneUse() && match(LHS, m_And(m_Value(X), m_ConstantInt(C2)))) {
- Constant *Anded = And(CRHS, C2);
+ Constant *Anded = ConstantExpr::getAnd(CRHS, C2);
if (Anded == CRHS) {
// See if all bits from the first bit set in the Add RHS up are included
// in the mask. First, get the rightmost bit.
@@ -2290,7 +2272,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
// add (cast *A to intptrtype) B ->
- // cast (GEP (cast *A to sbyte*) B) --> intptrtype
+ // cast (GEP (cast *A to i8*) B) --> intptrtype
{
CastInst *CI = dyn_cast<CastInst>(LHS);
Value *Other = RHS;
@@ -2299,7 +2281,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
Other = LHS;
}
if (CI && CI->getType()->isSized() &&
- (CI->getType()->getPrimitiveSizeInBits() ==
+ (CI->getType()->getScalarSizeInBits() ==
TD->getIntPtrType()->getPrimitiveSizeInBits())
&& isa<PointerType>(CI->getOperand(0)->getType())) {
unsigned AS =
@@ -2523,7 +2505,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
else if (ConstantInt *CI1 = dyn_cast<ConstantInt>(I.getOperand(0))) {
if (ConstantInt *CI2 = dyn_cast<ConstantInt>(Op1I->getOperand(1)))
// C1-(X+C2) --> (C1-C2)-X
- return BinaryOperator::CreateSub(Subtract(CI1, CI2),
+ return BinaryOperator::CreateSub(ConstantExpr::getSub(CI1, CI2),
Op1I->getOperand(0));
}
}
@@ -2564,7 +2546,8 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
// X - X*C --> X * (1-C)
ConstantInt *C2 = 0;
if (dyn_castFoldableMul(Op1I, C2) == Op0) {
- Constant *CP1 = Subtract(ConstantInt::get(I.getType(), 1), C2);
+ Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(), 1),
+ C2);
return BinaryOperator::CreateMul(Op0, CP1);
}
}
@@ -2589,7 +2572,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2)
if (X == dyn_castFoldableMul(Op1, C2))
- return BinaryOperator::CreateMul(X, Subtract(C1, C2));
+ return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2));
}
return 0;
}
@@ -2950,12 +2933,12 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
// (sdiv X, X) --> 1 (udiv X, X) --> 1
if (Op0 == Op1) {
if (const VectorType *Ty = dyn_cast<VectorType>(I.getType())) {
- ConstantInt *CI = ConstantInt::get(Ty->getElementType(), 1);
+ Constant *CI = ConstantInt::get(Ty->getElementType(), 1);
std::vector<Constant*> Elts(Ty->getNumElements(), CI);
return ReplaceInstUsesWith(I, ConstantVector::get(Elts));
}
- ConstantInt *CI = ConstantInt::get(I.getType(), 1);
+ Constant *CI = ConstantInt::get(I.getType(), 1);
return ReplaceInstUsesWith(I, CI);
}
@@ -2980,7 +2963,7 @@ Instruction *InstCombiner::commonIDivTransforms(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
else
return BinaryOperator::Create(I.getOpcode(), LHS->getOperand(0),
- Multiply(RHS, LHSRHS));
+ ConstantExpr::getMul(RHS, LHSRHS));
}
if (!RHS->isZero()) { // avoid X udiv 0
@@ -3513,7 +3496,7 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
Value *X = Op->getOperand(0);
Constant *Together = 0;
if (!Op->isShift())
- Together = And(AndRHS, OpRHS);
+ Together = ConstantExpr::getAnd(AndRHS, OpRHS);
switch (Op->getOpcode()) {
case Instruction::Xor:
@@ -3724,7 +3707,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
switch (LHSI->getOpcode()) {
default: return 0;
case Instruction::And:
- if (And(N, Mask) == Mask) {
+ if (ConstantExpr::getAnd(N, Mask) == Mask) {
// If the AndRHS is a power of two minus one (0+1+), this is simple.
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) ==
@@ -3748,7 +3731,7 @@ Value *InstCombiner::FoldLogicalPlusAnd(Value *LHS, Value *RHS,
// If the AndRHS is a power of two minus one (0+1+), and N&Mask == 0
if ((Mask->getValue().countLeadingZeros() +
Mask->getValue().countPopulation()) == Mask->getValue().getBitWidth()
- && And(N, Mask)->isZero())
+ && ConstantExpr::getAnd(N, Mask)->isNullValue())
break;
return 0;
}
@@ -3946,10 +3929,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else {
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType())) {
if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
if (CP->isAllOnesValue()) // X & <-1,-1> -> X
return ReplaceInstUsesWith(I, I.getOperand(0));
@@ -3957,7 +3939,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0>
}
}
-
+
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt& AndRHSMask = AndRHS->getValue();
APInt NotAndRHS(~AndRHSMask);
@@ -4510,7 +4492,7 @@ Instruction *InstCombiner::FoldOrOfICmps(Instruction &I,
Instruction *Add = BinaryOperator::CreateAdd(Val, AddCST,
Val->getName()+".off");
InsertNewInstBefore(Add, I);
- AddCST = Subtract(AddOne(RHSCst), LHSCst);
+ AddCST = ConstantExpr::getSub(AddOne(RHSCst), LHSCst);
return new ICmpInst(ICmpInst::ICMP_ULT, Add, AddCST);
}
break; // (X == 13 | X == 15) -> no change
@@ -4653,18 +4635,17 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
- } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
- return ReplaceInstUsesWith(I, I.getOperand(1));
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType())) {
+ if (isa<ConstantAggregateZero>(Op1)) {
+ return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
+ } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
+ if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
+ return ReplaceInstUsesWith(I, I.getOperand(1));
+ }
}
-
-
// or X, -1 == -1
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
@@ -4991,12 +4972,11 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- if (!isa<VectorType>(I.getType())) {
- if (SimplifyDemandedInstructionBits(I))
- return &I;
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
- }
+ if (SimplifyDemandedInstructionBits(I))
+ return &I;
+ if (isa<VectorType>(I.getType()))
+ if (isa<ConstantAggregateZero>(Op1))
+ return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
// Is this a ~ operation?
if (Value *NotOp = dyn_castNotVal(&I)) {
@@ -5083,7 +5063,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
Constant *NewRHS = ConstantExpr::getOr(Op0CI, RHS);
// Anything in both C1 and C2 is known to be zero, remove it from
// NewRHS.
- Constant *CommonBits = And(Op0CI, RHS);
+ Constant *CommonBits = ConstantExpr::getAnd(Op0CI, RHS);
NewRHS = ConstantExpr::getAnd(NewRHS,
ConstantExpr::getNot(CommonBits));
AddToWorkList(Op0I);
@@ -5247,12 +5227,13 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return Changed ? &I : 0;
}
-/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
-/// overflowed for this type.
-static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
- ConstantInt *In2, bool IsSigned = false) {
- Result = cast<ConstantInt>(Add(In1, In2));
+static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+ return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
+}
+static bool HasAddOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
if (IsSigned)
if (In2->getValue().isNegative())
return Result->getValue().sgt(In1->getValue());
@@ -5262,12 +5243,32 @@ static bool AddWithOverflow(ConstantInt *&Result, ConstantInt *In1,
return Result->getValue().ult(In1->getValue());
}
-/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
/// overflowed for this type.
-static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
- ConstantInt *In2, bool IsSigned = false) {
- Result = cast<ConstantInt>(Subtract(In1, In2));
+static bool AddWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getAdd(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
+ if (HasAddOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasAddOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+static bool HasSubOverflow(ConstantInt *Result,
+ ConstantInt *In1, ConstantInt *In2,
+ bool IsSigned) {
if (IsSigned)
if (In2->getValue().isNegative())
return Result->getValue().slt(In1->getValue());
@@ -5277,6 +5278,29 @@ static bool SubWithOverflow(ConstantInt *&Result, ConstantInt *In1,
return Result->getValue().ugt(In1->getValue());
}
+/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
+/// overflowed for this type.
+static bool SubWithOverflow(Constant *&Result, Constant *In1,
+ Constant *In2, bool IsSigned = false) {
+ Result = ConstantExpr::getSub(In1, In2);
+
+ if (const VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
+ for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
+ Constant *Idx = ConstantInt::get(Type::Int32Ty, i);
+ if (HasSubOverflow(ExtractElement(Result, Idx),
+ ExtractElement(In1, Idx),
+ ExtractElement(In2, Idx),
+ IsSigned))
+ return true;
+ }
+ return false;
+ }
+
+ return HasSubOverflow(cast<ConstantInt>(Result),
+ cast<ConstantInt>(In1), cast<ConstantInt>(In2),
+ IsSigned);
+}
+
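
The comparisons inside HasAddOverflow/HasSubOverflow encode the usual wraparound tests; checked on concrete 8-bit values in plain C++ (two's-complement wrap assumed for the signed case):

    #include <cassert>
    #include <cstdint>

    int main() {
      // Unsigned add: overflow iff the wrapped sum is ult an input.
      uint8_t a = 200, b = 100;
      assert(uint8_t(a + b) < a);        // 44 < 200: overflowed
      assert(uint8_t(a + 50) >= a);      // 250: no overflow
      // Signed add of a negative value: overflow iff result sgt input.
      int8_t c = -100;
      assert(int8_t(c + c) > c);         // -200 wraps to 56 > -100
    }
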
/// EmitGEPOffset - Given a getelementptr instruction/constantexpr, emit the
/// code necessary to compute the offset from the base pointer (without adding
/// in the base pointer). Return the result as a signed integer of intptr size.
@@ -5589,7 +5613,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// Check to see that the input is converted from an integer type that is small
// enough that it preserves all bits. TODO: check here for "known" sign bits.
// This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
- unsigned InputSize = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ unsigned InputSize = LHSI->getOperand(0)->getType()->getScalarSizeInBits();
// If this is a uitofp instruction, we need an extra bit to hold the sign.
bool LHSUnsigned = isa<UIToFPInst>(LHSI);
@@ -5644,7 +5668,7 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// See if the FP constant is too large for the integer. For example,
// comparing an i8 to 300.0.
- unsigned IntWidth = IntTy->getPrimitiveSizeInBits();
+ unsigned IntWidth = IntTy->getScalarSizeInBits();
if (!LHSUnsigned) {
// If the RHS value is > SignedMax, fold the comparison. This handles +INF
@@ -5943,9 +5967,9 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
unsigned BitWidth = 0;
if (TD)
- BitWidth = TD->getTypeSizeInBits(Ty);
- else if (isa<IntegerType>(Ty))
- BitWidth = Ty->getPrimitiveSizeInBits();
+ BitWidth = TD->getTypeSizeInBits(Ty->getScalarType());
+ else if (Ty->isIntOrIntVector())
+ BitWidth = Ty->getScalarSizeInBits();
bool isSignBit = false;
@@ -6459,7 +6483,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
// C2 (CI). By solving for X we can turn this into a range check
// instead of computing a divide.
- ConstantInt *Prod = Multiply(CmpRHS, DivRHS);
+ Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
// Determine if the product overflows by seeing if the product is
// not equal to the divide. Make sure we do the same kind of divide
@@ -6478,7 +6502,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
// overflow variable is set to 0 if its corresponding bound variable is valid
// -1 if overflowed off the bottom end, or +1 if overflowed off the top end.
int LoOverflow = 0, HiOverflow = 0;
- ConstantInt *LoBound = 0, *HiBound = 0;
+ Constant *LoBound = 0, *HiBound = 0;
if (!DivIsSigned) { // udiv
// e.g. X/5 op 3 --> [15, 20)
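
The transformation replaces a divide-and-compare with a range test: for udiv, X/5 == 3 holds exactly when 15 <= X < 20. A brute-force confirmation:

    #include <cassert>

    int main() {
      for (unsigned x = 0; x < 1000; ++x)
        assert((x / 5 == 3) == (x >= 15 && x < 20));
    }
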
@@ -6966,7 +6990,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
if (BO->hasOneUse())
return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- Subtract(RHS, BOp1C));
+ ConstantExpr::getSub(RHS, BOp1C));
} else if (RHSV == 0) {
// Replace ((add A, B) != 0) with (A != -B) if A or B is
// efficiently invertible, or if the add has just this one use.
@@ -7133,10 +7157,10 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICI) {
if (Res2 == CI) {
// Make sure that sign of the Cmp and the sign of the Cast are the same.
// For example, we might have:
- // %A = sext short %X to uint
- // %B = icmp ugt uint %A, 1330
+ // %A = sext i16 %X to i32
+ // %B = icmp ugt i32 %A, 1330
// It is incorrect to transform this into
- // %B = icmp ugt short %X, 1330
+ // %B = icmp ugt i16 %X, 1330
// because %A may have negative value.
//
// However, we allow this when the compare is EQ/NE, because they are
@@ -7210,18 +7234,16 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
if (ConstantInt *CSI = dyn_cast<ConstantInt>(Op0))
if (CSI->isAllOnesValue())
return ReplaceInstUsesWith(I, CSI);
-
+
// See if we can turn a signed shr into an unsigned shr.
- if (!isa<VectorType>(I.getType())) {
- if (MaskedValueIsZero(Op0,
- APInt::getSignBit(I.getType()->getPrimitiveSizeInBits())))
- return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
-
- // Arithmetic shifting an all-sign-bit value is a no-op.
- unsigned NumSignBits = ComputeNumSignBits(Op0);
- if (NumSignBits == Op0->getType()->getPrimitiveSizeInBits())
- return ReplaceInstUsesWith(I, Op0);
- }
+ if (MaskedValueIsZero(Op0,
+ APInt::getSignBit(I.getType()->getScalarSizeInBits())))
+ return BinaryOperator::CreateLShr(Op0, I.getOperand(1));
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ unsigned NumSignBits = ComputeNumSignBits(Op0);
+ if (NumSignBits == Op0->getType()->getScalarSizeInBits())
+ return ReplaceInstUsesWith(I, Op0);
return 0;
}
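
The two AShr rewrites above on concrete values, in plain C++ (assuming the usual arithmetic right shift of negative signed operands):

    #include <cassert>
    #include <cstdint>

    int main() {
      int8_t nonneg = 0x35;                              // sign bit known zero
      assert((nonneg >> 3) == (uint8_t(nonneg) >> 3));   // ashr behaves as lshr
      int8_t allones = -1;                               // all bits are sign bits
      assert((allones >> 5) == allones);                 // the shift is a no-op
    }
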
@@ -7250,7 +7272,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
}
// See if we can fold away this shift.
- if (!isa<VectorType>(I.getType()) && SimplifyDemandedInstructionBits(I))
+ if (SimplifyDemandedInstructionBits(I))
return &I;
// Try to fold constant and into select arguments.
@@ -7271,10 +7293,10 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
- uint32_t TypeBits = Op0->getType()->getPrimitiveSizeInBits();
+ uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
- // shl uint X, 32 = 0 and shr ubyte Y, 9 = 0, ... just don't eliminate shr
- // of a signed value.
+ // shl i32 X, 32 = 0 and srl i8 Y, 9 = 0, ... just don't eliminate
+ // a signed shift.
//
if (Op1->uge(TypeBits)) {
if (I.getOpcode() != Instruction::AShr)
@@ -7320,8 +7342,8 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, ConstantInt *Op1,
// part of the register be zeros. Emulate this by inserting an AND to
// clear the top bits as needed. This 'and' will usually be zapped by
// other xforms later if dead.
- unsigned SrcSize = TrOp->getType()->getPrimitiveSizeInBits();
- unsigned DstSize = TI->getType()->getPrimitiveSizeInBits();
+ unsigned SrcSize = TrOp->getType()->getScalarSizeInBits();
+ unsigned DstSize = TI->getType()->getScalarSizeInBits();
APInt MaskV(APInt::getLowBitsSet(SrcSize, DstSize));
// The mask we constructed says what the trunc would do if occurring
@@ -7729,7 +7751,8 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
// If the allocation size is constant, form a constant mul expression
Amt = ConstantInt::get(Type::Int32Ty, Scale);
if (isa<ConstantInt>(NumElements))
- Amt = Multiply(cast<ConstantInt>(NumElements), cast<ConstantInt>(Amt));
+ Amt = ConstantExpr::getMul(cast<ConstantInt>(NumElements),
+ cast<ConstantInt>(Amt));
// otherwise multiply the amount and the number of elements
else {
Instruction *Tmp = BinaryOperator::CreateMul(Amt, NumElements, "tmp");
@@ -7788,17 +7811,17 @@ Instruction *InstCombiner::PromoteCastOfAllocation(BitCastInst &CI,
/// If CastOpc is a sext or zext, we are asking if the low bits of the value can
/// be computed in a larger type, which is then and'd or sext_in_reg'd to get
/// the final result.
-bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
+bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
unsigned CastOpc,
int &NumCastsRemoved){
// We can always evaluate constants in another type.
- if (isa<ConstantInt>(V))
+ if (isa<Constant>(V))
return true;
Instruction *I = dyn_cast<Instruction>(V);
if (!I) return false;
- const IntegerType *OrigTy = cast<IntegerType>(V->getType());
+ const Type *OrigTy = V->getType();
// If this is an extension or truncate, we can often eliminate it.
if (isa<TruncInst>(I) || isa<ZExtInst>(I) || isa<SExtInst>(I)) {
@@ -7836,8 +7859,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
// If we are truncating the result of this SHL, and if it's a shift of a
// constant amount, we can always perform a SHL in a smaller type.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t BitWidth = Ty->getBitWidth();
- if (BitWidth < OrigTy->getBitWidth() &&
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
+ if (BitWidth < OrigTy->getScalarSizeInBits() &&
CI->getLimitedValue(BitWidth) < BitWidth)
return CanEvaluateInDifferentType(I->getOperand(0), Ty, CastOpc,
NumCastsRemoved);
@@ -7848,8 +7871,8 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const IntegerType *Ty,
// lshr iff we know that the bits we would otherwise be shifting in are
// already zeros.
if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- uint32_t OrigBitWidth = OrigTy->getBitWidth();
- uint32_t BitWidth = Ty->getBitWidth();
+ uint32_t OrigBitWidth = OrigTy->getScalarSizeInBits();
+ uint32_t BitWidth = Ty->getScalarSizeInBits();
if (BitWidth < OrigBitWidth &&
MaskedValueIsZero(I->getOperand(0),
APInt::getHighBitsSet(OrigBitWidth, OrigBitWidth-BitWidth)) &&
@@ -8131,8 +8154,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
const Type *SrcTy = Src->getType();
const Type *DestTy = CI.getType();
- uint32_t SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- uint32_t DestBitSize = DestTy->getPrimitiveSizeInBits();
+ uint32_t SrcBitSize = SrcTy->getScalarSizeInBits();
+ uint32_t DestBitSize = DestTy->getScalarSizeInBits();
// See if we can simplify any instructions used by the LHS whose sole
// purpose is to compute bits we don't care about.
@@ -8151,8 +8174,9 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
// Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- (isSafeIntegerType(DestTy) || !isSafeIntegerType(SrcI->getType())) &&
- CanEvaluateInDifferentType(SrcI, cast<IntegerType>(DestTy),
+ (isSafeIntegerType(DestTy->getScalarType()) ||
+ !isSafeIntegerType(SrcI->getType()->getScalarType())) &&
+ CanEvaluateInDifferentType(SrcI, DestTy,
CI.getOpcode(), NumCastsRemoved)) {
// If this cast is a truncate, evaluating in a different type always
// eliminates the cast, so it is always a win. If this is a zero-extension,
@@ -8350,17 +8374,18 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
Value *Src = CI.getOperand(0);
const Type *Ty = CI.getType();
- uint32_t DestBitWidth = Ty->getPrimitiveSizeInBits();
- uint32_t SrcBitWidth = cast<IntegerType>(Src->getType())->getBitWidth();
+ uint32_t DestBitWidth = Ty->getScalarSizeInBits();
+ uint32_t SrcBitWidth = Src->getType()->getScalarSizeInBits();
// Canonicalize trunc x to i1 -> (icmp ne (and x, 1), 0)
- if (DestBitWidth == 1) {
+ if (DestBitWidth == 1 &&
+ isa<VectorType>(Ty) == isa<VectorType>(Src->getType())) {
Constant *One = ConstantInt::get(Src->getType(), 1);
Src = InsertNewInstBefore(BinaryOperator::CreateAnd(Src, One, "tmp"), CI);
Value *Zero = Constant::getNullValue(Src->getType());
return new ICmpInst(ICmpInst::ICMP_NE, Src, Zero);
}
-
+
// Optimize trunc(lshr(), c) to pull the shift through the truncate.
ConstantInt *ShAmtV = 0;
Value *ShiftOp = 0;
@@ -8403,7 +8428,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getPrimitiveSizeInBits()-1);
+ In->getType()->getScalarSizeInBits()-1);
In = InsertNewInstBefore(BinaryOperator::CreateLShr(In, Sh,
In->getName()+".lobit"),
CI);
@@ -8494,28 +8519,30 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// Get the sizes of the types involved. We know that the intermediate type
// will be smaller than A or C, but don't know the relation between A and C.
Value *A = CSrc->getOperand(0);
- unsigned SrcSize = A->getType()->getPrimitiveSizeInBits();
- unsigned MidSize = CSrc->getType()->getPrimitiveSizeInBits();
- unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned SrcSize = A->getType()->getScalarSizeInBits();
+ unsigned MidSize = CSrc->getType()->getScalarSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If we're actually extending zero bits, then if
// SrcSize < DstSize: zext(a & mask)
// SrcSize == DstSize: a & mask
// SrcSize > DstSize: trunc(a) & mask
if (SrcSize < DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- Constant *AndConst = ConstantInt::get(AndValue);
+ Constant *AndConst = ConstantInt::get(A->getType(), AndValue);
Instruction *And =
BinaryOperator::CreateAnd(A, AndConst, CSrc->getName()+".mask");
InsertNewInstBefore(And, CI);
return new ZExtInst(And, CI.getType());
} else if (SrcSize == DstSize) {
APInt AndValue(APInt::getLowBitsSet(SrcSize, MidSize));
- return BinaryOperator::CreateAnd(A, ConstantInt::get(AndValue));
+ return BinaryOperator::CreateAnd(A, ConstantInt::get(A->getType(),
+ AndValue));
} else if (SrcSize > DstSize) {
Instruction *Trunc = new TruncInst(A, CI.getType(), "tmp");
InsertNewInstBefore(Trunc, CI);
APInt AndValue(APInt::getLowBitsSet(DstSize, MidSize));
- return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(AndValue));
+ return BinaryOperator::CreateAnd(Trunc, ConstantInt::get(Trunc->getType(),
+ AndValue));
}
}
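
The three size cases above collapse a trunc-then-zext chain into a single mask; the two strict-inequality cases checked with concrete widths (i16/i64 standing in for A, i8 for the intermediate, i32 for the destination — widths assumed for illustration):

    #include <cassert>
    #include <cstdint>

    int main() {
      // SrcSize < DstSize: zext i32 (trunc i8 (b))  ==  zext(b & 0xFF)
      uint16_t b = 0xABCD;
      assert(uint32_t(uint8_t(b)) == uint32_t(b & 0xFF));
      // SrcSize > DstSize: zext i32 (trunc i8 (a))  ==  trunc(a) & 0xFF
      uint64_t a = 0x123456789ABCDEF0ull;
      assert(uint32_t(uint8_t(a)) == (uint32_t(a) & 0xFF));
    }
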
@@ -8537,6 +8564,33 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
}
}
+ // zext(trunc(t) & C) -> (t & zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::And && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (TruncInst *TI = dyn_cast<TruncInst>(SrcI->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType())
+ return
+ BinaryOperator::CreateAnd(TI0,
+ ConstantExpr::getZExt(C, CI.getType()));
+ }
+
+ // zext((trunc(t) & C) ^ C) -> ((t & zext(C)) ^ zext(C)).
+ if (SrcI && SrcI->getOpcode() == Instruction::Xor && SrcI->hasOneUse())
+ if (ConstantInt *C = dyn_cast<ConstantInt>(SrcI->getOperand(1)))
+ if (BinaryOperator *And = dyn_cast<BinaryOperator>(SrcI->getOperand(0)))
+ if (And->getOpcode() == Instruction::And && And->hasOneUse() &&
+ And->getOperand(1) == C)
+ if (TruncInst *TI = dyn_cast<TruncInst>(And->getOperand(0))) {
+ Value *TI0 = TI->getOperand(0);
+ if (TI0->getType() == CI.getType()) {
+ Constant *ZC = ConstantExpr::getZExt(C, CI.getType());
+ Instruction *NewAnd = BinaryOperator::CreateAnd(TI0, ZC, "tmp");
+ InsertNewInstBefore(NewAnd, *And);
+ return BinaryOperator::CreateXor(NewAnd, ZC);
+ }
+ }
+
return 0;
}
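
The two new folds with fixed widths (i32 source of the zext, i64 destination) in plain C++, where the zext of C is just a widening cast:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t t = 0xFEDCBA9876543210ull;
      uint32_t C = 0x00FF00FF;
      // zext(trunc(t) & C)        ==  t & zext(C)
      assert(uint64_t(uint32_t(t) & C) == (t & uint64_t(C)));
      // zext((trunc(t) & C) ^ C)  ==  (t & zext(C)) ^ zext(C)
      assert(uint64_t((uint32_t(t) & C) ^ C) ==
             ((t & uint64_t(C)) ^ uint64_t(C)));
    }
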
@@ -8556,9 +8610,9 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// eliminate the trunc/sext pair.
if (getOpcode(Src) == Instruction::Trunc) {
Value *Op = cast<User>(Src)->getOperand(0);
- unsigned OpBits = cast<IntegerType>(Op->getType())->getBitWidth();
- unsigned MidBits = cast<IntegerType>(Src->getType())->getBitWidth();
- unsigned DestBits = cast<IntegerType>(CI.getType())->getBitWidth();
+ unsigned OpBits = Op->getType()->getScalarSizeInBits();
+ unsigned MidBits = Src->getType()->getScalarSizeInBits();
+ unsigned DestBits = CI.getType()->getScalarSizeInBits();
unsigned NumSignBits = ComputeNumSignBits(Op);
if (OpBits == DestBits) {
@@ -8599,8 +8653,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
BA == CA && isa<TruncInst>(A)) {
Value *I = cast<TruncInst>(A)->getOperand(0);
if (I->getType() == CI.getType()) {
- unsigned MidSize = Src->getType()->getPrimitiveSizeInBits();
- unsigned SrcDstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned MidSize = Src->getType()->getScalarSizeInBits();
+ unsigned SrcDstSize = CI.getType()->getScalarSizeInBits();
unsigned ShAmt = CA->getZExtValue()+SrcDstSize-MidSize;
Constant *ShAmtV = ConstantInt::get(CI.getType(), ShAmt);
I = InsertNewInstBefore(BinaryOperator::CreateShl(I, ShAmtV,
@@ -8671,11 +8725,11 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
Value *RHSTrunc = LookThroughFPExtensions(OpI->getOperand(1));
if (LHSTrunc->getType() != SrcTy &&
RHSTrunc->getType() != SrcTy) {
- unsigned DstSize = CI.getType()->getPrimitiveSizeInBits();
+ unsigned DstSize = CI.getType()->getScalarSizeInBits();
// If the source types were both smaller than the destination type of
// the cast, do this xform.
- if (LHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize &&
- RHSTrunc->getType()->getPrimitiveSizeInBits() <= DstSize) {
+ if (LHSTrunc->getType()->getScalarSizeInBits() <= DstSize &&
+ RHSTrunc->getType()->getScalarSizeInBits() <= DstSize) {
LHSTrunc = InsertCastBefore(Instruction::FPExt, LHSTrunc,
CI.getType(), CI);
RHSTrunc = InsertCastBefore(Instruction::FPExt, RHSTrunc,
@@ -8706,7 +8760,7 @@ Instruction *InstCombiner::visitFPToUI(FPToUIInst &FI) {
// 'X' value would cause an undefined result for the fptoui.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() < /*extra bit for sign */
+ (int)FI.getType()->getScalarSizeInBits() < /*extra bit for sign */
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
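
Why the mantissa-width test suffices: every value of an integer type no wider than the mantissa survives the int->fp->int round trip, so the pair of conversions is an identity. Exhaustive check for i16 through float (24-bit mantissa):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t x = 0; x <= 0xFFFF; ++x)
        assert(uint16_t(float(uint16_t(x))) == uint16_t(x));
    }
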
@@ -8726,7 +8780,7 @@ Instruction *InstCombiner::visitFPToSI(FPToSIInst &FI) {
// 'X' value would cause an undefined result for the fptosi.
if ((isa<UIToFPInst>(OpI) || isa<SIToFPInst>(OpI)) &&
OpI->getOperand(0)->getType() == FI.getType() &&
- (int)FI.getType()->getPrimitiveSizeInBits() <=
+ (int)FI.getType()->getScalarSizeInBits() <=
OpI->getType()->getFPMantissaWidth())
return ReplaceInstUsesWith(FI, OpI->getOperand(0));
@@ -8747,7 +8801,7 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
// trunc to be exposed to other transforms. Don't do this for extending
// ptrtoint's, because we don't know if the target sign or zero extends its
// pointers.
- if (CI.getType()->getPrimitiveSizeInBits() < TD->getPointerSizeInBits()) {
+ if (CI.getType()->getScalarSizeInBits() < TD->getPointerSizeInBits()) {
Value *P = InsertNewInstBefore(new PtrToIntInst(CI.getOperand(0),
TD->getIntPtrType(),
"tmp"), CI);
@@ -8763,7 +8817,7 @@ Instruction *InstCombiner::visitIntToPtr(IntToPtrInst &CI) {
// allows the trunc to be exposed to other transforms. Don't do this for
// extending inttoptr's, because we don't know if the target sign or zero
// extends to pointers.
- if (CI.getOperand(0)->getType()->getPrimitiveSizeInBits() >
+ if (CI.getOperand(0)->getType()->getScalarSizeInBits() >
TD->getPointerSizeInBits()) {
Value *P = InsertNewInstBefore(new TruncInst(CI.getOperand(0),
TD->getIntPtrType(),
@@ -9194,7 +9248,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
(Pred == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
- In->getType()->getPrimitiveSizeInBits()-1);
+ In->getType()->getScalarSizeInBits()-1);
In = InsertNewInstBefore(BinaryOperator::CreateAShr(In, Sh,
In->getName()+".lobit"),
*ICI);
@@ -9316,7 +9370,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// The comparison constant and the result are not necessarily the
// same width. Make an all-ones value by inserting an AShr.
Value *X = IC->getOperand(0);
- uint32_t Bits = X->getType()->getPrimitiveSizeInBits();
+ uint32_t Bits = X->getType()->getScalarSizeInBits();
Constant *ShAmt = ConstantInt::get(X->getType(), Bits-1);
Instruction *SRA = BinaryOperator::Create(Instruction::AShr, X,
ShAmt, "ones");
@@ -10850,8 +10904,8 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
static Value *InsertCastToIntPtrTy(Value *V, const Type *DTy,
Instruction *InsertPoint,
InstCombiner *IC) {
- unsigned PtrSize = DTy->getPrimitiveSizeInBits();
- unsigned VTySize = V->getType()->getPrimitiveSizeInBits();
+ unsigned PtrSize = DTy->getScalarSizeInBits();
+ unsigned VTySize = V->getType()->getScalarSizeInBits();
// We must cast correctly to the pointer type. Ensure that we
// sign extend the integer value if it is smaller as this is
// used for address computation.
@@ -10892,7 +10946,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
const Type *SrcTy = CI->getOperand(0)->getType();
// We can eliminate a cast from i32 to i64 iff the target
// is a 32-bit pointer target.
- if (SrcTy->getPrimitiveSizeInBits() >= TD->getPointerSizeInBits()) {
+ if (SrcTy->getScalarSizeInBits() >= TD->getPointerSizeInBits()) {
MadeChange = true;
*i = CI->getOperand(0);
}
@@ -11105,7 +11159,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
ConstantInt *Scale = 0;
if (ArrayEltSize == 1) {
NewIdx = GEP.getOperand(1);
- Scale = ConstantInt::get(NewIdx->getType(), 1);
+ Scale = ConstantInt::get(cast<IntegerType>(NewIdx->getType()), 1);
} else if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP.getOperand(1))) {
NewIdx = ConstantInt::get(CI->getType(), 1);
Scale = CI;
@@ -11114,7 +11168,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
isa<ConstantInt>(Inst->getOperand(1))) {
ConstantInt *ShAmt = cast<ConstantInt>(Inst->getOperand(1));
uint32_t ShAmtVal = ShAmt->getLimitedValue(64);
- Scale = ConstantInt::get(Inst->getType(), 1ULL << ShAmtVal);
+ Scale = ConstantInt::get(cast<IntegerType>(Inst->getType()),
+ 1ULL << ShAmtVal);
NewIdx = Inst->getOperand(0);
} else if (Inst->getOpcode() == Instruction::Mul &&
isa<ConstantInt>(Inst->getOperand(1))) {
@@ -11390,45 +11445,6 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
return 0;
}
-/// isSafeToLoadUnconditionally - Return true if we know that executing a load
-/// from this value cannot trap. If it is not obviously safe to load from the
-/// specified pointer, we do a quick local scan of the basic block containing
-/// ScanFrom, to determine if the address is already accessed.
-static bool isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
- // If it is an alloca it is always safe to load from.
- if (isa<AllocaInst>(V)) return true;
-
- // If it is a global variable it is mostly safe to load from.
- if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
- // Don't try to evaluate aliases. External weak GV can be null.
- return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
-
- // Otherwise, be a little bit aggressive by scanning the local block where we
- // want to check to see if the pointer is already being loaded or stored
- // from/to. If so, the previous load or store would have already trapped,
- // so there is no harm doing an extra load (also, CSE will later eliminate
- // the load entirely).
- BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
-
- while (BBI != E) {
- --BBI;
-
- // If we see a free or a call (which might do a free) the pointer could be
- // marked invalid.
- if (isa<FreeInst>(BBI) ||
- (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI)))
- return false;
-
- if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
- if (LI->getOperand(0) == V) return true;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
- if (SI->getOperand(1) == V) return true;
- }
-
- }
- return false;
-}
-
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
Value *Op = LI.getOperand(0);
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index c0ca2df..5a70fc3 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -76,7 +76,7 @@ namespace {
bool ProcessBlock(BasicBlock *BB);
bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB,
unsigned JumpThreadCost);
- BasicBlock *FactorCommonPHIPreds(PHINode *PN, Constant *CstVal);
+ BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
@@ -163,10 +163,10 @@ void JumpThreading::FindLoopHeaders(Function &F) {
/// This is important for things like "phi i1 [true, true, false, true, x]"
/// where we only need to clone the block for the true blocks once.
///
-BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Constant *CstVal) {
+BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
SmallVector<BasicBlock*, 16> CommonPreds;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == CstVal)
+ if (PN->getIncomingValue(i) == Val)
CommonPreds.push_back(PN->getIncomingBlock(i));
if (CommonPreds.size() == 1)
@@ -324,10 +324,6 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
- // If there is only a single predecessor of this block, nothing to fold.
- if (BB->getSinglePredecessor())
- return false;
-
// All the rest of our checks depend on the condition being an instruction.
if (CondInst == 0)
return false;
@@ -346,13 +342,36 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
CondInst->getOpcode() == Instruction::And))
return true;
- // If we have "br (phi != 42)" and the phi node has any constant values as
- // operands, we can thread through this block.
- if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst))
- if (isa<PHINode>(CondCmp->getOperand(0)) &&
- isa<Constant>(CondCmp->getOperand(1)) &&
- ProcessBranchOnCompare(CondCmp, BB))
- return true;
+ if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
+ if (isa<PHINode>(CondCmp->getOperand(0))) {
+ // If we have "br (phi != 42)" and the phi node has any constant values
+ // as operands, we can thread through this block.
+ //
+ // If we have "br (cmp phi, x)" and the phi node contains x such that the
+ // comparison uniquely identifies the branch target, we can thread
+ // through this block.
+
+ if (ProcessBranchOnCompare(CondCmp, BB))
+ return true;
+ }
+
+ // If we have a comparison, loop over the predecessors to see if there is
+ // a condition with the same value.
+ pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+ for (; PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI->isConditional() && *PI != BB) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
+ if (CI->getOperand(0) == CondCmp->getOperand(0) &&
+ CI->getOperand(1) == CondCmp->getOperand(1) &&
+ CI->getPredicate() == CondCmp->getPredicate()) {
+ // TODO: Could handle things like (x != 4) --> (x == 17)
+ if (ProcessBranchOnDuplicateCond(*PI, BB))
+ return true;
+ }
+ }
+ }
+ }
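
Source-level shape of what the new predecessor scan catches, as illustrative C: the second test of the same operands and predicate is already decided on every edge out of the first, so ProcessBranchOnDuplicateCond can thread around it.

    #include <cassert>

    int g(int x) {
      int acc = 0;
      if (x == 4) acc += 1;  else acc += 2;    // predecessor branches on (x == 4)
      if (x == 4) acc += 10; else acc += 20;   // same compare: outcome is known
      return acc;                              // on each incoming edge
    }

    int main() { assert(g(4) == 11 && g(5) == 22); }
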
// Check for some cases that are worth simplifying. Right now we want to look
// for loads that are used by a switch or by the condition for the branch. If
@@ -770,12 +789,30 @@ bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
return ThreadEdge(BB, PredBB, SuccBB, JumpThreadCost);
}
+/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right
+/// hand sides of the compare instruction, try to determine the result. If the
+/// result cannot be determined, a null pointer is returned.
+static Constant *GetResultOfComparison(CmpInst::Predicate pred,
+ Value *LHS, Value *RHS) {
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantExpr::getCompare(pred, CLHS, CRHS);
+
+ if (LHS == RHS)
+ if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))
+ return ICmpInst::isTrueWhenEqual(pred) ?
+ ConstantInt::getTrue() : ConstantInt::getFalse();
+
+ return 0;
+}
+
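
The isa<IntegerType> || isa<PointerType> guard on the LHS == RHS fold matters because identical floating-point operands need not compare equal; a two-line demonstration:

    #include <cassert>
    #include <cmath>

    int main() {
      double d = std::nan("");
      assert(!(d == d));   // fcmp oeq on identical operands can be false (NaN)
      int i = 42;
      assert(i == i);      // icmp eq on identical operands is always true
    }
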
/// ProcessBranchOnCompare - We found a branch on a comparison between a phi
-/// node and a constant. If the PHI node contains any constants as inputs, we
-/// can fold the compare for that edge and thread through it.
+/// node and a value. If we can identify when the comparison is true between
+/// the phi inputs and the value, we can fold the compare for that edge and
+/// thread through it.
bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
PHINode *PN = cast<PHINode>(Cmp->getOperand(0));
- Constant *RHS = cast<Constant>(Cmp->getOperand(1));
+ Value *RHS = Cmp->getOperand(1);
// If the phi isn't in the current block, an incoming edge to this block
// doesn't control the destination.
@@ -784,18 +821,17 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
// We can do this simplification if any comparisons fold to true or false.
// See if any do.
- Constant *PredCst = 0;
+ Value *PredVal = 0;
bool TrueDirection = false;
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- PredCst = dyn_cast<Constant>(PN->getIncomingValue(i));
- if (PredCst == 0) continue;
+ PredVal = PN->getIncomingValue(i);
+
+ Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal, RHS);
+ if (!Res) {
+ PredVal = 0;
+ continue;
+ }
- Constant *Res;
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cmp))
- Res = ConstantExpr::getICmp(ICI->getPredicate(), PredCst, RHS);
- else
- Res = ConstantExpr::getFCmp(cast<FCmpInst>(Cmp)->getPredicate(),
- PredCst, RHS);
// If this folded to a constant expr, we can't do anything.
if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) {
TrueDirection = ResC->getZExtValue();
@@ -808,11 +844,11 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
}
// Otherwise, we can't fold this input.
- PredCst = 0;
+ PredVal = 0;
}
// If no match, bail out.
- if (PredCst == 0)
+ if (PredVal == 0)
return false;
// See if the cost of duplicating this block is low enough.
@@ -825,7 +861,7 @@ bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
// If so, we can actually do this threading. Merge any common predecessors
// that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
+ BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal);
// Next, get our successor.
BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 9c78596..6f7a7f8 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -290,13 +290,13 @@ static bool isUsedOutsideLoop(Value *V, Loop *L) {
// Return V+1
static Value *getPlusOne(Value *V, bool Sign, Instruction *InsertPt) {
- ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
return BinaryOperator::CreateAdd(V, One, "lsp", InsertPt);
}
// Return V-1
static Value *getMinusOne(Value *V, bool Sign, Instruction *InsertPt) {
- ConstantInt *One = ConstantInt::get(V->getType(), 1, Sign);
+ Constant *One = ConstantInt::get(V->getType(), 1, Sign);
return BinaryOperator::CreateSub(V, One, "lsp", InsertPt);
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 944f409..7579748 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -143,10 +143,10 @@ namespace {
/// inside the loop, then try to eliminate the cast operation.
void OptimizeShadowIV(Loop *L);
- /// OptimizeSMax - Rewrite the loop's terminating condition
- /// if it uses an smax computation.
- ICmpInst *OptimizeSMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse);
+ /// OptimizeMax - Rewrite the loop's terminating condition
+ /// if it uses a max computation.
+ ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse);
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
const SCEVHandle *&CondStride);
@@ -336,13 +336,6 @@ namespace {
/// EmittedBase.
Value *OperandValToReplace;
- /// isSigned - The stride (and thus also the Base) of this use may be in
- /// a narrower type than the use itself (OperandValToReplace->getType()).
- /// When this is the case, the isSigned field indicates whether the
- /// IV expression should be signed-extended instead of zero-extended to
- /// fit the type of the use.
- bool isSigned;
-
/// Imm - The immediate value that should be added to the base immediately
/// before Inst, because it will be folded into the imm field of the
/// instruction. This is also sometimes used for loop-variant values that
@@ -363,7 +356,6 @@ namespace {
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
- isSigned(IVSU.isSigned()),
Imm(SE->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
@@ -428,11 +420,6 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEVHandle &NewBase,
NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
}
- if (isSigned)
- NewValSCEV = SE->getTruncateOrSignExtend(NewValSCEV, Ty);
- else
- NewValSCEV = SE->getTruncateOrZeroExtend(NewValSCEV, Ty);
-
return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
}
@@ -592,7 +579,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
if (Val->isLoopInvariant(L)) return; // Nothing to do.
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
@@ -613,7 +600,7 @@ static void MoveLoopVariantsToImmediateField(SCEVHandle &Val, SCEVHandle &Imm,
SCEVHandle Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
} else {
@@ -633,7 +620,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
bool isAddress, Loop *L,
ScalarEvolution *SE) {
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
- std::vector<SCEVHandle> NewOps;
+ SmallVector<SCEVHandle, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
@@ -660,7 +647,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
if (Start != SARE->getStart()) {
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
}
@@ -717,7 +704,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
/// SeparateSubExprs - Decompose Expr into all of the subexpressions that are
/// added together. This is used to reassociate common addition subexprs
/// together for maximal sharing when rewriting bases.
-static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
+static void SeparateSubExprs(SmallVector<SCEVHandle, 16> &SubExprs,
SCEVHandle Expr,
ScalarEvolution *SE) {
if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(Expr)) {
@@ -729,7 +716,7 @@ static void SeparateSubExprs(std::vector<SCEVHandle> &SubExprs,
SubExprs.push_back(Expr);
} else {
// Compute the addrec with zero as its base.
- std::vector<SCEVHandle> Ops(SARE->op_begin(), SARE->op_end());
+ SmallVector<SCEVHandle, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
@@ -783,9 +770,9 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
- std::vector<SCEVHandle> UniqueSubExprs;
+ SmallVector<SCEVHandle, 16> UniqueSubExprs;
- std::vector<SCEVHandle> SubExprs;
+ SmallVector<SCEVHandle, 16> SubExprs;
unsigned NumUsesInsideLoop = 0;
for (unsigned i = 0; i != NumUses; ++i) {
// If the user is outside the loop, just ignore it for base computation.
@@ -1129,11 +1116,11 @@ static bool isNonConstantNegative(const SCEVHandle &Expr) {
return SC->getValue()->getValue().isNegative();
}
-// CollectIVUsers - Transform our list of users and offsets to a bit more
-// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
-// of the strided accesses, as well as the old information from Uses. We
-// progressively move information from the Base field to the Imm field, until
-// we eventually have the full access expression to rewrite the use.
+/// CollectIVUsers - Transform our list of users and offsets to a bit more
+/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
+/// of the strided accesses, as well as the old information from Uses. We
+/// progressively move information from the Base field to the Imm field, until
+/// we eventually have the full access expression to rewrite the use.
SCEVHandle LoopStrengthReduce::CollectIVUsers(const SCEVHandle &Stride,
IVUsersOfOneStride &Uses,
Loop *L,
@@ -2008,15 +1995,15 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
- ConstantInt *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
+ Constant *CI = ConstantInt::get(NewCmpIntTy, NewCmpVal);
NewCmpRHS = ConstantExpr::getIntToPtr(CI, NewCmpTy);
}
NewOffset = TyBits == NewTyBits
? SE->getMulExpr(CondUse->getOffset(),
- SE->getConstant(ConstantInt::get(CmpTy, Scale)))
- : SE->getConstant(ConstantInt::get(NewCmpIntTy,
+ SE->getConstant(CmpTy, Scale))
+ : SE->getConstant(NewCmpIntTy,
cast<SCEVConstant>(CondUse->getOffset())->getValue()
- ->getSExtValue()*Scale));
+ ->getSExtValue()*Scale);
break;
}
}
@@ -2047,7 +2034,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
- IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS, false);
+ IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
@@ -2057,8 +2044,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
return Cond;
}
-/// OptimizeSMax - Rewrite the loop's terminating condition if it uses
-/// an smax computation.
+/// OptimizeMax - Rewrite the loop's terminating condition if it uses
+/// a max computation.
///
/// This is a narrow solution to a specific, but acute, problem. For loops
/// like this:
@@ -2068,10 +2055,10 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// p[i] = 0.0;
/// } while (++i < n);
///
-/// where the comparison is signed, the trip count isn't just 'n', because
-/// 'n' could be negative. And unfortunately this can come up even for loops
-/// where the user didn't use a C do-while loop. For example, seemingly
-/// well-behaved top-test loops will commonly be lowered like this:
+/// the trip count isn't just 'n', because 'n' might not be positive. And
+/// unfortunately this can come up even for loops where the user didn't use
+/// a C do-while loop. For example, seemingly well-behaved top-test loops
+/// will commonly be lowered like this:
//
/// if (n > 0) {
/// i = 0;
@@ -2084,14 +2071,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// test in such a way that indvars can't find it.
///
/// When indvars can't find the if test in loops like this, it creates a
-/// signed-max expression, which allows it to give the loop a canonical
+/// max expression, which allows it to give the loop a canonical
/// induction variable:
///
/// i = 0;
-/// smax = n < 1 ? 1 : n;
+/// max = n < 1 ? 1 : n;
/// do {
/// p[i] = 0.0;
-/// } while (++i != smax);
+/// } while (++i != max);
///
/// Canonical induction variables are necessary because the loop passes
/// are designed around them. The most obvious example of this is the
@@ -2107,8 +2094,8 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
/// the instructions for the maximum computation.
///
-ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse) {
+ICmpInst *LoopStrengthReduce::OptimizeMax(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse) {
// Check that the loop matches the pattern we're looking for.
if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
Cond->getPredicate() != CmpInst::ICMP_NE)
@@ -2126,12 +2113,19 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
SCEVHandle IterationCount = SE->getAddExpr(BackedgeTakenCount, One);
// Check for a max calculation that matches the pattern.
- const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(IterationCount);
- if (!SMax || SMax != SE->getSCEV(Sel)) return Cond;
+ if (!isa<SCEVSMaxExpr>(IterationCount) && !isa<SCEVUMaxExpr>(IterationCount))
+ return Cond;
+ const SCEVNAryExpr *Max = cast<SCEVNAryExpr>(IterationCount);
+ if (Max != SE->getSCEV(Sel)) return Cond;
+
+ // To handle a max with more than two operands, this optimization would
+ // require additional checking and setup.
+ if (Max->getNumOperands() != 2)
+ return Cond;
- SCEVHandle SMaxLHS = SMax->getOperand(0);
- SCEVHandle SMaxRHS = SMax->getOperand(1);
- if (!SMaxLHS || SMaxLHS != One) return Cond;
+ SCEVHandle MaxLHS = Max->getOperand(0);
+ SCEVHandle MaxRHS = Max->getOperand(1);
+ if (!MaxLHS || MaxLHS != One) return Cond;
// Check the relevant induction variable for conformance to
// the pattern.
@@ -2148,19 +2142,23 @@ ICmpInst *LoopStrengthReduce::OptimizeSMax(Loop *L, ICmpInst *Cond,
// Check the right operand of the select, and remember it, as it will
// be used in the new comparison instruction.
Value *NewRHS = 0;
- if (SE->getSCEV(Sel->getOperand(1)) == SMaxRHS)
+ if (SE->getSCEV(Sel->getOperand(1)) == MaxRHS)
NewRHS = Sel->getOperand(1);
- else if (SE->getSCEV(Sel->getOperand(2)) == SMaxRHS)
+ else if (SE->getSCEV(Sel->getOperand(2)) == MaxRHS)
NewRHS = Sel->getOperand(2);
if (!NewRHS) return Cond;
+ // Determine the new comparison opcode. It may be signed or unsigned,
+ // and the original comparison may be either equality or inequality.
+ CmpInst::Predicate Pred =
+ isa<SCEVSMaxExpr>(Max) ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
+ if (Cond->getPredicate() == CmpInst::ICMP_EQ)
+ Pred = CmpInst::getInversePredicate(Pred);
+
// Ok, everything looks ok to change the condition into the new predicate and
// delete the max calculation.
ICmpInst *NewCond =
- new ICmpInst(Cond->getPredicate() == CmpInst::ICMP_NE ?
- CmpInst::ICMP_SLT :
- CmpInst::ICMP_SGE,
- Cond->getOperand(0), NewRHS, "scmp", Cond);
+ new ICmpInst(Pred, Cond->getOperand(0), NewRHS, "scmp", Cond);
// Delete the max calculation instructions.
Cond->replaceAllUsesWith(NewCond);
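For illustration, the predicate selection above can be read as a tiny standalone function (a sketch; choosePredicate is not a helper that exists in this patch):

    // Pick the replacement predicate for a "++i != max(1, n)" exit test.
    // An smax implies a signed compare, a umax an unsigned one; when the
    // original test was ICMP_EQ, the sense inverts (SLT/ULT to SGE/UGE).
    static CmpInst::Predicate choosePredicate(bool IsSignedMax, bool WasEQ) {
      CmpInst::Predicate P = IsSignedMax ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
      return WasEQ ? CmpInst::getInversePredicate(P) : P;
    }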
@@ -2242,7 +2240,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
- ConstantFP *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
+ Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
@@ -2266,7 +2264,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
PHINode *NewPH = PHINode::Create(DestTy, "IV.S.", PH);
/* create new increment. '++d' in above example. */
- ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue());
+ Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
@@ -2284,9 +2282,9 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
}
}
-// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
-// uses in the loop, look to see if we can eliminate some, in favor of using
-// common indvars for the different uses.
+/// OptimizeIndvars - Now that IVUsesByStride is set up with all of the indvar
+/// uses in the loop, look to see if we can eliminate some, in favor of using
+/// common indvars for the different uses.
void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
// TODO: implement optzns here.
@@ -2301,11 +2299,11 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// induction variable, to allow coalescing the live ranges for the IV into
// one register value.
BasicBlock *LatchBlock = L->getLoopLatch();
- BasicBlock *ExitBlock = L->getExitingBlock();
- if (!ExitBlock)
+ BasicBlock *ExitingBlock = L->getExitingBlock();
+ if (!ExitingBlock)
// Multiple exits, just look at the exit in the latch block if there is one.
- ExitBlock = LatchBlock;
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitBlock->getTerminator());
+ ExitingBlock = LatchBlock;
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!TermBr)
return;
if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
@@ -2318,7 +2316,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
if (!FindIVUserForCond(Cond, CondUse, CondStride))
return; // setcc doesn't use the IV.
- if (ExitBlock != LatchBlock) {
+ if (ExitingBlock != LatchBlock) {
if (!Cond->hasOneUse())
// See below, we don't want the condition to be cloned.
return;
@@ -2373,14 +2371,14 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
StrideNoReuse.insert(*CondStride);
}
- // If the trip count is computed in terms of an smax (due to ScalarEvolution
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
// being unable to find a sufficient guard, for example), change the loop
- // comparison to use SLT instead of NE.
- Cond = OptimizeSMax(L, Cond, CondUse);
+ // comparison to use SLT or ULT instead of NE.
+ Cond = OptimizeMax(L, Cond, CondUse);
// If possible, change stride and operands of the compare instruction to
// eliminate one stride.
- if (ExitBlock == LatchBlock)
+ if (ExitingBlock == LatchBlock)
Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
// It's possible for the setcc instruction to be anywhere in the loop, and
@@ -2397,8 +2395,7 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
// Clone the IVUse, as the old use still exists!
IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
- CondUse->getOperandValToReplace(),
- false);
+ CondUse->getOperandValToReplace());
CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
}
}
@@ -2413,9 +2410,9 @@ void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
++NumLoopCond;
}
-// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-// when to exit the loop is used only for that purpose, try to rearrange things
-// so it counts down to a test against zero.
+/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
+/// when to exit the loop is used only for that purpose, try to rearrange things
+/// so it counts down to a test against zero.
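In source-level terms, the rewrite described above aims for roughly this shape:

    /* Before: the IV counts up toward a loop-invariant bound. */
    for (i = start; i != end; ++i) body();
    /* After: a fresh IV counts the trip count down to zero; a compare
       against zero is free or nearly free on most targets. */
    for (n = end - start; n != 0; --n) body();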
void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
// If the number of times the loop is executed isn't computable, give up.
@@ -2506,7 +2503,7 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
Value *startVal = phi->getIncomingValue(inBlock);
Value *endVal = Cond->getOperand(1);
// FIXME check for case where both are constant
- ConstantInt* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
+ Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
BinaryOperator *NewStartVal =
BinaryOperator::Create(Instruction::Sub, endVal, startVal,
"tmp", PreInsertPt);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 7143c7b..d89790c 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -820,10 +820,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0>
} else {
// If EltTy is a vector type, get the element type.
- const Type *ValTy = EltTy;
- if (const VectorType *VTy = dyn_cast<VectorType>(ValTy))
- ValTy = VTy->getElementType();
-
+ const Type *ValTy = EltTy->getScalarType();
+
// Construct an integer with the right value.
unsigned EltSize = TD->getTypeSizeInBits(ValTy);
APInt OneVal(EltSize, CI->getZExtValue());
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 59989c9..bbcb792 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -135,7 +135,11 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
TD->getIntPtrType(),
PointerType::getUnqual(Type::Int8Ty),
NULL);
- return B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ CallInst *CI = B.CreateCall(StrLen, CastToCStr(Ptr, B), "strlen");
+ if (const Function *F = dyn_cast<Function>(StrLen->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
@@ -164,7 +168,12 @@ Value *LibCallOptimization::EmitMemChr(Value *Ptr, Value *Val,
PointerType::getUnqual(Type::Int8Ty),
Type::Int32Ty, TD->getIntPtrType(),
NULL);
- return B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+ CallInst *CI = B.CreateCall3(MemChr, CastToCStr(Ptr, B), Val, Len, "memchr");
+
+ if (const Function *F = dyn_cast<Function>(MemChr->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitMemCmp - Emit a call to the memcmp function.
@@ -182,8 +191,13 @@ Value *LibCallOptimization::EmitMemCmp(Value *Ptr1, Value *Ptr2,
PointerType::getUnqual(Type::Int8Ty),
PointerType::getUnqual(Type::Int8Ty),
TD->getIntPtrType(), NULL);
- return B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
- Len, "memcmp");
+ CallInst *CI = B.CreateCall3(MemCmp, CastToCStr(Ptr1, B), CastToCStr(Ptr2, B),
+ Len, "memcmp");
+
+ if (const Function *F = dyn_cast<Function>(MemCmp->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitMemSet - Emit a call to the memset function
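Each of these emitters now repeats the same three lines after building its call; a hypothetical helper (not part of this patch) captures the shared intent:

    // getOrInsertFunction may return a bitcast of an existing function whose
    // calling convention is not the C default, so a freshly created call
    // site must copy the callee's convention; in LLVM IR a convention
    // mismatch between call and callee is undefined.
    static void setCallConvFromCallee(Value *Callee, CallInst *CI) {
      if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
        CI->setCallingConv(F->getCallingConv());
    }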
@@ -217,20 +231,30 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
NameBuffer[NameLen+1] = 0;
Name = NameBuffer;
}
-
+
Module *M = Caller->getParent();
- Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
+ Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(), NULL);
- return B.CreateCall(Callee, Op, Name);
+ CallInst *CI = B.CreateCall(Callee, Op, Name);
+
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
Module *M = Caller->getParent();
- Value *F = M->getOrInsertFunction("putchar", Type::Int32Ty,
- Type::Int32Ty, NULL);
- B.CreateCall(F, B.CreateIntCast(Char, Type::Int32Ty, "chari"), "putchar");
+ Value *PutChar = M->getOrInsertFunction("putchar", Type::Int32Ty,
+ Type::Int32Ty, NULL);
+ CallInst *CI = B.CreateCall(PutChar,
+ B.CreateIntCast(Char, Type::Int32Ty, "chari"),
+ "putchar");
+
+ if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
}
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
@@ -241,10 +265,14 @@ void LibCallOptimization::EmitPutS(Value *Str, IRBuilder<> &B) {
AWI[0] = AttributeWithIndex::get(1, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
- Value *F = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
- Type::Int32Ty,
- PointerType::getUnqual(Type::Int8Ty), NULL);
- B.CreateCall(F, CastToCStr(Str, B), "puts");
+ Value *PutS = M->getOrInsertFunction("puts", AttrListPtr::get(AWI, 2),
+ Type::Int32Ty,
+ PointerType::getUnqual(Type::Int8Ty),
+ NULL);
+ CallInst *CI = B.CreateCall(PutS, CastToCStr(Str, B), "puts");
+ if (const Function *F = dyn_cast<Function>(PutS->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
}
/// EmitFPutC - Emit a call to the fputc function. This assumes that Char is
@@ -258,12 +286,14 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
if (isa<PointerType>(File->getType()))
F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2), Type::Int32Ty,
Type::Int32Ty, File->getType(), NULL);
-
else
F = M->getOrInsertFunction("fputc", Type::Int32Ty, Type::Int32Ty,
File->getType(), NULL);
Char = B.CreateIntCast(Char, Type::Int32Ty, "chari");
- B.CreateCall2(F, Char, File, "fputc");
+ CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
/// EmitFPutS - Emit a call to the fputs function. Str is required to be a
@@ -283,7 +313,10 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {
F = M->getOrInsertFunction("fputs", Type::Int32Ty,
PointerType::getUnqual(Type::Int8Ty),
File->getType(), NULL);
- B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+ CallInst *CI = B.CreateCall2(F, CastToCStr(Str, B), File, "fputs");
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
/// EmitFWrite - Emit a call to the fwrite function. This assumes that Ptr is
@@ -307,8 +340,11 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
PointerType::getUnqual(Type::Int8Ty),
TD->getIntPtrType(), TD->getIntPtrType(),
File->getType(), NULL);
- B.CreateCall4(F, CastToCStr(Ptr, B), Size,
- ConstantInt::get(TD->getIntPtrType(), 1), File);
+ CallInst *CI = B.CreateCall4(F, CastToCStr(Ptr, B), Size,
+ ConstantInt::get(TD->getIntPtrType(), 1), File);
+
+ if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
+ CI->setCallingConv(Fn->getCallingConv());
}
//===----------------------------------------------------------------------===//
@@ -673,12 +709,10 @@ struct VISIBILITY_HIDDEN StrCmpOpt : public LibCallOptimization {
// strcmp(P, "x") -> memcmp(P, "x", 2)
uint64_t Len1 = GetStringLength(Str1P);
uint64_t Len2 = GetStringLength(Str2P);
- if (Len1 || Len2) {
- // Choose the smallest Len excluding 0 which means 'unknown'.
- if (!Len1 || (Len2 && Len2 < Len1))
- Len1 = Len2;
+ if (Len1 && Len2) {
return EmitMemCmp(Str1P, Str2P,
- ConstantInt::get(TD->getIntPtrType(), Len1), B);
+ ConstantInt::get(TD->getIntPtrType(),
+ std::min(Len1, Len2)), B);
}
return 0;
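Using min(Len1, Len2) is sound because GetStringLength counts the terminating NUL, so the compared span always reaches the shorter string's NUL, which by itself decides the ordering. For example:

    /* GetStringLength("ab") == 3 and GetStringLength("abcd") == 5, so the
       fold compares 3 bytes: 'a','b','\0' versus 'a','b','c'. The NUL (0)
       sorts below 'c', matching strcmp("ab", "abcd") < 0. */
    memcmp("ab", "abcd", 3);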
@@ -1039,7 +1073,7 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
if (OpC->getOperand(0)->getType()->getPrimitiveSizeInBits() < 32)
LdExpArg = B.CreateZExt(OpC->getOperand(0), Type::Int32Ty, "tmp");
}
-
+
if (LdExpArg) {
const char *Name;
if (Op->getType() == Type::FloatTy)
@@ -1056,12 +1090,15 @@ struct VISIBILITY_HIDDEN Exp2Opt : public LibCallOptimization {
Module *M = Caller->getParent();
Value *Callee = M->getOrInsertFunction(Name, Op->getType(),
Op->getType(), Type::Int32Ty,NULL);
- return B.CreateCall2(Callee, One, LdExpArg);
+ CallInst *CI = B.CreateCall2(Callee, One, LdExpArg);
+ if (const Function *F = dyn_cast<Function>(Callee->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+
+ return CI;
}
return 0;
}
};
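The ldexp rewrite rests on the identity exp2(x) == ldexp(1.0, x) for integral x, computing the result by exponent adjustment rather than a general exp2 call. A quick check:

    #include <math.h>
    double a = exp2(13.0);     /* 8192.0 */
    double b = ldexp(1.0, 13); /* 8192.0: 1.0 scaled by 2^13 */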
-
//===---------------------------------------===//
// Double -> Float Shrinking Optimizations for Unary Functions like 'floor'
@@ -1072,7 +1109,7 @@ struct VISIBILITY_HIDDEN UnaryDoubleFPOpt : public LibCallOptimization {
if (FT->getNumParams() != 1 || FT->getReturnType() != Type::DoubleTy ||
FT->getParamType(0) != Type::DoubleTy)
return 0;
-
+
// If this is something like 'floor((double)floatval)', convert to floorf.
FPExtInst *Cast = dyn_cast<FPExtInst>(CI->getOperand(1));
if (Cast == 0 || Cast->getOperand(0)->getType() != Type::FloatTy)
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 682d069..34ee57c 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -52,6 +52,7 @@
#define DEBUG_TYPE "tailcallelim"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
@@ -201,8 +202,21 @@ bool TailCallElim::runOnFunction(Function &F) {
bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// FIXME: We can move load/store/call/free instructions above the call if the
// call does not mod/ref the memory location being processed.
- if (I->mayHaveSideEffects() || isa<LoadInst>(I))
+ if (I->mayHaveSideEffects()) // This also handles volatile loads.
return false;
+
+ if (LoadInst* L = dyn_cast<LoadInst>(I)) {
+ // Loads may always be moved above calls without side effects.
+ if (CI->mayHaveSideEffects()) {
+ // Non-volatile loads may be moved above a call with side effects if it
+ // does not write to memory and the load provably won't trap.
+ // FIXME: Writes to memory only matter if they may alias the pointer
+ // being loaded from.
+ if (CI->mayWriteToMemory() ||
+ !isSafeToLoadUnconditionally(L->getPointerOperand(), L))
+ return false;
+ }
+ }
// Otherwise, if this is a side-effect free instruction, check to make sure
// that it does not use the return value of the call. If it doesn't use the
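Roughly the kind of function this unblocks, assuming the recursive callee writes no memory and the load provably cannot trap:

    /* The load '*p' executes after the recursive call, which previously
       blocked tail-recursion elimination outright. It may now be hoisted
       above the call when the conditions above hold. */
    int f(int *p, int n) {
      if (n == 0) return 0;
      return f(p, n - 1) + *p;
    }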
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 94483b8..c7fff54 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Constants.h"
+#include "llvm/GlobalAlias.h"
#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
@@ -28,6 +29,50 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// Local analysis.
+//
+
+/// isSafeToLoadUnconditionally - Return true if we know that executing a load
+/// from this value cannot trap. If it is not obviously safe to load from the
+/// specified pointer, we do a quick local scan of the basic block containing
+/// ScanFrom, to determine if the address is already accessed.
+bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom) {
+ // If it is an alloca it is always safe to load from.
+ if (isa<AllocaInst>(V)) return true;
+
+ // If it is a global variable it is mostly safe to load from.
+ if (const GlobalValue *GV = dyn_cast<GlobalVariable>(V))
+ // Don't try to evaluate aliases. External weak GV can be null.
+ return !isa<GlobalAlias>(GV) && !GV->hasExternalWeakLinkage();
+
+ // Otherwise, be a little bit aggressive by scanning the local block where we
+ // want to check to see if the pointer is already being loaded or stored
+ // from/to. If so, the previous load or store would have already trapped,
+ // so there is no harm doing an extra load (also, CSE will later eliminate
+ // the load entirely).
+ BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin();
+
+ while (BBI != E) {
+ --BBI;
+
+ // If we see a free or a call which may write to memory (i.e. which might do
+ // a free) the pointer could be marked invalid.
+ if (isa<FreeInst>(BBI) ||
+ (isa<CallInst>(BBI) && BBI->mayWriteToMemory() &&
+ !isa<DbgInfoIntrinsic>(BBI)))
+ return false;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
+ if (LI->getOperand(0) == V) return true;
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) {
+ if (SI->getOperand(1) == V) return true;
+ }
+ }
+ return false;
+}
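A sketch of the intended call pattern (Ptr, InsertPt, and the load name are placeholders, not code from this patch):

    // Speculate a load only when it provably cannot trap at this point:
    // the pointer is an alloca, a non-weak non-alias global, or a pointer
    // this block already loads from or stores to.
    if (isSafeToLoadUnconditionally(Ptr, InsertPt)) {
      LoadInst *SpecLd = new LoadInst(Ptr, "spec.ld", InsertPt);
      // ... SpecLd may now be used on all paths ...
    }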
+
+
+//===----------------------------------------------------------------------===//
// Local constant propagation.
//
diff --git a/lib/Transforms/Utils/LowerAllocations.cpp b/lib/Transforms/Utils/LowerAllocations.cpp
index 3249895..9af47f5 100644
--- a/lib/Transforms/Utils/LowerAllocations.cpp
+++ b/lib/Transforms/Utils/LowerAllocations.cpp
@@ -112,7 +112,7 @@ bool LowerAllocations::runOnBasicBlock(BasicBlock &BB) {
if (MallocInst *MI = dyn_cast<MallocInst>(I)) {
const Type *AllocTy = MI->getType()->getElementType();
- // malloc(type) becomes sbyte *malloc(size)
+ // malloc(type) becomes i8 *malloc(size)
Value *MallocArg;
if (LowerMallocArgToInteger)
MallocArg = ConstantInt::get(Type::Int64Ty,
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index bcc6b81..ee0f6a6 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -859,6 +859,26 @@ static bool FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
return Changed;
}
+// isSafeToHoistInvoke - If we would need to insert a select that uses the
+// value of this invoke (comments in HoistThenElseCodeToIf explain why we
+// would need to do this), we can't hoist the invoke, as there is nowhere
+// to put the select in this case.
+static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
+ Instruction *I1, Instruction *I2) {
+ for (succ_iterator SI = succ_begin(BB1), E = succ_end(BB1); SI != E; ++SI) {
+ PHINode *PN;
+ for (BasicBlock::iterator BBI = SI->begin();
+ (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
+ Value *BB1V = PN->getIncomingValueForBlock(BB1);
+ Value *BB2V = PN->getIncomingValueForBlock(BB2);
+ if (BB1V != BB2V && (BB1V==I1 || BB2V==I2)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
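The blocked shape, in source terms: both arms end in the identical potentially-throwing call, each arm's own result reaches the join point, and so hoisting would require a select for the merged value that no block can legally hold (nothing may follow an invoke in its block):

    /* The phi for r merges v1 (the first arm's invoke) with v2 (the
       second arm's). The invokes are identical instructions but distinct
       Values, so hoisting one copy would need select(c, v1, v2) in the
       branch block, where the invoke's result is not yet available. */
    int r = c ? g() : g();  /* g() may throw, so both calls are invokes */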
/// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and
/// BB2, hoist any common code in the two blocks up into the branch block. The
/// caller of this function guarantees that BI's block dominates BB1 and BB2.
@@ -879,8 +899,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
I1 = BB1_Itr++;
while (isa<DbgInfoIntrinsic>(I2))
I2 = BB2_Itr++;
- if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
- isa<InvokeInst>(I1) || !I1->isIdenticalTo(I2))
+ if (I1->getOpcode() != I2->getOpcode() || isa<PHINode>(I1) ||
+ !I1->isIdenticalTo(I2) ||
+ (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
return false;
// If we get here, we can hoist at least one instruction.
@@ -911,6 +932,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI) {
return true;
HoistTerminator:
+ // It may not be possible to hoist an invoke.
+ if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
+ return true;
+
// Okay, it is safe to hoist the terminator.
Instruction *NT = I1->clone();
BIParent->getInstList().insert(BI, NT);
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 6b369b6..73b1ed6 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -1384,7 +1384,10 @@ void AssemblyWriter::printFunction(const Function *F) {
case CallingConv::Fast: Out << "fastcc "; break;
case CallingConv::Cold: Out << "coldcc "; break;
case CallingConv::X86_StdCall: Out << "x86_stdcallcc "; break;
- case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break;
+ case CallingConv::X86_FastCall: Out << "x86_fastcallcc "; break;
+ case CallingConv::ARM_APCS: Out << "arm_apcscc "; break;
+ case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break;
+ case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break;
default: Out << "cc" << F->getCallingConv() << " "; break;
}
@@ -1640,7 +1643,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
case CallingConv::Fast: Out << " fastcc"; break;
case CallingConv::Cold: Out << " coldcc"; break;
case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
- case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ case CallingConv::ARM_APCS: Out << " arm_apcscc"; break;
+ case CallingConv::ARM_AAPCS: Out << " arm_aapcscc"; break;
+ case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc"; break;
default: Out << " cc" << CI->getCallingConv(); break;
}
@@ -1688,6 +1694,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
case CallingConv::Cold: Out << " coldcc"; break;
case CallingConv::X86_StdCall: Out << " x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << " x86_fastcallcc"; break;
+ case CallingConv::ARM_APCS: Out << " arm_apcscc"; break;
+ case CallingConv::ARM_AAPCS: Out << " arm_aapcscc"; break;
+ case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc"; break;
default: Out << " cc" << II->getCallingConv(); break;
}
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 1d293cc..3aab0cc 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -208,6 +208,22 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
}
}
+ // If the cast operand is a constant vector, perform the cast by
+ // operating on each element. In the cast of bitcasts, the element
+ // count may be mismatched; don't attempt to handle that here.
+ if (const ConstantVector *CV = dyn_cast<ConstantVector>(V))
+ if (isa<VectorType>(DestTy) &&
+ cast<VectorType>(DestTy)->getNumElements() ==
+ CV->getType()->getNumElements()) {
+ std::vector<Constant*> res;
+ const VectorType *DestVecTy = cast<VectorType>(DestTy);
+ const Type *DstEltTy = DestVecTy->getElementType();
+ for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
+ res.push_back(ConstantExpr::getCast(opc,
+ CV->getOperand(i), DstEltTy));
+ return ConstantVector::get(DestVecTy, res);
+ }
+
// We actually have to do a cast now. Perform the cast according to the
// opcode specified.
switch (opc) {
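Hoisting the elementwise loop above the opcode switch lets every cast opcode fold constant vectors lane by lane, not just the two opcodes that previously duplicated the loop. A worked example:

    // trunc <2 x i32> <i32 300, i32 70000> to <2 x i16> folds as:
    //   lane 0: trunc 300   -> i16 300
    //   lane 1: trunc 70000 -> i16 4464  (70000 mod 2^16)
    // yielding the constant <2 x i16> <i16 300, i16 4464>.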
@@ -237,14 +253,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
APInt Val(DestBitWidth, 2, x);
return ConstantInt::get(Val);
}
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
- std::vector<Constant*> res;
- const VectorType *DestVecTy = cast<VectorType>(DestTy);
- const Type *DstEltTy = DestVecTy->getElementType();
- for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy));
- return ConstantVector::get(DestVecTy, res);
- }
return 0; // Can't fold.
case Instruction::IntToPtr: //always treated as unsigned
if (V->isNullValue()) // Is it an integral null value?
@@ -266,14 +274,6 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, const Constant *V,
APFloat::rmNearestTiesToEven);
return ConstantFP::get(apf);
}
- if (const ConstantVector *CV = dyn_cast<ConstantVector>(V)) {
- std::vector<Constant*> res;
- const VectorType *DestVecTy = cast<VectorType>(DestTy);
- const Type *DstEltTy = DestVecTy->getElementType();
- for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
- res.push_back(ConstantExpr::getCast(opc, CV->getOperand(i), DstEltTy));
- return ConstantVector::get(DestVecTy, res);
- }
return 0;
case Instruction::ZExt:
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
@@ -629,7 +629,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
}
- // Handle simplifications of the RHS when a constant int.
+ // Handle simplifications when the RHS is a constant int.
if (const ConstantInt *CI2 = dyn_cast<ConstantInt>(C2)) {
switch (Opcode) {
case Instruction::Add:
@@ -773,6 +773,20 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
}
}
}
+
+ switch (Opcode) {
+ case Instruction::SDiv:
+ case Instruction::UDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::Shl:
+ if (CI1->equalsInt(0)) return const_cast<Constant*>(C1);
+ break;
+ default:
+ break;
+ }
} else if (const ConstantFP *CFP1 = dyn_cast<ConstantFP>(C1)) {
if (const ConstantFP *CFP2 = dyn_cast<ConstantFP>(C2)) {
APFloat C1V = CFP1->getValueAPF();
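The added switch picks up the zero-on-the-left identities the earlier cases missed; a minimal check, assuming this tree's Int32Ty-style API:

    Constant *Zero = ConstantInt::get(Type::Int32Ty, 0);
    Constant *X    = ConstantInt::get(Type::Int32Ty, 7);
    // Both fold straight back to Zero: 0 udiv 7 == 0, 0 shl 7 == 0.
    Constant *D = ConstantExpr::get(Instruction::UDiv, Zero, X);
    Constant *S = ConstantExpr::get(Instruction::Shl, Zero, X);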
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 69c503d..c164a3b 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -25,6 +25,9 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <algorithm>
@@ -35,6 +38,9 @@ using namespace llvm;
// Constant Class
//===----------------------------------------------------------------------===//
+// Becomes a no-op when multithreading is disabled.
+ManagedStatic<sys::SmartRWMutex<true> > ConstantsLock;
+
void Constant::destroyConstantImpl() {
// When a Constant is destroyed, there may be lingering
// references to the constant by other constants in the constant pool. These
@@ -269,9 +275,20 @@ typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
DenseMapAPIntKeyInfo> IntMapTy;
static ManagedStatic<IntMapTy> IntConstants;
-ConstantInt *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) {
- const IntegerType *ITy = cast<IntegerType>(Ty);
- return get(APInt(ITy->getBitWidth(), V, isSigned));
+ConstantInt *ConstantInt::get(const IntegerType *Ty,
+ uint64_t V, bool isSigned) {
+ return get(APInt(Ty->getBitWidth(), V, isSigned));
+}
+
+Constant *ConstantInt::get(const Type *Ty, uint64_t V, bool isSigned) {
+ Constant *C = get(cast<IntegerType>(Ty->getScalarType()), V, isSigned);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return
+ ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
}
// Get a ConstantInt from an APInt. Note that the value stored in the DenseMap
@@ -284,12 +301,35 @@ ConstantInt *ConstantInt::get(const APInt& V) {
const IntegerType *ITy = IntegerType::get(V.getBitWidth());
// get an existing value or the insertion position
DenseMapAPIntKeyInfo::KeyTy Key(V, ITy);
+
+ ConstantsLock->reader_acquire();
ConstantInt *&Slot = (*IntConstants)[Key];
- // if it exists, return it.
- if (Slot)
+ ConstantsLock->reader_release();
+
+ if (!Slot) {
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
+ ConstantInt *&NewSlot = (*IntConstants)[Key];
+ if (!NewSlot) {
+ NewSlot = new ConstantInt(ITy, V);
+ }
+
+ return NewSlot;
+ } else {
return Slot;
- // otherwise create a new one, insert it, and return it.
- return Slot = new ConstantInt(ITy, V);
+ }
+}
+
+Constant *ConstantInt::get(const Type *Ty, const APInt &V) {
+ ConstantInt *C = ConstantInt::get(V);
+ assert(C->getType() == Ty->getScalarType() &&
+ "ConstantInt type doesn't match the type implied by its value!");
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return
+ ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
}
//===----------------------------------------------------------------------===//
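The lookup above is double-checked locking; its general shape, abstracted (Map, ValueT, and makeValue are placeholders), with the key point that the post-lock re-check must test the freshly fetched reference, since the map may rehash between the reader and writer sections:

    ConstantsLock->reader_acquire();
    ValueT *&Slot = (*Map)[Key];        // shared-lock fast path
    ConstantsLock->reader_release();
    if (Slot) return Slot;              // common case: already uniqued
    sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
    ValueT *&NewSlot = (*Map)[Key];     // re-fetch; old reference may be stale
    if (!NewSlot)                       // another thread may have won the race
      NewSlot = makeValue(Key);
    return NewSlot;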
@@ -368,34 +408,54 @@ static ManagedStatic<FPMapTy> FPConstants;
ConstantFP *ConstantFP::get(const APFloat &V) {
DenseMapAPFloatKeyInfo::KeyTy Key(V);
- ConstantFP *&Slot = (*FPConstants)[Key];
- if (Slot) return Slot;
- const Type *Ty;
- if (&V.getSemantics() == &APFloat::IEEEsingle)
- Ty = Type::FloatTy;
- else if (&V.getSemantics() == &APFloat::IEEEdouble)
- Ty = Type::DoubleTy;
- else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
- Ty = Type::X86_FP80Ty;
- else if (&V.getSemantics() == &APFloat::IEEEquad)
- Ty = Type::FP128Ty;
- else {
- assert(&V.getSemantics() == &APFloat::PPCDoubleDouble&&"Unknown FP format");
- Ty = Type::PPC_FP128Ty;
+ ConstantsLock->reader_acquire();
+ ConstantFP *&Slot = (*FPConstants)[Key];
+ ConstantsLock->reader_release();
+
+ if (!Slot) {
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
+ ConstantFP *&NewSlot = (*FPConstants)[Key];
+ if (!NewSlot) {
+ const Type *Ty;
+ if (&V.getSemantics() == &APFloat::IEEEsingle)
+ Ty = Type::FloatTy;
+ else if (&V.getSemantics() == &APFloat::IEEEdouble)
+ Ty = Type::DoubleTy;
+ else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
+ Ty = Type::X86_FP80Ty;
+ else if (&V.getSemantics() == &APFloat::IEEEquad)
+ Ty = Type::FP128Ty;
+ else {
+ assert(&V.getSemantics() == &APFloat::PPCDoubleDouble &&
+ "Unknown FP format");
+ Ty = Type::PPC_FP128Ty;
+ }
+ NewSlot = new ConstantFP(Ty, V);
+ }
+
+ return NewSlot;
}
- return Slot = new ConstantFP(Ty, V);
+ return Slot;
}
/// get() - This returns a constant fp for the specified value in the
/// specified type. This should only be used for simple constant values like
/// 2.0/1.0 etc, that are known-valid both as double and as the target format.
-ConstantFP *ConstantFP::get(const Type *Ty, double V) {
+Constant *ConstantFP::get(const Type *Ty, double V) {
APFloat FV(V);
bool ignored;
- FV.convert(*TypeToFloatSemantics(Ty), APFloat::rmNearestTiesToEven, &ignored);
- return get(FV);
+ FV.convert(*TypeToFloatSemantics(Ty->getScalarType()),
+ APFloat::rmNearestTiesToEven, &ignored);
+ Constant *C = get(FV);
+
+ // For vectors, broadcast the value.
+ if (const VectorType *VTy = dyn_cast<VectorType>(Ty))
+ return
+ ConstantVector::get(std::vector<Constant *>(VTy->getNumElements(), C));
+
+ return C;
}
//===----------------------------------------------------------------------===//
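With the return types loosened from ConstantInt*/ConstantFP* to Constant*, the same entry points now also build splat vectors; a minimal illustration, assuming this tree's type API:

    // Scalar request: a plain ConstantInt, as before.
    Constant *S = ConstantInt::get(Type::Int32Ty, 42);
    // Vector request: the scalar is broadcast into a splat,
    // here <4 x i32> <i32 42, i32 42, i32 42, i32 42>.
    Constant *V = ConstantInt::get(VectorType::get(Type::Int32Ty, 4), 42);
    // Likewise for FP: <2 x double> <double 1.0, double 1.0>.
    Constant *F = ConstantFP::get(VectorType::get(Type::DoubleTy, 2), 1.0);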
@@ -1093,8 +1153,13 @@ namespace llvm {
/// AbstractTypeMap - Map for abstract type constants.
///
AbstractTypeMapTy AbstractTypeMap;
+
+ /// ValueMapLock - Mutex for this map.
+ sys::SmartMutex<true> ValueMapLock;
public:
+ // NOTE: This function is not locked. It is the caller's responsibility
+ // to enforce proper synchronization.
typename MapTy::iterator map_end() { return Map.end(); }
/// InsertOrGetItem - Return an iterator for the specified element.
@@ -1102,6 +1167,8 @@ namespace llvm {
/// entry and Exists=true. If not, the iterator points to the newly
/// inserted entry and returns Exists=false. Newly inserted entries have
/// I->second == 0, and should be filled in.
+ /// NOTE: This function is not locked. It is the caller's responsibility
+ /// to enforce proper synchronization.
typename MapTy::iterator InsertOrGetItem(std::pair<MapKey, Constant *>
&InsertVal,
bool &Exists) {
@@ -1131,19 +1198,10 @@ private:
}
return I;
}
-public:
- /// getOrCreate - Return the specified constant from the map, creating it if
- /// necessary.
- ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) {
- MapKey Lookup(Ty, V);
- typename MapTy::iterator I = Map.find(Lookup);
- // Is it in the map?
- if (I != Map.end())
- return static_cast<ConstantClass *>(I->second);
-
- // If no preexisting value, create one now...
- ConstantClass *Result =
+ ConstantClass* Create(const TypeClass *Ty, const ValType &V,
+ typename MapTy::iterator I) {
+ ConstantClass* Result =
ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
assert(Result->getType() == Ty && "Type specified is not correct!");
@@ -1151,11 +1209,12 @@ public:
if (HasLargeKey) // Remember the reverse mapping if needed.
InverseMap.insert(std::make_pair(Result, I));
-
- // If the type of the constant is abstract, make sure that an entry exists
- // for it in the AbstractTypeMap.
+
+ // If the type of the constant is abstract, make sure that an entry
+ // exists for it in the AbstractTypeMap.
if (Ty->isAbstract()) {
- typename AbstractTypeMapTy::iterator TI = AbstractTypeMap.find(Ty);
+ typename AbstractTypeMapTy::iterator TI =
+ AbstractTypeMap.find(Ty);
if (TI == AbstractTypeMap.end()) {
// Add ourselves to the ATU list of the type.
@@ -1164,10 +1223,33 @@ public:
AbstractTypeMap.insert(TI, std::make_pair(Ty, I));
}
}
+
+ return Result;
+ }
+public:
+
+ /// getOrCreate - Return the specified constant from the map, creating it if
+ /// necessary.
+ ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) {
+ sys::SmartScopedLock<true> Lock(&ValueMapLock);
+ MapKey Lookup(Ty, V);
+ ConstantClass* Result = 0;
+
+ typename MapTy::iterator I = Map.find(Lookup);
+ // Is it in the map?
+ if (I != Map.end())
+ Result = static_cast<ConstantClass *>(I->second);
+
+ if (!Result) {
+ // If no preexisting value, create one now...
+ Result = Create(Ty, V, I);
+ }
+
return Result;
}
void remove(ConstantClass *CP) {
+ sys::SmartScopedLock<true> Lock(&ValueMapLock);
typename MapTy::iterator I = FindExistingElement(CP);
assert(I != Map.end() && "Constant not found in constant table!");
assert(I->second == CP && "Didn't find correct element?");
@@ -1221,6 +1303,8 @@ public:
/// MoveConstantToNewSlot - If we are about to change C to be the element
/// specified by I, update our internal data structures to reflect this
/// fact.
+ /// NOTE: This function is not locked. It is the responsibility of the
+ /// caller to enforce proper synchronization if using this method.
void MoveConstantToNewSlot(ConstantClass *C, typename MapTy::iterator I) {
// First, remove the old location of the specified constant in the map.
typename MapTy::iterator OldI = FindExistingElement(C);
@@ -1250,6 +1334,7 @@ public:
}
void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
+ sys::SmartScopedLock<true> Lock(&ValueMapLock);
typename AbstractTypeMapTy::iterator I =
AbstractTypeMap.find(cast<Type>(OldTy));
@@ -1314,12 +1399,15 @@ static char getValType(ConstantAggregateZero *CPZ) { return 0; }
ConstantAggregateZero *ConstantAggregateZero::get(const Type *Ty) {
assert((isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) &&
"Cannot create an aggregate zero of non-aggregate type!");
+
+ // Implicitly locked.
return AggZeroConstants->getOrCreate(Ty, 0);
}
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantAggregateZero::destroyConstant() {
+ // Implicitly locked.
AggZeroConstants->remove(this);
destroyConstantImpl();
}
@@ -1359,18 +1447,24 @@ Constant *ConstantArray::get(const ArrayType *Ty,
// If this is an all-zero array, return a ConstantAggregateZero object
if (!V.empty()) {
Constant *C = V[0];
- if (!C->isNullValue())
+ if (!C->isNullValue()) {
+ // Implicitly locked.
return ArrayConstants->getOrCreate(Ty, V);
+ }
for (unsigned i = 1, e = V.size(); i != e; ++i)
- if (V[i] != C)
+ if (V[i] != C) {
+ // Implicitly locked.
return ArrayConstants->getOrCreate(Ty, V);
+ }
}
+
return ConstantAggregateZero::get(Ty);
}
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantArray::destroyConstant() {
+ // Implicitly locked.
ArrayConstants->remove(this);
destroyConstantImpl();
}
@@ -1482,6 +1576,7 @@ Constant *ConstantStruct::get(const StructType *Ty,
// Create a ConstantAggregateZero value if all elements are zeros...
for (unsigned i = 0, e = V.size(); i != e; ++i)
if (!V[i]->isNullValue())
+ // Implicitly locked.
return StructConstants->getOrCreate(Ty, V);
return ConstantAggregateZero::get(Ty);
@@ -1498,6 +1593,7 @@ Constant *ConstantStruct::get(const std::vector<Constant*> &V, bool packed) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantStruct::destroyConstant() {
+ // Implicitly locked.
StructConstants->remove(this);
destroyConstantImpl();
}
@@ -1552,6 +1648,8 @@ Constant *ConstantVector::get(const VectorType *Ty,
return ConstantAggregateZero::get(Ty);
if (isUndef)
return UndefValue::get(Ty);
+
+ // Implicitly locked.
return VectorConstants->getOrCreate(Ty, V);
}
@@ -1563,6 +1661,7 @@ Constant *ConstantVector::get(const std::vector<Constant*> &V) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantVector::destroyConstant() {
+ // Implicitly locked.
VectorConstants->remove(this);
destroyConstantImpl();
}
@@ -1627,12 +1726,14 @@ static char getValType(ConstantPointerNull *) {
ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
+ // Implicitly locked.
return NullPtrConstants->getOrCreate(Ty, 0);
}
// destroyConstant - Remove the constant from the constant table...
//
void ConstantPointerNull::destroyConstant() {
+ // Implicitly locked.
NullPtrConstants->remove(this);
destroyConstantImpl();
}
@@ -1670,12 +1771,14 @@ static char getValType(UndefValue *) {
UndefValue *UndefValue::get(const Type *Ty) {
+ // Implicitly locked.
return UndefValueConstants->getOrCreate(Ty, 0);
}
// destroyConstant - Remove the constant from the constant table.
//
void UndefValue::destroyConstant() {
+ // Implicitly locked.
UndefValueConstants->remove(this);
destroyConstantImpl();
}
@@ -1690,15 +1793,18 @@ MDString::MDString(const char *begin, const char *end)
static ManagedStatic<StringMap<MDString*> > MDStringCache;
MDString *MDString::get(const char *StrBegin, const char *StrEnd) {
- StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue(StrBegin,
- StrEnd);
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
+ StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue(
+ StrBegin, StrEnd);
MDString *&S = Entry.getValue();
if (!S) S = new MDString(Entry.getKeyData(),
Entry.getKeyData() + Entry.getKeyLength());
+
return S;
}
void MDString::destroyConstant() {
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
MDStringCache->erase(MDStringCache->find(StrBegin, StrEnd));
destroyConstantImpl();
}
@@ -1724,18 +1830,27 @@ MDNode *MDNode::get(Value*const* Vals, unsigned NumVals) {
for (unsigned i = 0; i != NumVals; ++i)
ID.AddPointer(Vals[i]);
+ ConstantsLock->reader_acquire();
void *InsertPoint;
- if (MDNode *N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint))
- return N;
-
- // InsertPoint will have been set by the FindNodeOrInsertPos call.
- MDNode *N = new(0) MDNode(Vals, NumVals);
- MDNodeSet->InsertNode(N, InsertPoint);
+ MDNode *N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint);
+ ConstantsLock->reader_release();
+
+ if (!N) {
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
+ N = MDNodeSet->FindNodeOrInsertPos(ID, InsertPoint);
+ if (!N) {
+ // InsertPoint will have been set by the FindNodeOrInsertPos call.
+ N = new(0) MDNode(Vals, NumVals);
+ MDNodeSet->InsertNode(N, InsertPoint);
+ }
+ }
return N;
}
void MDNode::destroyConstant() {
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
MDNodeSet->RemoveNode(this);
+
destroyConstantImpl();
}
@@ -1902,6 +2017,8 @@ static inline Constant *getFoldedCast(
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> argVec(1, C);
ExprMapKeyType Key(opc, argVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(Ty, Key);
}
@@ -1932,19 +2049,19 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, const Type *Ty) {
}
Constant *ConstantExpr::getZExtOrBitCast(Constant *C, const Type *Ty) {
- if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return getCast(Instruction::BitCast, C, Ty);
return getCast(Instruction::ZExt, C, Ty);
}
Constant *ConstantExpr::getSExtOrBitCast(Constant *C, const Type *Ty) {
- if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return getCast(Instruction::BitCast, C, Ty);
return getCast(Instruction::SExt, C, Ty);
}
Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) {
- if (C->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (C->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return getCast(Instruction::BitCast, C, Ty);
return getCast(Instruction::Trunc, C, Ty);
}
@@ -1960,9 +2077,10 @@ Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) {
Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
bool isSigned) {
- assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ assert(C->getType()->isIntOrIntVector() &&
+ Ty->isIntOrIntVector() && "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
(SrcBits == DstBits ? Instruction::BitCast :
(SrcBits > DstBits ? Instruction::Trunc :
@@ -1971,10 +2089,10 @@ Constant *ConstantExpr::getIntegerCast(Constant *C, const Type *Ty,
}
Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
- assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
+ assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
"Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
if (SrcBits == DstBits)
return C; // Avoid a useless cast
Instruction::CastOps opcode =
@@ -1983,42 +2101,67 @@ Constant *ConstantExpr::getFPCast(Constant *C, const Type *Ty) {
}
Constant *ConstantExpr::getTrunc(Constant *C, const Type *Ty) {
- assert(C->getType()->isInteger() && "Trunc operand must be integer");
- assert(Ty->isInteger() && "Trunc produces only integral");
- assert(C->getType()->getPrimitiveSizeInBits() > Ty->getPrimitiveSizeInBits()&&
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVector() && "Trunc operand must be integer");
+ assert(Ty->isIntOrIntVector() && "Trunc produces only integral");
+ assert(C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
"SrcTy must be larger than DestTy for Trunc!");
return getFoldedCast(Instruction::Trunc, C, Ty);
}
Constant *ConstantExpr::getSExt(Constant *C, const Type *Ty) {
- assert(C->getType()->isInteger() && "SEXt operand must be integral");
- assert(Ty->isInteger() && "SExt produces only integer");
- assert(C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&&
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVector() && "SExt operand must be integral");
+ assert(Ty->isIntOrIntVector() && "SExt produces only integer");
+ assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
"SrcTy must be smaller than DestTy for SExt!");
return getFoldedCast(Instruction::SExt, C, Ty);
}
Constant *ConstantExpr::getZExt(Constant *C, const Type *Ty) {
- assert(C->getType()->isInteger() && "ZEXt operand must be integral");
- assert(Ty->isInteger() && "ZExt produces only integer");
- assert(C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&&
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isIntOrIntVector() && "ZExt operand must be integral");
+ assert(Ty->isIntOrIntVector() && "ZExt produces only integer");
+ assert(C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
"SrcTy must be smaller than DestTy for ZExt!");
return getFoldedCast(Instruction::ZExt, C, Ty);
}
Constant *ConstantExpr::getFPTrunc(Constant *C, const Type *Ty) {
- assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
- C->getType()->getPrimitiveSizeInBits() > Ty->getPrimitiveSizeInBits()&&
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
+ C->getType()->getScalarSizeInBits() > Ty->getScalarSizeInBits()&&
"This is an illegal floating point truncation!");
return getFoldedCast(Instruction::FPTrunc, C, Ty);
}
Constant *ConstantExpr::getFPExtend(Constant *C, const Type *Ty) {
- assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
- C->getType()->getPrimitiveSizeInBits() < Ty->getPrimitiveSizeInBits()&&
+#ifndef NDEBUG
+ bool fromVec = C->getType()->getTypeID() == Type::VectorTyID;
+ bool toVec = Ty->getTypeID() == Type::VectorTyID;
+#endif
+ assert((fromVec == toVec) && "Cannot convert from scalar to/from vector");
+ assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
+ C->getType()->getScalarSizeInBits() < Ty->getScalarSizeInBits()&&
"This is an illegal floating point extension!");
return getFoldedCast(Instruction::FPExt, C, Ty);
}
@@ -2136,6 +2279,8 @@ Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode,
std::vector<Constant*> argVec(1, C1); argVec.push_back(C2);
ExprMapKeyType Key(Opcode, argVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2188,34 +2333,30 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2) {
case Instruction::UDiv:
case Instruction::SDiv:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert((C1->getType()->isInteger() || (isa<VectorType>(C1->getType()) &&
- cast<VectorType>(C1->getType())->getElementType()->isInteger())) &&
+ assert(C1->getType()->isIntOrIntVector() &&
"Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::FDiv:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert((C1->getType()->isFloatingPoint() || (isa<VectorType>(C1->getType())
- && cast<VectorType>(C1->getType())->getElementType()->isFloatingPoint()))
- && "Tried to create an arithmetic operation on a non-arithmetic type!");
+ assert(C1->getType()->isFPOrFPVector() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::URem:
case Instruction::SRem:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert((C1->getType()->isInteger() || (isa<VectorType>(C1->getType()) &&
- cast<VectorType>(C1->getType())->getElementType()->isInteger())) &&
+ assert(C1->getType()->isIntOrIntVector() &&
"Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::FRem:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert((C1->getType()->isFloatingPoint() || (isa<VectorType>(C1->getType())
- && cast<VectorType>(C1->getType())->getElementType()->isFloatingPoint()))
- && "Tried to create an arithmetic operation on a non-arithmetic type!");
+ assert(C1->getType()->isFPOrFPVector() &&
+ "Tried to create an arithmetic operation on a non-arithmetic type!");
break;
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- assert((C1->getType()->isInteger() || isa<VectorType>(C1->getType())) &&
+ assert(C1->getType()->isIntOrIntVector() &&
"Tried to create a logical operation on a non-integral type!");
break;
case Instruction::Shl:
@@ -2251,6 +2392,8 @@ Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C,
argVec[1] = V1;
argVec[2] = V2;
ExprMapKeyType Key(Instruction::Select, argVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2274,6 +2417,8 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
for (unsigned i = 0; i != NumIdx; ++i)
ArgVec.push_back(cast<Constant>(Idxs[i]));
const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2308,6 +2453,8 @@ ConstantExpr::getICmp(unsigned short pred, Constant* LHS, Constant* RHS) {
ArgVec.push_back(RHS);
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::ICmp, ArgVec, pred);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(Type::Int1Ty, Key);
}
@@ -2325,6 +2472,8 @@ ConstantExpr::getFCmp(unsigned short pred, Constant* LHS, Constant* RHS) {
ArgVec.push_back(RHS);
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::FCmp, ArgVec, pred);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(Type::Int1Ty, Key);
}
@@ -2370,6 +2519,8 @@ ConstantExpr::getVICmp(unsigned short pred, Constant* LHS, Constant* RHS) {
ArgVec.push_back(RHS);
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::VICmp, ArgVec, pred);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(LHS->getType(), Key);
}
@@ -2417,6 +2568,8 @@ ConstantExpr::getVFCmp(unsigned short pred, Constant* LHS, Constant* RHS) {
ArgVec.push_back(RHS);
// Get the key type with both the opcode and predicate
const ExprMapKeyType Key(Instruction::VFCmp, ArgVec, pred);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ResultTy, Key);
}
@@ -2428,6 +2581,8 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val,
std::vector<Constant*> ArgVec(1, Val);
ArgVec.push_back(Idx);
const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2449,6 +2604,8 @@ Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val,
ArgVec.push_back(Elt);
ArgVec.push_back(Idx);
const ExprMapKeyType Key(Instruction::InsertElement,ArgVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2472,6 +2629,8 @@ Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1,
ArgVec.push_back(V2);
ArgVec.push_back(Mask);
const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec);
+
+ // Implicitly locked.
return ExprConstants->getOrCreate(ReqTy, Key);
}
@@ -2555,6 +2714,7 @@ Constant *ConstantExpr::getZeroValueForNegationExpr(const Type *Ty) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantExpr::destroyConstant() {
+ // Implicitly locked.
ExprConstants->remove(this);
destroyConstantImpl();
}
@@ -2619,6 +2779,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
Replacement = ConstantAggregateZero::get(getType());
} else {
// Check to see if we have this array type already.
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
bool Exists;
ArrayConstantsTy::MapTy::iterator I =
ArrayConstants->InsertOrGetItem(Lookup, Exists);
@@ -2694,6 +2855,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
Replacement = ConstantAggregateZero::get(getType());
} else {
    // Check to see if we have this struct type already.
+ sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
bool Exists;
StructConstantsTy::MapTy::iterator I =
StructConstants->InsertOrGetItem(Lookup, Exists);
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 54bd895..0450566 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -16,7 +16,10 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/LeakDetector.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/StringPool.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
@@ -230,17 +233,21 @@ void Function::removeAttribute(unsigned i, Attributes attr) {
// use GC.
static DenseMap<const Function*,PooledStringPtr> *GCNames;
static StringPool *GCNamePool;
+static ManagedStatic<sys::SmartRWMutex<true> > GCLock;
bool Function::hasGC() const {
+ sys::SmartScopedReader<true> Reader(&*GCLock);
return GCNames && GCNames->count(this);
}
const char *Function::getGC() const {
assert(hasGC() && "Function has no collector");
+ sys::SmartScopedReader<true> Reader(&*GCLock);
return *(*GCNames)[this];
}
void Function::setGC(const char *Str) {
+ sys::SmartScopedWriter<true> Writer(&*GCLock);
if (!GCNamePool)
GCNamePool = new StringPool();
if (!GCNames)
@@ -249,6 +256,7 @@ void Function::setGC(const char *Str) {
}
void Function::clearGC() {
+ sys::SmartScopedWriter<true> Writer(&*GCLock);
if (GCNames) {
GCNames->erase(this);
if (GCNames->empty()) {
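
All of the GC accessors now funnel through one ManagedStatic reader/writer lock, because the GC names live in lazily created side tables rather than in Function itself. A condensed sketch of the pattern using std::shared_mutex in place of sys::SmartRWMutex (the table shape and names here are simplified assumptions):

#include <map>
#include <mutex>
#include <shared_mutex>
#include <string>

static std::map<const void *, std::string> *GCNames; // created on first write
static std::shared_mutex GCLock;

bool hasGC(const void *F) {
  std::shared_lock<std::shared_mutex> R(GCLock);     // readers run in parallel
  return GCNames && GCNames->count(F);
}

void setGC(const void *F, const char *Str) {
  std::unique_lock<std::shared_mutex> W(GCLock);     // writers are exclusive
  if (!GCNames)
    GCNames = new std::map<const void *, std::string>();
  (*GCNames)[F] = Str;
}
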
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 4c228fe..6a6424d 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -1310,7 +1310,7 @@ bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
return false;// Second operand of insertelement must be vector element type.
if (Index->getType() != Type::Int32Ty)
- return false; // Third operand of insertelement must be uint.
+ return false; // Third operand of insertelement must be i32.
return true;
}
@@ -1576,9 +1576,8 @@ void BinaryOperator::init(BinaryOps iType) {
case FDiv:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert((getType()->isFloatingPoint() || (isa<VectorType>(getType()) &&
- cast<VectorType>(getType())->getElementType()->isFloatingPoint()))
- && "Incorrect operand type (not floating point) for FDIV");
+ assert(getType()->isFPOrFPVector() &&
+ "Incorrect operand type (not floating point) for FDIV");
break;
case URem:
case SRem:
@@ -1591,9 +1590,8 @@ void BinaryOperator::init(BinaryOps iType) {
case FRem:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert((getType()->isFloatingPoint() || (isa<VectorType>(getType()) &&
- cast<VectorType>(getType())->getElementType()->isFloatingPoint()))
- && "Incorrect operand type (not floating point) for FREM");
+ assert(getType()->isFPOrFPVector() &&
+ "Incorrect operand type (not floating point) for FREM");
break;
case Shl:
case LShr:
@@ -1837,11 +1835,11 @@ bool CastInst::isNoopCast(const Type *IntPtrTy) const {
case Instruction::BitCast:
return true; // BitCast never modifies bits.
case Instruction::PtrToInt:
- return IntPtrTy->getPrimitiveSizeInBits() ==
- getType()->getPrimitiveSizeInBits();
+ return IntPtrTy->getScalarSizeInBits() ==
+ getType()->getScalarSizeInBits();
case Instruction::IntToPtr:
- return IntPtrTy->getPrimitiveSizeInBits() ==
- getOperand(0)->getType()->getPrimitiveSizeInBits();
+ return IntPtrTy->getScalarSizeInBits() ==
+ getOperand(0)->getType()->getScalarSizeInBits();
}
}
@@ -1880,8 +1878,8 @@ unsigned CastInst::isEliminableCastPair(
// BITCONVERT = FirstClass n/a FirstClass n/a
//
// NOTE: some transforms are safe, but we consider them to be non-profitable.
- // For example, we could merge "fptoui double to uint" + "zext uint to ulong",
- // into "fptoui double to ulong", but this loses information about the range
+ // For example, we could merge "fptoui double to i32" + "zext i32 to i64",
+ // into "fptoui double to i64", but this loses information about the range
// of the produced value (we no longer know the top-part is all zeros).
// Further this conversion is often much more expensive for typical hardware,
// and causes issues when building libgcc. We disallow fptosi+sext for the
@@ -1946,8 +1944,8 @@ unsigned CastInst::isEliminableCastPair(
return 0;
case 7: {
// ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size
- unsigned PtrSize = IntPtrTy->getPrimitiveSizeInBits();
- unsigned MidSize = MidTy->getPrimitiveSizeInBits();
+ unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
+ unsigned MidSize = MidTy->getScalarSizeInBits();
if (MidSize >= PtrSize)
return Instruction::BitCast;
return 0;
@@ -1956,8 +1954,8 @@ unsigned CastInst::isEliminableCastPair(
// ext, trunc -> bitcast, if the SrcTy and DstTy are same size
// ext, trunc -> ext, if sizeof(SrcTy) < sizeof(DstTy)
// ext, trunc -> trunc, if sizeof(SrcTy) > sizeof(DstTy)
- unsigned SrcSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DstSize = DstTy->getPrimitiveSizeInBits();
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
if (SrcSize == DstSize)
return Instruction::BitCast;
else if (SrcSize < DstSize)
@@ -1985,9 +1983,9 @@ unsigned CastInst::isEliminableCastPair(
return 0;
case 13: {
// inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize
- unsigned PtrSize = IntPtrTy->getPrimitiveSizeInBits();
- unsigned SrcSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DstSize = DstTy->getPrimitiveSizeInBits();
+ unsigned PtrSize = IntPtrTy->getScalarSizeInBits();
+ unsigned SrcSize = SrcTy->getScalarSizeInBits();
+ unsigned DstSize = DstTy->getScalarSizeInBits();
if (SrcSize <= PtrSize && SrcSize == DstSize)
return Instruction::BitCast;
return 0;
@@ -2051,7 +2049,7 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, const Type *Ty,
CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
Instruction *InsertBefore) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
return Create(Instruction::ZExt, S, Ty, Name, InsertBefore);
}
@@ -2059,7 +2057,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
BasicBlock *InsertAtEnd) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
return Create(Instruction::ZExt, S, Ty, Name, InsertAtEnd);
}
@@ -2067,7 +2065,7 @@ CastInst *CastInst::CreateZExtOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
Instruction *InsertBefore) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
return Create(Instruction::SExt, S, Ty, Name, InsertBefore);
}
@@ -2075,7 +2073,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
BasicBlock *InsertAtEnd) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
return Create(Instruction::SExt, S, Ty, Name, InsertAtEnd);
}
@@ -2083,7 +2081,7 @@ CastInst *CastInst::CreateSExtOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
Instruction *InsertBefore) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertBefore);
return Create(Instruction::Trunc, S, Ty, Name, InsertBefore);
}
@@ -2091,7 +2089,7 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
const std::string &Name,
BasicBlock *InsertAtEnd) {
- if (S->getType()->getPrimitiveSizeInBits() == Ty->getPrimitiveSizeInBits())
+ if (S->getType()->getScalarSizeInBits() == Ty->getScalarSizeInBits())
return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd);
return Create(Instruction::Trunc, S, Ty, Name, InsertAtEnd);
}
@@ -2125,8 +2123,8 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
bool isSigned, const std::string &Name,
Instruction *InsertBefore) {
assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
(SrcBits == DstBits ? Instruction::BitCast :
(SrcBits > DstBits ? Instruction::Trunc :
@@ -2137,9 +2135,10 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
bool isSigned, const std::string &Name,
BasicBlock *InsertAtEnd) {
- assert(C->getType()->isInteger() && Ty->isInteger() && "Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ assert(C->getType()->isIntOrIntVector() && Ty->isIntOrIntVector() &&
+ "Invalid cast");
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
(SrcBits == DstBits ? Instruction::BitCast :
(SrcBits > DstBits ? Instruction::Trunc :
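
Each CreateIntegerCast overload picks its opcode from the two scalar widths plus the signedness flag. Written out as a standalone decision function (the enum is illustrative; LLVM's real opcodes live on Instruction):

enum class Op { BitCast, Trunc, SExt, ZExt };

// Same width: reinterpret only. Narrowing: truncate. Widening: extend,
// sign- or zero-extending according to the isSigned flag.
Op pickIntegerCast(unsigned SrcBits, unsigned DstBits, bool isSigned) {
  if (SrcBits == DstBits) return Op::BitCast;
  if (SrcBits > DstBits)  return Op::Trunc;
  return isSigned ? Op::SExt : Op::ZExt;
}
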
@@ -2150,10 +2149,10 @@ CastInst *CastInst::CreateIntegerCast(Value *C, const Type *Ty,
CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
const std::string &Name,
Instruction *InsertBefore) {
- assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
+ assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
"Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
(SrcBits == DstBits ? Instruction::BitCast :
(SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
@@ -2163,10 +2162,10 @@ CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
CastInst *CastInst::CreateFPCast(Value *C, const Type *Ty,
const std::string &Name,
BasicBlock *InsertAtEnd) {
- assert(C->getType()->isFloatingPoint() && Ty->isFloatingPoint() &&
+ assert(C->getType()->isFPOrFPVector() && Ty->isFPOrFPVector() &&
"Invalid cast");
- unsigned SrcBits = C->getType()->getPrimitiveSizeInBits();
- unsigned DstBits = Ty->getPrimitiveSizeInBits();
+ unsigned SrcBits = C->getType()->getScalarSizeInBits();
+ unsigned DstBits = Ty->getScalarSizeInBits();
Instruction::CastOps opcode =
(SrcBits == DstBits ? Instruction::BitCast :
(SrcBits > DstBits ? Instruction::FPTrunc : Instruction::FPExt));
@@ -2183,8 +2182,8 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
return true;
// Get the bit sizes, we'll need these
- unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
- unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
+ unsigned SrcBits = SrcTy->getScalarSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr
// Run through the possibilities ...
if (DestTy->isInteger()) { // Casting to integral
@@ -2242,8 +2241,8 @@ CastInst::getCastOpcode(
const Value *Src, bool SrcIsSigned, const Type *DestTy, bool DestIsSigned) {
// Get the bit sizes, we'll need these
const Type *SrcTy = Src->getType();
- unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
- unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr/vector
+ unsigned SrcBits = SrcTy->getScalarSizeInBits(); // 0 for ptr
+ unsigned DestBits = DestTy->getScalarSizeInBits(); // 0 for ptr
assert(SrcTy->isFirstClassType() && DestTy->isFirstClassType() &&
"Only first class types are castable!");
@@ -2344,8 +2343,8 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
return false;
// Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DstBitSize = DstTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DstBitSize = DstTy->getScalarSizeInBits();
// Switch on the opcode provided
switch (op) {
@@ -2400,7 +2399,7 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
// Now we know we're not dealing with a pointer/non-pointer mismatch. In all
// these cases, the cast is okay if the source and destination bit widths
// are identical.
- return SrcBitSize == DstBitSize;
+ return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits();
}
}
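
The switch to getScalarSizeInBits() makes per-element widths compare correctly for vectors, while the final castIsValid check deliberately uses getPrimitiveSizeInBits() so that the total widths must match. For example, assuming the usual vector layout, <4 x i32> and <2 x i64> differ in scalar size (32 vs 64) but share a 128-bit primitive size, so a vector-to-vector bitcast between them is accepted. A toy version of the two measurements:

#include <cassert>

struct VecTy {
  unsigned ElemBits;  // width of one element
  unsigned NumElems;  // element count (1 for a scalar)

  unsigned scalarSizeInBits() const { return ElemBits; }
  unsigned primitiveSizeInBits() const { return ElemBits * NumElems; }
};

int main() {
  VecTy V4I32{32, 4}, V2I64{64, 2};
  assert(V4I32.scalarSizeInBits() != V2I64.scalarSizeInBits());       // 32 vs 64
  assert(V4I32.primitiveSizeInBits() == V2I64.primitiveSizeInBits()); // 128 == 128
}
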
diff --git a/lib/VMCore/LeakDetector.cpp b/lib/VMCore/LeakDetector.cpp
index 1bf9171..b5926bc 100644
--- a/lib/VMCore/LeakDetector.cpp
+++ b/lib/VMCore/LeakDetector.cpp
@@ -14,7 +14,10 @@
#include "llvm/Support/LeakDetector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Streams.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include "llvm/Value.h"
using namespace llvm;
@@ -29,16 +32,29 @@ namespace {
static void print(const Value* P) { cerr << *P; }
};
+ ManagedStatic<sys::SmartRWMutex<true> > LeakDetectorLock;
+
template <typename T>
struct VISIBILITY_HIDDEN LeakDetectorImpl {
- explicit LeakDetectorImpl(const char* const name) : Cache(0), Name(name) { }
+ explicit LeakDetectorImpl(const char* const name = "") :
+ Cache(0), Name(name) { }
+ void clear() {
+ Cache = 0;
+ Ts.clear();
+ }
+
+ void setName(const char* n) {
+ Name = n;
+ }
+
// Because the most common usage pattern, by far, is to add a
// garbage object, then remove it immediately, we optimize this
// case. When an object is added, it is not added to the set
  // immediately, it is held in the Cache member. If it is
// immediately removed, no set search need be performed.
void addGarbage(const T* o) {
+ sys::SmartScopedWriter<true> Writer(&*LeakDetectorLock);
if (Cache) {
assert(Ts.count(Cache) == 0 && "Object already in set!");
Ts.insert(Cache);
@@ -47,6 +63,7 @@ namespace {
}
void removeGarbage(const T* o) {
+ sys::SmartScopedWriter<true> Writer(&*LeakDetectorLock);
if (o == Cache)
Cache = 0; // Cache hit
else
@@ -56,6 +73,7 @@ namespace {
bool hasGarbage(const std::string& Message) {
addGarbage(0); // Flush the Cache
+ sys::SmartScopedReader<true> Reader(&*LeakDetectorLock);
assert(Cache == 0 && "No value should be cached anymore!");
if (!Ts.empty()) {
@@ -70,58 +88,48 @@ namespace {
return true;
}
+
return false;
}
private:
SmallPtrSet<const T*, 8> Ts;
const T* Cache;
- const char* const Name;
+ const char* Name;
};
- static LeakDetectorImpl<void> *Objects;
- static LeakDetectorImpl<Value> *LLVMObjects;
-
- static LeakDetectorImpl<void> &getObjects() {
- if (Objects == 0)
- Objects = new LeakDetectorImpl<void>("GENERIC");
- return *Objects;
- }
-
- static LeakDetectorImpl<Value> &getLLVMObjects() {
- if (LLVMObjects == 0)
- LLVMObjects = new LeakDetectorImpl<Value>("LLVM");
- return *LLVMObjects;
- }
+ static ManagedStatic<LeakDetectorImpl<void> > Objects;
+ static ManagedStatic<LeakDetectorImpl<Value> > LLVMObjects;
static void clearGarbage() {
- delete Objects;
- delete LLVMObjects;
- Objects = 0;
- LLVMObjects = 0;
+ Objects->clear();
+ LLVMObjects->clear();
}
}
void LeakDetector::addGarbageObjectImpl(void *Object) {
- getObjects().addGarbage(Object);
+ Objects->addGarbage(Object);
}
void LeakDetector::addGarbageObjectImpl(const Value *Object) {
- getLLVMObjects().addGarbage(Object);
+ LLVMObjects->addGarbage(Object);
}
void LeakDetector::removeGarbageObjectImpl(void *Object) {
- getObjects().removeGarbage(Object);
+ Objects->removeGarbage(Object);
}
void LeakDetector::removeGarbageObjectImpl(const Value *Object) {
- getLLVMObjects().removeGarbage(Object);
+ LLVMObjects->removeGarbage(Object);
}
void LeakDetector::checkForGarbageImpl(const std::string &Message) {
+ Objects->setName("GENERIC");
+ LLVMObjects->setName("LLVM");
+
// use non-short-circuit version so that both checks are performed
- if (getObjects().hasGarbage(Message) |
- getLLVMObjects().hasGarbage(Message))
+ if (Objects->hasGarbage(Message) |
+ LLVMObjects->hasGarbage(Message))
cerr << "\nThis is probably because you removed an object, but didn't "
<< "delete it. Please check your code for memory leaks.\n";
diff --git a/lib/VMCore/Mangler.cpp b/lib/VMCore/Mangler.cpp
index 0bd190a..1a68b89 100644
--- a/lib/VMCore/Mangler.cpp
+++ b/lib/VMCore/Mangler.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/Mangler.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
+#include "llvm/System/Atomic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
@@ -164,8 +165,12 @@ std::string Mangler::getValueName(const GlobalValue *GV, const char * Suffix) {
} else if (!GV->hasName()) {
// Must mangle the global into a unique ID.
unsigned TypeUniqueID = getTypeID(GV->getType());
- static unsigned GlobalID = 0;
- Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(GlobalID++);
+    static volatile sys::cas_flag GlobalID = 0;
+
+    // AtomicIncrement returns the new value; its predecessor is unique here.
+    sys::cas_flag OldID = sys::AtomicIncrement(&GlobalID) - 1;
+
+    Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(OldID);
} else {
if (GV->hasPrivateLinkage())
Name = makeNameProper(GV->getName() + Suffix, Prefix, PrivatePrefix);
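
The unique suffix has to come from the value the atomic increment hands back, not from a separate read that another thread could interleave with. The same idea in portable C++ with std::atomic (a sketch, not LLVM's sys:: API; the function name is illustrative):

#include <atomic>
#include <string>

std::string nextUnnamedGlobal(unsigned TypeUniqueID) {
  static std::atomic<unsigned> GlobalID{0};
  // fetch_add returns the pre-increment value, so each caller gets a
  // distinct ID even under contention.
  unsigned ID = GlobalID.fetch_add(1, std::memory_order_relaxed);
  return "__unnamed_" + std::to_string(TypeUniqueID) + "_" + std::to_string(ID);
}
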
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index 6db5d7e..e943e31 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -19,6 +19,8 @@
#include "llvm/ModuleProvider.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ManagedStatic.h"
+#include "llvm/System/Atomic.h"
+#include "llvm/System/Threading.h"
#include <algorithm>
#include <map>
#include <set>
@@ -192,8 +194,26 @@ static std::vector<PassRegistrationListener*> *Listeners = 0;
// resurrection after llvm_shutdown is run.
static PassRegistrar *getPassRegistrar() {
static PassRegistrar *PassRegistrarObj = 0;
- if (!PassRegistrarObj)
+
+ // Use double-checked locking to safely initialize the registrar when
+ // we're running in multithreaded mode.
+ PassRegistrar* tmp = PassRegistrarObj;
+ if (llvm_is_multithreaded()) {
+ sys::MemoryFence();
+ if (!tmp) {
+ llvm_acquire_global_lock();
+ tmp = PassRegistrarObj;
+ if (!tmp) {
+ tmp = new PassRegistrar();
+ sys::MemoryFence();
+ PassRegistrarObj = tmp;
+ }
+ llvm_release_global_lock();
+ }
+ } else if (!tmp) {
PassRegistrarObj = new PassRegistrar();
+ }
+
return PassRegistrarObj;
}
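
getPassRegistrar() is the textbook double-checked locking shape: a fast unsynchronized read, a fence, then a re-check under the global lock before publishing the pointer. In modern C++ the same guarantee can be had with acquire/release atomics; a hedged sketch (PassRegistrar stands in for any lazily built singleton):

#include <atomic>
#include <mutex>

struct PassRegistrar { /* ... */ };

PassRegistrar *getPassRegistrar() {
  static std::atomic<PassRegistrar *> Obj{nullptr};
  static std::mutex Lock;

  PassRegistrar *Tmp = Obj.load(std::memory_order_acquire);  // fast path
  if (!Tmp) {
    std::lock_guard<std::mutex> G(Lock);
    Tmp = Obj.load(std::memory_order_relaxed);   // re-check under the lock
    if (!Tmp) {
      Tmp = new PassRegistrar();
      Obj.store(Tmp, std::memory_order_release); // publish fully built object
    }
  }
  return Tmp;
}

In C++11 and later a function-local static would give the same lazy, race-free initialization; the explicit fence-and-recheck form mirrors what the patch has to do with sys::MemoryFence and the global lock.
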
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 4799915..86cf10e 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -20,6 +20,8 @@
#include "llvm/Support/Streams.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/System/Threading.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm-c/Core.h"
#include <algorithm>
@@ -355,6 +357,9 @@ namespace {
/// amount of time each pass takes to execute. This only happens when
/// -time-passes is enabled on the command line.
///
+
+static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
+
class VISIBILITY_HIDDEN TimingInfo {
std::map<Pass*, Timer> TimingData;
TimerGroup TG;
@@ -379,15 +384,18 @@ public:
if (dynamic_cast<PMDataManager *>(P))
return;
+ sys::SmartScopedLock<true> Lock(&*TimingInfoMutex);
std::map<Pass*, Timer>::iterator I = TimingData.find(P);
if (I == TimingData.end())
I=TimingData.insert(std::make_pair(P, Timer(P->getPassName(), TG))).first;
I->second.startTimer();
}
+
void passEnded(Pass *P) {
if (dynamic_cast<PMDataManager *>(P))
return;
+ sys::SmartScopedLock<true> Lock(&*TimingInfoMutex);
std::map<Pass*, Timer>::iterator I = TimingData.find(P);
assert(I != TimingData.end() && "passStarted/passEnded not nested right!");
I->second.stopTimer();
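
Both callbacks serialize on TimingInfoMutex because TimingData is one shared map, and the find-or-insert plus the timer flip must be atomic with respect to other passes finishing concurrently. Reduced to its skeleton with standard primitives (Timer here is a stand-in, not LLVM's):

#include <map>
#include <mutex>

struct Timer { void start() {} void stop() {} };

class TimingInfo {
  std::map<const void *, Timer> TimingData;
  std::mutex Lock;
public:
  void passStarted(const void *P) {
    std::lock_guard<std::mutex> G(Lock);  // guard the find-or-insert
    TimingData[P].start();
  }
  void passEnded(const void *P) {
    std::lock_guard<std::mutex> G(Lock);
    TimingData[P].stop();
  }
};
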
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index a1e6c42..5df7f12 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -23,6 +23,9 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include <algorithm>
#include <cstdarg>
using namespace llvm;
@@ -40,6 +43,14 @@ AbstractTypeUser::~AbstractTypeUser() {}
// Type Class Implementation
//===----------------------------------------------------------------------===//
+// Reader/writer lock used for guarding access to the type maps.
+static ManagedStatic<sys::SmartRWMutex<true> > TypeMapLock;
+
+// Recursive lock used for guarding access to AbstractTypeUsers.
+// NOTE: The true template parameter means this will no-op when we're not in
+// multithreaded mode.
+static ManagedStatic<sys::SmartMutex<true> > AbstractTypeUsersLock;
+
// Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions
// for types as they are needed. Because resolution of types must invalidate
// all of the abstract type descriptions, we keep them in a separate map to make
@@ -112,6 +123,14 @@ const Type *Type::getVAArgsPromotedType() const {
return this;
}
+/// getScalarType - If this is a vector type, return the element type,
+/// otherwise return this.
+const Type *Type::getScalarType() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType();
+ return this;
+}
+
/// isIntOrIntVector - Return true if this is an integer type or a vector of
/// integer types.
///
@@ -174,6 +193,28 @@ unsigned Type::getPrimitiveSizeInBits() const {
}
}
+/// getScalarSizeInBits - If this is a vector type, return the
+/// getPrimitiveSizeInBits value for the element type. Otherwise return the
+/// getPrimitiveSizeInBits value for this type.
+unsigned Type::getScalarSizeInBits() const {
+ return getScalarType()->getPrimitiveSizeInBits();
+}
+
+/// getFPMantissaWidth - Return the width of the mantissa of this type. This
+/// is only valid on floating point types. If the FP type does not
+/// have a stable mantissa (e.g. ppc long double), this method returns -1.
+int Type::getFPMantissaWidth() const {
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType()->getFPMantissaWidth();
+ assert(isFloatingPoint() && "Not a floating point type!");
+ if (ID == FloatTyID) return 24;
+ if (ID == DoubleTyID) return 53;
+ if (ID == X86_FP80TyID) return 64;
+ if (ID == FP128TyID) return 113;
+ assert(ID == PPC_FP128TyID && "unknown fp type");
+ return -1;
+}
+
/// isSizedDerivedType - Derived types like structures and arrays are sized
/// iff all of the members of the type are sized as well. Since asking for
/// their size is relatively uncommon, move this operation out of line.
@@ -414,8 +455,29 @@ void DerivedType::dropAllTypeUses() {
if (NumContainedTys != 0) {
// The type must stay abstract. To do this, we insert a pointer to a type
// that will never get resolved, thus will always be abstract.
- static Type *AlwaysOpaqueTy = OpaqueType::get();
- static PATypeHolder Holder(AlwaysOpaqueTy);
+ static Type *AlwaysOpaqueTy = 0;
+ static PATypeHolder* Holder = 0;
+ Type *tmp = AlwaysOpaqueTy;
+ if (llvm_is_multithreaded()) {
+ sys::MemoryFence();
+ if (!tmp) {
+ llvm_acquire_global_lock();
+ tmp = AlwaysOpaqueTy;
+ if (!tmp) {
+ tmp = OpaqueType::get();
+          PATypeHolder* tmp2 = new PATypeHolder(tmp);
+ sys::MemoryFence();
+ AlwaysOpaqueTy = tmp;
+ Holder = tmp2;
+ }
+
+ llvm_release_global_lock();
+ }
+    } else if (!tmp) {
+ AlwaysOpaqueTy = OpaqueType::get();
+ Holder = new PATypeHolder(AlwaysOpaqueTy);
+ }
+
ContainedTys[0] = AlwaysOpaqueTy;
// Change the rest of the types to be Int32Ty's. It doesn't matter what we
@@ -818,7 +880,7 @@ public:
// We already have this type in the table. Get rid of the newly refined
// type.
TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
- Ty->refineAbstractTypeTo(NewTy);
+ Ty->unlockedRefineAbstractTypeTo(NewTy);
return;
}
} else {
@@ -854,7 +916,7 @@ public:
}
TypesByHash.erase(Entry);
}
- Ty->refineAbstractTypeTo(NewTy);
+ Ty->unlockedRefineAbstractTypeTo(NewTy);
return;
}
}
@@ -938,15 +1000,30 @@ const IntegerType *IntegerType::get(unsigned NumBits) {
default:
break;
}
-
+
IntegerValType IVT(NumBits);
- IntegerType *ITy = IntegerTypes->get(IVT);
- if (ITy) return ITy; // Found a match, return it!
-
- // Value not found. Derive a new type!
- ITy = new IntegerType(NumBits);
- IntegerTypes->add(IVT, ITy);
-
+ IntegerType *ITy = 0;
+
+ // First, see if the type is already in the table, for which
+ // a reader lock suffices.
+ TypeMapLock->reader_acquire();
+ ITy = IntegerTypes->get(IVT);
+ TypeMapLock->reader_release();
+
+ if (!ITy) {
+ // OK, not in the table, get a writer lock.
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+ ITy = IntegerTypes->get(IVT);
+
+ // We need to _recheck_ the table in case someone
+ // put it in between when we released the reader lock
+ // and when we gained the writer lock!
+ if (!ITy) {
+ // Value not found. Derive a new type!
+ ITy = new IntegerType(NumBits);
+ IntegerTypes->add(IVT, ITy);
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << *ITy << "\n";
#endif
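
The comment spells out why the second lookup is mandatory: between reader_release() and the writer acquire there is a window in which another thread can insert the same IntegerType, and skipping the recheck would mint a duplicate and break pointer-equality of uniqued types. The pattern reduced to a skeleton, with std::shared_mutex standing in for sys::SmartRWMutex and a plain heap int standing in for the type object:

#include <mutex>
#include <shared_mutex>
#include <unordered_map>

static std::unordered_map<unsigned, int *> Table;
static std::shared_mutex TableLock;

int *getUniqued(unsigned NumBits) {
  {
    std::shared_lock<std::shared_mutex> R(TableLock);  // cheap common case
    auto I = Table.find(NumBits);
    if (I != Table.end()) return I->second;
  }
  // Between the reader release above and the writer acquire below, another
  // thread may have inserted NumBits; without the second lookup we would
  // create a duplicate and break pointer-equality of uniqued objects.
  std::unique_lock<std::shared_mutex> W(TableLock);
  auto I = Table.find(NumBits);
  if (I == Table.end())
    I = Table.emplace(NumBits, new int(NumBits)).first;
  return I->second;
}
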
@@ -1010,14 +1087,26 @@ FunctionType *FunctionType::get(const Type *ReturnType,
const std::vector<const Type*> &Params,
bool isVarArg) {
FunctionValType VT(ReturnType, Params, isVarArg);
- FunctionType *FT = FunctionTypes->get(VT);
- if (FT)
- return FT;
-
- FT = (FunctionType*) operator new(sizeof(FunctionType) +
- sizeof(PATypeHandle)*(Params.size()+1));
- new (FT) FunctionType(ReturnType, Params, isVarArg);
- FunctionTypes->add(VT, FT);
+ FunctionType *FT = 0;
+
+ TypeMapLock->reader_acquire();
+ FT = FunctionTypes->get(VT);
+ TypeMapLock->reader_release();
+
+ if (!FT) {
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+
+ // Have to check again here, because it might have
+    // been inserted between when we released the reader
+ // lock and when we acquired the writer lock.
+ FT = FunctionTypes->get(VT);
+ if (!FT) {
+ FT = (FunctionType*) operator new(sizeof(FunctionType) +
+ sizeof(PATypeHandle)*(Params.size()+1));
+ new (FT) FunctionType(ReturnType, Params, isVarArg);
+ FunctionTypes->add(VT, FT);
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << FT << "\n";
@@ -1049,20 +1138,30 @@ public:
}
};
}
-static ManagedStatic<TypeMap<ArrayValType, ArrayType> > ArrayTypes;
+static ManagedStatic<TypeMap<ArrayValType, ArrayType> > ArrayTypes;
ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
assert(ElementType && "Can't get array of <null> types!");
assert(isValidElementType(ElementType) && "Invalid type for array element!");
ArrayValType AVT(ElementType, NumElements);
- ArrayType *AT = ArrayTypes->get(AVT);
- if (AT) return AT; // Found a match, return it!
-
- // Value not found. Derive a new type!
- ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
-
+ ArrayType *AT = 0;
+
+ TypeMapLock->reader_acquire();
+ AT = ArrayTypes->get(AVT);
+ TypeMapLock->reader_release();
+
+ if (!AT) {
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+
+ // Recheck. Might have changed between release and acquire.
+ AT = ArrayTypes->get(AVT);
+ if (!AT) {
+ // Value not found. Derive a new type!
+ ArrayTypes->add(AVT, AT = new ArrayType(ElementType, NumElements));
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << *AT << "\n";
#endif
@@ -1106,19 +1205,27 @@ public:
}
};
}
-static ManagedStatic<TypeMap<VectorValType, VectorType> > VectorTypes;
+static ManagedStatic<TypeMap<VectorValType, VectorType> > VectorTypes;
VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
assert(ElementType && "Can't get vector of <null> types!");
VectorValType PVT(ElementType, NumElements);
- VectorType *PT = VectorTypes->get(PVT);
- if (PT) return PT; // Found a match, return it!
-
- // Value not found. Derive a new type!
- VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
-
+ VectorType *PT = 0;
+
+ TypeMapLock->reader_acquire();
+ PT = VectorTypes->get(PVT);
+ TypeMapLock->reader_release();
+
+ if (!PT) {
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+ PT = VectorTypes->get(PVT);
+ // Recheck. Might have changed between release and acquire.
+ if (!PT) {
+ VectorTypes->add(PVT, PT = new VectorType(ElementType, NumElements));
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << *PT << "\n";
#endif
@@ -1173,15 +1280,24 @@ static ManagedStatic<TypeMap<StructValType, StructType> > StructTypes;
StructType *StructType::get(const std::vector<const Type*> &ETypes,
bool isPacked) {
StructValType STV(ETypes, isPacked);
- StructType *ST = StructTypes->get(STV);
- if (ST) return ST;
-
- // Value not found. Derive a new type!
- ST = (StructType*) operator new(sizeof(StructType) +
- sizeof(PATypeHandle) * ETypes.size());
- new (ST) StructType(ETypes, isPacked);
- StructTypes->add(STV, ST);
-
+ StructType *ST = 0;
+
+ TypeMapLock->reader_acquire();
+ ST = StructTypes->get(STV);
+ TypeMapLock->reader_release();
+
+ if (!ST) {
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+ ST = StructTypes->get(STV);
+ // Recheck. Might have changed between release and acquire.
+ if (!ST) {
+ // Value not found. Derive a new type!
+ ST = (StructType*) operator new(sizeof(StructType) +
+ sizeof(PATypeHandle) * ETypes.size());
+ new (ST) StructType(ETypes, isPacked);
+ StructTypes->add(STV, ST);
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << *ST << "\n";
#endif
@@ -1249,12 +1365,21 @@ PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) {
assert(isValidElementType(ValueType) && "Invalid type for pointer element!");
PointerValType PVT(ValueType, AddressSpace);
- PointerType *PT = PointerTypes->get(PVT);
- if (PT) return PT;
-
- // Value not found. Derive a new type!
- PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
-
+ PointerType *PT = 0;
+
+ TypeMapLock->reader_acquire();
+ PT = PointerTypes->get(PVT);
+ TypeMapLock->reader_release();
+
+ if (!PT) {
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+ PT = PointerTypes->get(PVT);
+ // Recheck. Might have changed between release and acquire.
+ if (!PT) {
+ // Value not found. Derive a new type!
+ PointerTypes->add(PVT, PT = new PointerType(ValueType, AddressSpace));
+ }
+ }
#ifdef DEBUG_MERGE_TYPES
DOUT << "Derived new type: " << *PT << "\n";
#endif
@@ -1281,12 +1406,24 @@ bool PointerType::isValidElementType(const Type *ElemTy) {
// Derived Type Refinement Functions
//===----------------------------------------------------------------------===//
+// addAbstractTypeUser - Notify an abstract type that there is a new user of
+// it. This function is called primarily by the PATypeHandle class.
+void Type::addAbstractTypeUser(AbstractTypeUser *U) const {
+ assert(isAbstract() && "addAbstractTypeUser: Current type not abstract!");
+ AbstractTypeUsersLock->acquire();
+ AbstractTypeUsers.push_back(U);
+ AbstractTypeUsersLock->release();
+}
+
+
// removeAbstractTypeUser - Notify an abstract type that a user of the class
// no longer has a handle to the type. This function is called primarily by
// the PATypeHandle class. When there are no users of the abstract type, it
// is annihilated, because there is no way to get a reference to it ever again.
//
void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
+ AbstractTypeUsersLock->acquire();
+
// Search from back to front because we will notify users from back to
// front. Also, it is likely that there will be stack-like behavior to
// users that register and unregister users.
@@ -1310,16 +1447,20 @@ void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
DOUT << "DELETEing unused abstract type: <" << *this
<< ">[" << (void*)this << "]" << "\n";
#endif
- this->destroy();
+
+ this->destroy();
}
+
+ AbstractTypeUsersLock->release();
}
-// refineAbstractTypeTo - This function is used when it is discovered that
-// the 'this' abstract type is actually equivalent to the NewType specified.
-// This causes all users of 'this' to switch to reference the more concrete type
-// NewType and for 'this' to be deleted.
+// unlockedRefineAbstractTypeTo - This function is used when it is discovered
+// that the 'this' abstract type is actually equivalent to the NewType
+// specified. This causes all users of 'this' to switch to reference the more
+// concrete type NewType and for 'this' to be deleted. Only used for internal
+// callers.
//
-void DerivedType::refineAbstractTypeTo(const Type *NewType) {
+void DerivedType::unlockedRefineAbstractTypeTo(const Type *NewType) {
assert(isAbstract() && "refineAbstractTypeTo: Current type is not abstract!");
assert(this != NewType && "Can't refine to myself!");
assert(ForwardType == 0 && "This type has already been refined!");
@@ -1338,8 +1479,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
// refined, that we will not continue using a dead reference...
//
PATypeHolder NewTy(NewType);
-
- // Any PATypeHolders referring to this type will now automatically forward to
+  // Any PATypeHolders referring to this type will now automatically forward to
// the type we are resolved to.
ForwardType = NewType;
if (NewType->isAbstract())
@@ -1362,6 +1502,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
// will not cause users to drop off of the use list. If we resolve to ourself
// we succeed!
//
+ AbstractTypeUsersLock->acquire();
while (!AbstractTypeUsers.empty() && NewTy != this) {
AbstractTypeUser *User = AbstractTypeUsers.back();
@@ -1377,6 +1518,7 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
assert(AbstractTypeUsers.size() != OldSize &&
"AbsTyUser did not remove self from user list!");
}
+ AbstractTypeUsersLock->release();
// If we were successful removing all users from the type, 'this' will be
// deleted when the last PATypeHolder is destroyed or updated from this type.
@@ -1384,6 +1526,16 @@ void DerivedType::refineAbstractTypeTo(const Type *NewType) {
// destroyed.
}
+// refineAbstractTypeTo - This function is used by external callers to notify
+// us that this abstract type is equivalent to another type.
+//
+void DerivedType::refineAbstractTypeTo(const Type *NewType) {
+ // All recursive calls will go through unlockedRefineAbstractTypeTo,
+ // to avoid deadlock problems.
+ sys::SmartScopedWriter<true> Writer(&*TypeMapLock);
+ unlockedRefineAbstractTypeTo(NewType);
+}
+
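
Splitting the operation into a locking public entry point (refineAbstractTypeTo) and an unlocked worker (unlockedRefineAbstractTypeTo) is the usual cure for self-deadlock: the TypeMap code above already holds TypeMapLock when it needs to refine, so it calls the worker directly. Schematically, with std::mutex in place of the RW lock and an illustrative class:

#include <mutex>

class TypeGraph {
  std::mutex Lock;

  // Internal worker: assumes the caller already holds Lock, so refinements
  // triggered from inside never try to re-acquire it.
  void refineUnlocked(int From, int To) { /* rewrite users of From */ }

public:
  // External entry point: takes the lock once, then delegates.
  void refine(int From, int To) {
    std::lock_guard<std::mutex> G(Lock);
    refineUnlocked(From, To);
  }
};
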
// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type that
// the current type has transitioned from being abstract to being concrete.
//
@@ -1392,6 +1544,7 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
DOUT << "typeIsREFINED type: " << (void*)this << " " << *this << "\n";
#endif
+ AbstractTypeUsersLock->acquire();
unsigned OldSize = AbstractTypeUsers.size(); OldSize=OldSize;
while (!AbstractTypeUsers.empty()) {
AbstractTypeUser *ATU = AbstractTypeUsers.back();
@@ -1400,6 +1553,7 @@ void DerivedType::notifyUsesThatTypeBecameConcrete() {
assert(AbstractTypeUsers.size() < OldSize-- &&
"AbstractTypeUser did not remove itself from the use list!");
}
+ AbstractTypeUsersLock->release();
}
// refineAbstractType - Called when a contained type is found to be more
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
index 475d719..5ae60e2 100644
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -14,13 +14,18 @@
#include "llvm/TypeSymbolTable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Streams.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include <algorithm>
using namespace llvm;
#define DEBUG_SYMBOL_TABLE 0
#define DEBUG_ABSTYPE 0
+static ManagedStatic<sys::SmartRWMutex<true> > TypeSymbolTableLock;
+
TypeSymbolTable::~TypeSymbolTable() {
// Drop all abstract type references in the type plane...
for (iterator TI = tmap.begin(), TE = tmap.end(); TI != TE; ++TI) {
@@ -31,6 +36,9 @@ TypeSymbolTable::~TypeSymbolTable() {
std::string TypeSymbolTable::getUniqueName(const std::string &BaseName) const {
std::string TryName = BaseName;
+
+ sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
const_iterator End = tmap.end();
// See if the name exists
@@ -41,16 +49,20 @@ std::string TypeSymbolTable::getUniqueName(const std::string &BaseName) const {
// lookup a type by name - returns null on failure
Type* TypeSymbolTable::lookup(const std::string& Name) const {
+ sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
const_iterator TI = tmap.find(Name);
+ Type* result = 0;
if (TI != tmap.end())
- return const_cast<Type*>(TI->second);
- return 0;
+ result = const_cast<Type*>(TI->second);
+ return result;
}
// remove - Remove a type from the symbol table...
Type* TypeSymbolTable::remove(iterator Entry) {
+ TypeSymbolTableLock->writer_acquire();
+
assert(Entry != tmap.end() && "Invalid entry to remove!");
-
const Type* Result = Entry->second;
#if DEBUG_SYMBOL_TABLE
@@ -59,6 +71,8 @@ Type* TypeSymbolTable::remove(iterator Entry) {
#endif
tmap.erase(Entry);
+
+ TypeSymbolTableLock->writer_release();
// If we are removing an abstract type, remove the symbol table from it's use
// list...
@@ -79,6 +93,8 @@ Type* TypeSymbolTable::remove(iterator Entry) {
void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
assert(T && "Can't insert null type into symbol table!");
+ TypeSymbolTableLock->writer_acquire();
+
if (tmap.insert(make_pair(Name, T)).second) {
// Type inserted fine with no conflict.
@@ -103,6 +119,8 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
// Insert the tmap entry
tmap.insert(make_pair(UniqueName, T));
}
+
+ TypeSymbolTableLock->writer_release();
// If we are adding an abstract type, add the symbol table to it's use list.
if (T->isAbstract()) {
@@ -116,7 +134,8 @@ void TypeSymbolTable::insert(const std::string& Name, const Type* T) {
// This function is called when one of the types in the type plane are refined
void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
const Type *NewType) {
-
+ sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
+
// Loop over all of the types in the symbol table, replacing any references
// to OldType with references to NewType. Note that there may be multiple
// occurrences, and although we only need to remove one at a time, it's
@@ -146,6 +165,7 @@ void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) {
// Loop over all of the types in the symbol table, dropping any abstract
// type user entries for AbsTy which occur because there are names for the
// type.
+ sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
for (iterator TI = begin(), TE = end(); TI != TE; ++TI)
if (TI->second == const_cast<Type*>(static_cast<const Type*>(AbsTy)))
AbsTy->removeAbstractTypeUser(this);
@@ -159,6 +179,7 @@ static void DumpTypes(const std::pair<const std::string, const Type*>& T ) {
void TypeSymbolTable::dump() const {
cerr << "TypeSymbolPlane: ";
+ sys::SmartScopedReader<true> Reader(&*TypeSymbolTableLock);
for_each(tmap.begin(), tmap.end(), DumpTypes);
}
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 3af161f..c952b78 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -22,6 +22,8 @@
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/ValueHandle.h"
+#include "llvm/System/RWMutex.h"
+#include "llvm/System/Threading.h"
#include "llvm/ADT/DenseMap.h"
#include <algorithm>
using namespace llvm;
@@ -405,6 +407,7 @@ Value *Value::DoPHITranslation(const BasicBlock *CurBB,
/// not a value has an entry in this map.
typedef DenseMap<Value*, ValueHandleBase*> ValueHandlesTy;
static ManagedStatic<ValueHandlesTy> ValueHandles;
+static ManagedStatic<sys::SmartRWMutex<true> > ValueHandlesLock;
/// AddToExistingUseList - Add this ValueHandle to the use list for VP, where
/// List is known to point into the existing use list.
@@ -427,9 +430,11 @@ void ValueHandleBase::AddToUseList() {
if (VP->HasValueHandle) {
// If this value already has a ValueHandle, then it must be in the
// ValueHandles map already.
+ sys::SmartScopedReader<true> Reader(&*ValueHandlesLock);
ValueHandleBase *&Entry = (*ValueHandles)[VP];
assert(Entry != 0 && "Value doesn't have any handles?");
- return AddToExistingUseList(&Entry);
+ AddToExistingUseList(&Entry);
+ return;
}
// Ok, it doesn't have any handles yet, so we must insert it into the
@@ -437,6 +442,7 @@ void ValueHandleBase::AddToUseList() {
// reallocate itself, which would invalidate all of the PrevP pointers that
// point into the old table. Handle this by checking for reallocation and
// updating the stale pointers only if needed.
+ sys::SmartScopedWriter<true> Writer(&*ValueHandlesLock);
ValueHandlesTy &Handles = *ValueHandles;
const void *OldBucketPtr = Handles.getPointerIntoBucketsArray();
@@ -448,8 +454,9 @@ void ValueHandleBase::AddToUseList() {
// If reallocation didn't happen or if this was the first insertion, don't
// walk the table.
if (Handles.isPointerIntoBucketsArray(OldBucketPtr) ||
- Handles.size() == 1)
+ Handles.size() == 1) {
return;
+ }
// Okay, reallocation did happen. Fix the Prev Pointers.
for (ValueHandlesTy::iterator I = Handles.begin(), E = Handles.end();
@@ -477,6 +484,7 @@ void ValueHandleBase::RemoveFromUseList() {
// If the Next pointer was null, then it is possible that this was the last
// ValueHandle watching VP. If so, delete its entry from the ValueHandles
// map.
+ sys::SmartScopedWriter<true> Writer(&*ValueHandlesLock);
ValueHandlesTy &Handles = *ValueHandles;
if (Handles.isPointerIntoBucketsArray(PrevPtr)) {
Handles.erase(VP);
@@ -490,7 +498,9 @@ void ValueHandleBase::ValueIsDeleted(Value *V) {
// Get the linked list base, which is guaranteed to exist since the
// HasValueHandle flag is set.
+ ValueHandlesLock->reader_acquire();
ValueHandleBase *Entry = (*ValueHandles)[V];
+ ValueHandlesLock->reader_release();
assert(Entry && "Value bit set but no entries exist");
while (Entry) {
@@ -528,7 +538,9 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
// Get the linked list base, which is guaranteed to exist since the
// HasValueHandle flag is set.
+ ValueHandlesLock->reader_acquire();
ValueHandleBase *Entry = (*ValueHandles)[Old];
+ ValueHandlesLock->reader_release();
assert(Entry && "Value bit set but no entries exist");
while (Entry) {
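
The subtle part documented above AddToUseList is that the per-value handle list heads live inside the ValueHandles map's buckets, so an insert that reallocates the table silently invalidates every handle's PrevP pointer; the patch detects this with DenseMap's getPointerIntoBucketsArray / isPointerIntoBucketsArray pair. A reduced model of "detect the rehash, then rebuild stale interior state", using std::unordered_map's bucket count as the marker (illustrative only; DenseMap's detection is pointer-based):

#include <cstddef>
#include <unordered_map>

int main() {
  std::unordered_map<int, int> Handles;
  std::size_t OldBuckets = Handles.bucket_count(); // marker taken before insert

  for (int I = 0; I != 100; ++I) {
    Handles[I] = I;
    if (Handles.bucket_count() != OldBuckets) {
      // Reallocation happened: anything pointing into the old table (the
      // PrevP pointers in the patch) is stale and must be walked and fixed.
      OldBuckets = Handles.bucket_count();
    }
  }
}
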
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index e9f2acd..10816e6 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -745,8 +745,8 @@ void Verifier::visitTruncInst(TruncInst &I) {
const Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
Assert1(SrcTy->isIntOrIntVector(), "Trunc only operates on integer", &I);
Assert1(DestTy->isIntOrIntVector(), "Trunc only produces integer", &I);
@@ -767,8 +767,8 @@ void Verifier::visitZExtInst(ZExtInst &I) {
Assert1(DestTy->isIntOrIntVector(), "ZExt only produces an integer", &I);
Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
"zext source and destination must both be a vector or neither", &I);
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
Assert1(SrcBitSize < DestBitSize,"Type too small for ZExt", &I);
@@ -781,8 +781,8 @@ void Verifier::visitSExtInst(SExtInst &I) {
const Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
Assert1(SrcTy->isIntOrIntVector(), "SExt only operates on integer", &I);
Assert1(DestTy->isIntOrIntVector(), "SExt only produces an integer", &I);
@@ -798,8 +798,8 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
Assert1(SrcTy->isFPOrFPVector(),"FPTrunc only operates on FP", &I);
Assert1(DestTy->isFPOrFPVector(),"FPTrunc only produces an FP", &I);
@@ -816,8 +816,8 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
const Type *DestTy = I.getType();
// Get the size of the types in bits, we'll need this later
- unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits();
- unsigned DestBitSize = DestTy->getPrimitiveSizeInBits();
+ unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
+ unsigned DestBitSize = DestTy->getScalarSizeInBits();
Assert1(SrcTy->isFPOrFPVector(),"FPExt only operates on FP", &I);
Assert1(DestTy->isFPOrFPVector(),"FPExt only produces an FP", &I);