author     rdivacky <rdivacky@FreeBSD.org>  2010-04-02 08:54:30 +0000
committer  rdivacky <rdivacky@FreeBSD.org>  2010-04-02 08:54:30 +0000
commit     20e856b2a58d12231aa42d5d13888b15ac03e5a4 (patch)
tree       cf5763d092b81cecc168fa28032247ee495d06e2 /lib
parent     2f2afc1aae898651e26987a5c71f3febb19bca98 (diff)
Update LLVM to r100181.
Diffstat (limited to 'lib')
-rw-r--r-- lib/Analysis/CaptureTracking.cpp | 2
-rw-r--r-- lib/Analysis/DebugInfo.cpp | 5
-rw-r--r-- lib/Analysis/IPA/CallGraphSCCPass.cpp | 17
-rw-r--r-- lib/Analysis/IPA/GlobalsModRef.cpp | 2
-rw-r--r-- lib/Analysis/InlineCost.cpp | 6
-rw-r--r-- lib/Analysis/LiveValues.cpp | 2
-rw-r--r-- lib/Analysis/LoopPass.cpp | 18
-rw-r--r-- lib/Analysis/MemoryBuiltins.cpp | 2
-rw-r--r-- lib/Analysis/ProfileEstimatorPass.cpp | 2
-rw-r--r-- lib/Analysis/ProfileInfo.cpp | 18
-rw-r--r-- lib/Analysis/ProfileInfoLoaderPass.cpp | 2
-rw-r--r-- lib/Analysis/ProfileVerifierPass.cpp | 6
-rw-r--r-- lib/Analysis/ScalarEvolutionExpander.cpp | 13
-rw-r--r-- lib/Archive/ArchiveWriter.cpp | 2
-rw-r--r-- lib/AsmParser/LLLexer.h | 2
-rw-r--r-- lib/AsmParser/LLParser.cpp | 73
-rw-r--r-- lib/AsmParser/LLParser.h | 13
-rw-r--r-- lib/Bitcode/Reader/BitcodeReader.cpp | 10
-rw-r--r-- lib/Bitcode/Writer/BitcodeWriter.cpp | 4
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 33
-rw-r--r-- lib/CodeGen/AsmPrinter/DIE.cpp | 31
-rw-r--r-- lib/CodeGen/AsmPrinter/DIE.h | 38
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 287
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfDebug.h | 32
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfException.cpp | 61
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfPrinter.cpp | 4
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfWriter.cpp | 17
-rw-r--r-- lib/CodeGen/BranchFolding.cpp | 12
-rw-r--r-- lib/CodeGen/DwarfEHPrepare.cpp | 322
-rw-r--r-- lib/CodeGen/LiveInterval.cpp | 15
-rw-r--r-- lib/CodeGen/LiveIntervalAnalysis.cpp | 19
-rw-r--r-- lib/CodeGen/LiveStackAnalysis.cpp | 2
-rw-r--r-- lib/CodeGen/LiveVariables.cpp | 10
-rw-r--r-- lib/CodeGen/MachineBasicBlock.cpp | 64
-rw-r--r-- lib/CodeGen/MachineCSE.cpp | 20
-rw-r--r-- lib/CodeGen/MachineFunction.cpp | 4
-rw-r--r-- lib/CodeGen/MachineModuleInfo.cpp | 6
-rw-r--r-- lib/CodeGen/OptimizeExts.cpp | 25
-rw-r--r-- lib/CodeGen/PHIElimination.cpp | 2
-rw-r--r-- lib/CodeGen/PreAllocSplitting.cpp | 2
-rw-r--r-- lib/CodeGen/RegAllocLocal.cpp | 584
-rw-r--r-- lib/CodeGen/ScheduleDAGInstrs.cpp | 72
-rw-r--r-- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 46
-rw-r--r-- lib/CodeGen/SelectionDAG/FastISel.cpp | 7
-rw-r--r-- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 13
-rw-r--r-- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 272
-rw-r--r-- lib/CodeGen/SelectionDAG/InstrEmitter.h | 36
-rw-r--r-- lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 14
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 137
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 284
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 29
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 117
-rw-r--r-- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 21
-rw-r--r-- lib/CodeGen/SimpleRegisterCoalescing.cpp | 117
-rw-r--r-- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 4
-rw-r--r-- lib/CodeGen/TwoAddressInstructionPass.cpp | 12
-rw-r--r-- lib/CodeGen/VirtRegRewriter.cpp | 22
-rw-r--r-- lib/ExecutionEngine/ExecutionEngine.cpp | 83
-rw-r--r-- lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp | 2
-rw-r--r-- lib/MC/MCAsmStreamer.cpp | 20
-rw-r--r-- lib/MC/MCAssembler.cpp | 615
-rw-r--r-- lib/MC/MCContext.cpp | 20
-rw-r--r-- lib/MC/MCExpr.cpp | 26
-rw-r--r-- lib/MC/MCInst.cpp | 17
-rw-r--r-- lib/MC/MCMachOStreamer.cpp | 89
-rw-r--r-- lib/MC/MCParser/AsmParser.cpp | 4
-rw-r--r-- lib/MC/MCSection.cpp | 6
-rw-r--r-- lib/MC/MachObjectWriter.cpp | 158
-rw-r--r-- lib/Support/APFloat.cpp | 284
-rw-r--r-- lib/Support/APInt.cpp | 79
-rw-r--r-- lib/Support/CommandLine.cpp | 4
-rw-r--r-- lib/Support/Debug.cpp | 3
-rw-r--r-- lib/Support/ErrorHandling.cpp | 5
-rw-r--r-- lib/Support/MemoryBuffer.cpp | 43
-rw-r--r-- lib/Support/Statistic.cpp | 65
-rw-r--r-- lib/Support/Timer.cpp | 504
-rw-r--r-- lib/Support/Triple.cpp | 2
-rw-r--r-- lib/Support/raw_ostream.cpp | 46
-rw-r--r-- lib/System/Unix/Mutex.inc | 6
-rw-r--r-- lib/System/Unix/Path.inc | 4
-rw-r--r-- lib/System/Win32/Program.inc | 34
-rw-r--r-- lib/System/Win32/Signals.inc | 1
-rw-r--r-- lib/Target/ARM/ARM.td | 21
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.cpp | 31
-rw-r--r-- lib/Target/ARM/ARMISelDAGToDAG.cpp | 96
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp | 3
-rw-r--r-- lib/Target/ARM/ARMInstrFormats.td | 195
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.td | 2
-rw-r--r-- lib/Target/ARM/ARMInstrNEON.td | 1169
-rw-r--r-- lib/Target/ARM/ARMInstrVFP.td | 8
-rw-r--r-- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 6
-rw-r--r-- lib/Target/ARM/ARMSubtarget.cpp | 15
-rw-r--r-- lib/Target/ARM/ARMSubtarget.h | 5
-rw-r--r-- lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 11
-rw-r--r-- lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp | 4
-rw-r--r-- lib/Target/ARM/NEONPreAllocPass.cpp | 18
-rw-r--r-- lib/Target/ARM/README.txt | 3
-rw-r--r-- lib/Target/ARM/Thumb1InstrInfo.cpp | 2
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.cpp | 4
-rw-r--r-- lib/Target/Alpha/AlphaInstrInfo.cpp | 15
-rw-r--r-- lib/Target/Blackfin/BlackfinInstrInfo.td | 27
-rw-r--r-- lib/Target/Blackfin/BlackfinIntrinsics.td | 6
-rw-r--r-- lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 6
-rw-r--r-- lib/Target/CellSPU/SPU.h | 67
-rw-r--r-- lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 10
-rw-r--r-- lib/Target/CellSPU/SPUISelLowering.cpp | 5
-rw-r--r-- lib/Target/CellSPU/SPUInstrInfo.cpp | 15
-rw-r--r-- lib/Target/CellSPU/SPUInstrInfo.td | 13
-rw-r--r-- lib/Target/CellSPU/SPURegisterInfo.cpp | 87
-rw-r--r-- lib/Target/CellSPU/SPURegisterInfo.h | 19
-rw-r--r-- lib/Target/MBlaze/MBlazeIntrinsics.td | 4
-rw-r--r-- lib/Target/MSIL/MSILWriter.cpp | 2
-rw-r--r-- lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp | 4
-rw-r--r-- lib/Target/MSP430/MSP430InstrInfo.cpp | 5
-rw-r--r-- lib/Target/Mangler.cpp | 5
-rw-r--r-- lib/Target/Mips/MipsInstrFPU.td | 14
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.cpp | 15
-rw-r--r-- lib/Target/PIC16/PIC16InstrInfo.cpp | 5
-rw-r--r-- lib/Target/PIC16/PIC16Section.cpp | 18
-rw-r--r-- lib/Target/PIC16/PIC16Section.h | 17
-rw-r--r-- lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 13
-rw-r--r-- lib/Target/PowerPC/PPCBranchSelector.cpp | 2
-rw-r--r-- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 20
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.cpp | 12
-rw-r--r-- lib/Target/PowerPC/PPCISelLowering.h | 13
-rw-r--r-- lib/Target/PowerPC/PPCInstrAltivec.td | 17
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.cpp | 15
-rw-r--r-- lib/Target/PowerPC/PPCRegisterInfo.cpp | 30
-rw-r--r-- lib/Target/SystemZ/SystemZInstrFP.td | 12
-rw-r--r-- lib/Target/SystemZ/SystemZInstrInfo.cpp | 4
-rw-r--r-- lib/Target/SystemZ/SystemZInstrInfo.td | 64
-rw-r--r-- lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 5
-rw-r--r-- lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 12
-rw-r--r-- lib/Target/X86/CMakeLists.txt | 1
-rw-r--r-- lib/Target/X86/SSEDomainFix.cpp | 499
-rw-r--r-- lib/Target/X86/X86.h | 4
-rw-r--r-- lib/Target/X86/X86.td | 15
-rw-r--r-- lib/Target/X86/X86AsmBackend.cpp | 131
-rw-r--r-- lib/Target/X86/X86FastISel.cpp | 4
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp | 214
-rw-r--r-- lib/Target/X86/X86ISelLowering.h | 15
-rw-r--r-- lib/Target/X86/X86Instr64bit.td | 285
-rw-r--r-- lib/Target/X86/X86InstrFormats.td | 69
-rw-r--r-- lib/Target/X86/X86InstrInfo.cpp | 79
-rw-r--r-- lib/Target/X86/X86InstrInfo.h | 14
-rw-r--r-- lib/Target/X86/X86InstrInfo.td | 734
-rw-r--r-- lib/Target/X86/X86InstrMMX.td | 13
-rw-r--r-- lib/Target/X86/X86InstrSSE.td | 128
-rw-r--r-- lib/Target/X86/X86Subtarget.cpp | 4
-rw-r--r-- lib/Target/X86/X86Subtarget.h | 4
-rw-r--r-- lib/Target/X86/X86TargetMachine.cpp | 10
-rw-r--r-- lib/Target/X86/X86TargetMachine.h | 1
-rw-r--r-- lib/Target/XCore/XCoreInstrInfo.cpp | 15
-rw-r--r-- lib/Target/XCore/XCoreInstrInfo.td | 2
-rw-r--r-- lib/Transforms/IPO/ArgumentPromotion.cpp | 15
-rw-r--r-- lib/Transforms/IPO/DeadArgumentElimination.cpp | 97
-rw-r--r-- lib/Transforms/IPO/GlobalOpt.cpp | 74
-rw-r--r-- lib/Transforms/IPO/PruneEH.cpp | 2
-rw-r--r-- lib/Transforms/InstCombine/InstCombineCalls.cpp | 2
-rw-r--r-- lib/Transforms/Scalar/ABCD.cpp | 253
-rw-r--r-- lib/Transforms/Scalar/CodeGenPrepare.cpp | 8
-rw-r--r-- lib/Transforms/Scalar/GVN.cpp | 14
-rw-r--r-- lib/Transforms/Scalar/IndVarSimplify.cpp | 34
-rw-r--r-- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 7
-rw-r--r-- lib/Transforms/Scalar/Reg2Mem.cpp | 2
-rw-r--r-- lib/Transforms/Scalar/SCCP.cpp | 19
-rw-r--r-- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 2
-rw-r--r-- lib/Transforms/Scalar/SimplifyLibCalls.cpp | 26
-rw-r--r-- lib/Transforms/Utils/AddrModeMatcher.cpp | 5
-rw-r--r-- lib/Transforms/Utils/BreakCriticalEdges.cpp | 2
-rw-r--r-- lib/Transforms/Utils/BuildLibCalls.cpp | 28
-rw-r--r-- lib/Transforms/Utils/LowerInvoke.cpp | 4
-rw-r--r-- lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 2
-rw-r--r-- lib/Transforms/Utils/SSAUpdater.cpp | 497
-rw-r--r-- lib/Transforms/Utils/SimplifyCFG.cpp | 8
-rw-r--r-- lib/VMCore/AsmWriter.cpp | 9
-rw-r--r-- lib/VMCore/AutoUpgrade.cpp | 17
-rw-r--r-- lib/VMCore/CMakeLists.txt | 2
-rw-r--r-- lib/VMCore/Constants.cpp | 20
-rw-r--r-- lib/VMCore/ConstantsContext.h | 55
-rw-r--r-- lib/VMCore/Core.cpp | 8
-rw-r--r-- lib/VMCore/DebugLoc.cpp | 288
-rw-r--r-- lib/VMCore/Function.cpp | 18
-rw-r--r-- lib/VMCore/Globals.cpp | 4
-rw-r--r-- lib/VMCore/IRBuilder.cpp | 13
-rw-r--r-- lib/VMCore/InlineAsm.cpp | 25
-rw-r--r-- lib/VMCore/Instruction.cpp | 2
-rw-r--r-- lib/VMCore/Instructions.cpp | 39
-rw-r--r-- lib/VMCore/LLVMContext.cpp | 57
-rw-r--r-- lib/VMCore/LLVMContextImpl.cpp | 103
-rw-r--r-- lib/VMCore/LLVMContextImpl.h | 119
-rw-r--r-- lib/VMCore/Metadata.cpp | 143
-rw-r--r-- lib/VMCore/Module.cpp | 2
-rw-r--r-- lib/VMCore/PassManager.cpp | 54
-rw-r--r-- lib/VMCore/Type.cpp | 10
-rw-r--r-- lib/VMCore/Value.cpp | 6
-rw-r--r-- lib/VMCore/ValueSymbolTable.cpp | 10
-rw-r--r-- lib/VMCore/Verifier.cpp | 87
198 files changed, 7087 insertions(+), 4629 deletions(-)
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 8767c18..0478258 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -49,7 +49,7 @@ bool llvm::PointerMayBeCaptured(const Value *V,
SmallSet<Use*, Threshold> Visited;
int Count = 0;
- for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
// If there are lots of uses, conservatively say that the value
// is captured to avoid taking too much compile time.
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index fda69ac..f12552d 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -96,9 +96,8 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
if (DbgNode == 0)
return DIDescriptor();
- if (Elt < DbgNode->getNumOperands() && DbgNode->getOperand(Elt))
- return DIDescriptor(dyn_cast<MDNode>(DbgNode->getOperand(Elt)));
-
+ if (Elt < DbgNode->getNumOperands())
+ return DIDescriptor(dyn_cast_or_null<MDNode>(DbgNode->getOperand(Elt)));
return DIDescriptor();
}
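
The fix above folds an explicit null test into the cast itself: dyn_cast<> requires a non-null operand, while dyn_cast_or_null<> also accepts null and simply returns null. A minimal sketch of the idiom (the helper name asMDNode and the exact include paths are illustrative, not part of this patch):

    #include "llvm/Metadata.h"        // MDNode, in the 2.7-era tree
    #include "llvm/Support/Casting.h" // dyn_cast, dyn_cast_or_null

    // Returns V as an MDNode, or null if V is null or of another type.
    // With plain dyn_cast<> the null case would need a separate check.
    static llvm::MDNode *asMDNode(llvm::Value *V) {
      return llvm::dyn_cast_or_null<llvm::MDNode>(V);
    }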
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index 0e333d1..0f39f44 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -17,12 +17,13 @@
#define DEBUG_TYPE "cgscc-passmgr"
#include "llvm/CallGraphSCCPass.h"
+#include "llvm/IntrinsicInst.h"
+#include "llvm/Function.h"
+#include "llvm/PassManagers.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/ADT/SCCIterator.h"
-#include "llvm/PassManagers.h"
-#include "llvm/Function.h"
#include "llvm/Support/Debug.h"
-#include "llvm/IntrinsicInst.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -102,9 +103,10 @@ bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC,
CallGraphUpToDate = true;
}
- Timer *T = StartPassTimer(CGSP);
- Changed = CGSP->runOnSCC(CurSCC);
- StopPassTimer(CGSP, T);
+ {
+ TimeRegion PassTimer(getPassTimer(CGSP));
+ Changed = CGSP->runOnSCC(CurSCC);
+ }
// After the CGSCCPass is done, when assertions are enabled, use
// RefreshCallGraph to verify that the callgraph was correctly updated.
@@ -125,9 +127,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, std::vector<CallGraphNode*> &CurSCC,
for (unsigned i = 0, e = CurSCC.size(); i != e; ++i) {
if (Function *F = CurSCC[i]->getFunction()) {
dumpPassInfo(P, EXECUTION_MSG, ON_FUNCTION_MSG, F->getName());
- Timer *T = StartPassTimer(FPP);
+ TimeRegion PassTimer(getPassTimer(FPP));
Changed |= FPP->runOnFunction(*F);
- StopPassTimer(FPP, T);
}
}
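
Both hunks above replace a manual StartPassTimer/StopPassTimer pair with TimeRegion, the RAII guard from llvm/Support/Timer.h: the timer starts when the guard is constructed and stops when the guard leaves scope, so no return path can leave a pass timer running. A rough sketch of the pattern, assuming the null-tolerant Timer* constructor this patch relies on:

    #include "llvm/Support/Timer.h"

    static bool runTimed(llvm::Timer *T) {   // T may be null
      llvm::TimeRegion Region(T);            // starts T (no-op if null)
      bool Changed = false;
      // ... the work being measured; every exit path stops the timer ...
      return Changed;
    }                                        // destructor stops T here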
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index 7b43089..b14afa3 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -257,7 +257,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
} else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
// Make sure that this is just the function being called, not that it is
// passing into the function.
- for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i)
+ for (unsigned i = 0, e = II->getNumOperands() - 3; i != e; ++i)
if (II->getOperand(i) == V) return true;
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
if (CE->getOpcode() == Instruction::GetElementPtr ||
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 5b8b534..c599e90 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -255,9 +255,11 @@ InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS,
Function *Caller = TheCall->getParent()->getParent();
// Don't inline functions which can be redefined at link-time to mean
- // something else. Don't inline functions marked noinline.
+ // something else. Don't inline functions marked noinline or call sites
+ // marked noinline.
if (Callee->mayBeOverridden() ||
- Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee))
+ Callee->hasFnAttr(Attribute::NoInline) || NeverInline.count(Callee) ||
+ CS.isNoInline())
return llvm::InlineCost::getNever();
// InlineCost - This value measures how good of an inline candidate this call
diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp
index 1b91d93..23964ff 100644
--- a/lib/Analysis/LiveValues.cpp
+++ b/lib/Analysis/LiveValues.cpp
@@ -125,7 +125,7 @@ LiveValues::Memo &LiveValues::compute(const Value *V) {
bool LiveOutOfDefBB = false;
// Examine each use of the value.
- for (Value::use_const_iterator I = V->use_begin(), E = V->use_end();
+ for (Value::const_use_iterator I = V->use_begin(), E = V->use_end();
I != E; ++I) {
const User *U = *I;
const BasicBlock *UseBB = cast<Instruction>(U)->getParent();
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 2d613f6..e2d2c2b 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Support/Timer.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -221,22 +222,22 @@ bool LPPassManager::runOnFunction(Function &F) {
LoopPass *P = (LoopPass*)getContainedPass(Index);
dumpPassInfo(P, EXECUTION_MSG, ON_LOOP_MSG,
- CurrentLoop->getHeader()->getNameStr());
+ CurrentLoop->getHeader()->getName());
dumpRequiredSet(P);
initializeAnalysisImpl(P);
{
PassManagerPrettyStackEntry X(P, *CurrentLoop->getHeader());
- Timer *T = StartPassTimer(P);
+ TimeRegion PassTimer(getPassTimer(P));
+
Changed |= P->runOnLoop(CurrentLoop, *this);
- StopPassTimer(P, T);
}
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
skipThisLoop ? "<deleted>" :
- CurrentLoop->getHeader()->getNameStr());
+ CurrentLoop->getHeader()->getName());
dumpPreservedSet(P);
if (!skipThisLoop) {
@@ -245,9 +246,10 @@ bool LPPassManager::runOnFunction(Function &F) {
// is a function pass and it's really expensive to verify every
// loop in the function every time. That level of checking can be
// enabled with the -verify-loop-info option.
- Timer *T = StartPassTimer(LI);
- CurrentLoop->verifyLoop();
- StopPassTimer(LI, T);
+ {
+ TimeRegion PassTimer(getPassTimer(LI));
+ CurrentLoop->verifyLoop();
+ }
// Then call the regular verifyAnalysis functions.
verifyPreservedAnalysis(P);
@@ -257,7 +259,7 @@ bool LPPassManager::runOnFunction(Function &F) {
recordAvailableAnalysis(P);
removeDeadPasses(P,
skipThisLoop ? "<deleted>" :
- CurrentLoop->getHeader()->getNameStr(),
+ CurrentLoop->getHeader()->getName(),
ON_LOOP_MSG);
if (skipThisLoop)
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 297b588..89f9743 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -139,7 +139,7 @@ const PointerType *llvm::getMallocType(const CallInst *CI) {
unsigned NumOfBitCastUses = 0;
// Determine if CallInst has a bitcast use.
- for (Value::use_const_iterator UI = CI->use_begin(), E = CI->use_end();
+ for (Value::const_use_iterator UI = CI->use_begin(), E = CI->use_end();
UI != E; )
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(*UI++)) {
MallocType = cast<PointerType>(BCI->getDestTy());
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index bce6b31..da4ce47 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -398,7 +398,7 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) {
for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
const BasicBlock *BB = &(*FI);
BlockInformation[&F][BB] = 0;
- pred_const_iterator predi = pred_begin(BB), prede = pred_end(BB);
+ const_pred_iterator predi = pred_begin(BB), prede = pred_end(BB);
if (predi == prede) {
Edge e = getEdge(0,BB);
setEdgeWeight(e,0);
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 85531be..662576e 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -67,7 +67,7 @@ ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
double Count = MissingValue;
- pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
// Are there zero predecessors of this block?
if (PI == PE) {
@@ -508,7 +508,7 @@ bool ProfileInfoT<Function,BasicBlock>::
// have no value
double incount = 0;
SmallSet<const BasicBlock*,8> pred_visited;
- pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
if (bbi==bbe) {
Edge e = getEdge(0,BB);
incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
@@ -582,7 +582,7 @@ bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *B
double inWeight = 0;
std::set<Edge> inMissing;
std::set<const BasicBlock*> ProcessedPreds;
- pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
if (bbi == bbe) {
readEdge(this,getEdge(0,BB),inWeight,inMissing);
}
@@ -639,7 +639,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
// FI != FE; ++FI) {
// const BasicBlock* BB = &(*FI);
// {
-// pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+// const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
// if (NBB == End) {
// setEdgeWeight(getEdge(0,BB),0);
// }
@@ -779,7 +779,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
// Calculate incoming flow.
double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
std::set<const BasicBlock *> Processed;
- for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
NBB != End; ++NBB) {
if (Processed.insert(*NBB).second) {
Edge e = getEdge(*NBB, BB);
@@ -869,7 +869,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
if (getEdgeWeight(e) == MissingValue) {
double iw = 0;
std::set<const BasicBlock *> Processed;
- for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
NBB != End; ++NBB) {
if (Processed.insert(*NBB).second) {
Edge e = getEdge(*NBB, BB);
@@ -893,7 +893,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
const BasicBlock *Dest;
Path P;
bool BackEdgeFound = false;
- for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
NBB != End; ++NBB) {
Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges);
if (Dest == *NBB) {
@@ -935,7 +935,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
// Calculate incoming flow.
double iw = 0;
std::set<const BasicBlock *> Processed;
- for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
NBB != End; ++NBB) {
if (Processed.insert(*NBB).second) {
Edge e = getEdge(*NBB, BB);
@@ -965,7 +965,7 @@ void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
while(FI != FE && !FoundPath) {
const BasicBlock *BB = *FI; ++FI;
- for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
NBB != End; ++NBB) {
Edge e = getEdge(*NBB,BB);
double w = getEdgeWeight(e);
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index ac9ed52..8ea4ecf 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -119,7 +119,7 @@ void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
bbi != bbe; ++bbi) {
recurseBasicBlock(*bbi);
}
- for (pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
bbi != bbe; ++bbi) {
recurseBasicBlock(*bbi);
}
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index a2ddc8e..5d87e14 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -96,8 +96,8 @@ namespace llvm {
double inWeight = 0;
int inCount = 0;
std::set<const BType*> ProcessedPreds;
- for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi ) {
+ for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
if (ProcessedPreds.insert(*bbi).second) {
typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
double EdgeWeight = PI->getEdgeWeight(E);
@@ -242,7 +242,7 @@ namespace llvm {
// Read predecessors.
std::set<const BType*> ProcessedPreds;
- pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
+ const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
// If there are none, check for (0,BB) edge.
if (bpi == bpe) {
DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 138cdc6..2e18cea 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1268,19 +1268,8 @@ Value *SCEVExpander::expand(const SCEV *S) {
L = L->getParentLoop())
if (S->isLoopInvariant(L)) {
if (!L) break;
- if (BasicBlock *Preheader = L->getLoopPreheader()) {
+ if (BasicBlock *Preheader = L->getLoopPreheader())
InsertPt = Preheader->getTerminator();
- BasicBlock::iterator IP = InsertPt;
- // Back past any debug info instructions. Sometimes we inserted
- // something earlier before debug info but after any real instructions.
- // This should behave the same as if debug info was not present.
- while (IP != Preheader->begin()) {
- --IP;
- if (!isa<DbgInfoIntrinsic>(IP))
- break;
- InsertPt = IP;
- }
- }
} else {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp
index 58fbbf4..a02601a 100644
--- a/lib/Archive/ArchiveWriter.cpp
+++ b/lib/Archive/ArchiveWriter.cpp
@@ -220,7 +220,7 @@ Archive::writeMember(
}
// Now that we have the data in memory, update the
- // symbol table if its a bitcode file.
+ // symbol table if it's a bitcode file.
if (CreateSymbolTable && member.isBitcode()) {
std::vector<std::string> symbols;
std::string FullMemberName = archPath.str() + "(" + member.getPath().str()
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 3057992..70f1cfd 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -55,7 +55,7 @@ namespace llvm {
typedef SMLoc LocTy;
LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); }
lltok::Kind getKind() const { return CurKind; }
- const std::string getStrVal() const { return StrVal; }
+ const std::string &getStrVal() const { return StrVal; }
const Type *getTyVal() const { return TyVal; }
unsigned getUIntVal() const { return UIntVal; }
const APSInt &getAPSIntVal() const { return APSIntVal; }
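
The one-character change above eliminates a copy: 'const std::string getStrVal()' returned a fresh copy of StrVal on every call (a top-level const on a by-value return does nothing), while 'const std::string &' hands out a reference to the lexer's member. A minimal illustration, independent of the LLLexer class:

    #include <string>

    struct TokenHolder {
      std::string StrVal;
      // Before: copies StrVal on every call; the 'const' adds nothing.
      const std::string getByValue() const { return StrVal; }
      // After: no copy, but the reference is only valid while the
      // holder (and the token it describes) stays alive and unchanged.
      const std::string &getByRef() const { return StrVal; }
    };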
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 8083a07..cdad077 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -39,6 +39,27 @@ bool LLParser::Run() {
/// ValidateEndOfModule - Do final validity and sanity checks at the end of the
/// module.
bool LLParser::ValidateEndOfModule() {
+ // Handle any instruction metadata forward references.
+ if (!ForwardRefInstMetadata.empty()) {
+ for (DenseMap<Instruction*, std::vector<MDRef> >::iterator
+ I = ForwardRefInstMetadata.begin(), E = ForwardRefInstMetadata.end();
+ I != E; ++I) {
+ Instruction *Inst = I->first;
+ const std::vector<MDRef> &MDList = I->second;
+
+ for (unsigned i = 0, e = MDList.size(); i != e; ++i) {
+ unsigned SlotNo = MDList[i].MDSlot;
+
+ if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0)
+ return Error(MDList[i].Loc, "use of undefined metadata '!" +
+ utostr(SlotNo) + "'");
+ Inst->setMetadata(MDList[i].MDKind, NumberedMetadata[SlotNo]);
+ }
+ }
+ ForwardRefInstMetadata.clear();
+ }
+
+
// Update auto-upgraded malloc calls to "malloc".
// FIXME: Remove in LLVM 3.0.
if (MallocF) {
@@ -472,18 +493,30 @@ bool LLParser::ParseMDString(MDString *&Result) {
// MDNode:
// ::= '!' MDNodeNumber
+//
+/// This version of ParseMDNodeID returns the slot number and null in the case
+/// of a forward reference.
+bool LLParser::ParseMDNodeID(MDNode *&Result, unsigned &SlotNo) {
+ // !{ ..., !42, ... }
+ if (ParseUInt32(SlotNo)) return true;
+
+ // Check existing MDNode.
+ if (SlotNo < NumberedMetadata.size() && NumberedMetadata[SlotNo] != 0)
+ Result = NumberedMetadata[SlotNo];
+ else
+ Result = 0;
+ return false;
+}
+
bool LLParser::ParseMDNodeID(MDNode *&Result) {
// !{ ..., !42, ... }
unsigned MID = 0;
- if (ParseUInt32(MID)) return true;
+ if (ParseMDNodeID(Result, MID)) return true;
- // Check existing MDNode.
- if (MID < NumberedMetadata.size() && NumberedMetadata[MID] != 0) {
- Result = NumberedMetadata[MID];
- return false;
- }
+ // If not a forward reference, just return it now.
+ if (Result) return false;
- // Create MDNode forward reference.
+ // Otherwise, create MDNode forward reference.
// FIXME: This is not unique enough!
std::string FwdRefName = "llvm.mdnode.fwdref." + utostr(MID);
@@ -1078,9 +1111,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
/// ParseInstructionMetadata
/// ::= !dbg !42 (',' !dbg !57)*
-bool LLParser::
-ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned,
- MDNode *> > &Result){
+bool LLParser::ParseInstructionMetadata(Instruction *Inst) {
do {
if (Lex.getKind() != lltok::MetadataVar)
return TokError("expected metadata after comma");
@@ -1089,12 +1120,21 @@ ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned,
Lex.Lex();
MDNode *Node;
+ unsigned NodeID;
+ SMLoc Loc = Lex.getLoc();
if (ParseToken(lltok::exclaim, "expected '!' here") ||
- ParseMDNodeID(Node))
+ ParseMDNodeID(Node, NodeID))
return true;
unsigned MDK = M->getMDKindID(Name.c_str());
- Result.push_back(std::make_pair(MDK, Node));
+ if (Node) {
+ // If we got the node, add it to the instruction.
+ Inst->setMetadata(MDK, Node);
+ } else {
+ MDRef R = { Loc, MDK, NodeID };
+ // Otherwise, remember that this should be resolved later.
+ ForwardRefInstMetadata[Inst].push_back(R);
+ }
// If this is the end of the list, we're done.
} while (EatIfPresent(lltok::comma));
@@ -2896,22 +2936,17 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
// With a normal result, we check to see if the instruction is followed by
// a comma and metadata.
if (EatIfPresent(lltok::comma))
- if (ParseInstructionMetadata(MetadataOnInst))
+ if (ParseInstructionMetadata(Inst))
return true;
break;
case InstExtraComma:
// If the instruction parser ate an extra comma at the end of it, it
// *must* be followed by metadata.
- if (ParseInstructionMetadata(MetadataOnInst))
+ if (ParseInstructionMetadata(Inst))
return true;
break;
}
- // Set metadata attached with this instruction.
- for (unsigned i = 0, e = MetadataOnInst.size(); i != e; ++i)
- Inst->setMetadata(MetadataOnInst[i].first, MetadataOnInst[i].second);
- MetadataOnInst.clear();
-
BB->getInstList().push_back(Inst);
// Set the name on the instruction.
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 9abe404..ae460bb 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -17,6 +17,7 @@
#include "LLLexer.h"
#include "llvm/Module.h"
#include "llvm/Type.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/ValueHandle.h"
#include <map>
@@ -76,6 +77,14 @@ namespace llvm {
LLVMContext& Context;
LLLexer Lex;
Module *M;
+
+ // Instruction metadata resolution. Each instruction can have a list of
+ // MDRef info associated with them.
+ struct MDRef {
+ SMLoc Loc;
+ unsigned MDKind, MDSlot;
+ };
+ DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
// Type resolution handling data structures.
std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes;
@@ -171,8 +180,7 @@ namespace llvm {
bool ParseOptionalCallingConv(CallingConv::ID &CC);
bool ParseOptionalAlignment(unsigned &Alignment);
bool ParseOptionalStackAlignment(unsigned &Alignment);
- bool ParseInstructionMetadata(SmallVectorImpl<std::pair<unsigned,
- MDNode *> > &);
+ bool ParseInstructionMetadata(Instruction *Inst);
bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
@@ -204,6 +212,7 @@ namespace llvm {
bool ParseNamedMetadata();
bool ParseMDString(MDString *&Result);
bool ParseMDNodeID(MDNode *&Result);
+ bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
// Type Parsing.
bool ParseType(PATypeHolder &Result, bool AllowVoid = false);
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index b9453c9..76d112e 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1527,12 +1527,16 @@ bool BitcodeReader::ParseModule() {
bool BitcodeReader::ParseBitcodeInto(Module *M) {
TheModule = 0;
- if (Buffer->getBufferSize() & 3)
- return Error("Bitcode stream should be a multiple of 4 bytes in length");
-
unsigned char *BufPtr = (unsigned char *)Buffer->getBufferStart();
unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
+ if (Buffer->getBufferSize() & 3) {
+ if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd))
+ return Error("Invalid bitcode signature");
+ else
+ return Error("Bitcode stream should be a multiple of 4 bytes in length");
+ }
+
// If we have a wrapper header, parse it and ignore the non-bc file contents.
// The magic number is 0x0B17C0DE stored in little endian.
if (isBitcodeWrapper(BufPtr, BufEnd))
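
As the comment above says, the wrapper magic is 0x0B17C0DE stored little-endian, so it appears in memory as the bytes DE C0 17 0B. A hedged restatement of the test (the real checks are the isBitcodeWrapper/isRawBitcode helpers used in the hunk, not this function):

    // Sketch only: true if the buffer starts with the little-endian
    // encoding of the bitcode wrapper magic 0x0B17C0DE.
    static bool looksLikeWrappedBitcode(const unsigned char *BufPtr,
                                        const unsigned char *BufEnd) {
      return BufEnd - BufPtr >= 4 &&
             BufPtr[0] == 0xDE && BufPtr[1] == 0xC0 &&
             BufPtr[2] == 0x17 && BufPtr[3] == 0x0B;
    }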
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 3ab2726..1f69e16 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1090,11 +1090,11 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
// Emit value #'s for the fixed parameters.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
- Vals.push_back(VE.getValueID(I.getOperand(i+3))); // fixed param.
+ Vals.push_back(VE.getValueID(I.getOperand(i))); // fixed param.
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
- for (unsigned i = 3+FTy->getNumParams(), e = I.getNumOperands();
+ for (unsigned i = FTy->getNumParams(), e = I.getNumOperands()-3;
i != e; ++i)
PushValueAndType(I.getOperand(i), InstID, Vals, VE); // vararg
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 1d4f7f7..3e71d18 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -62,7 +62,7 @@ AsmPrinter::AsmPrinter(formatted_raw_ostream &o, TargetMachine &tm,
TM(tm), MAI(tm.getMCAsmInfo()), TRI(tm.getRegisterInfo()),
OutContext(Streamer.getContext()),
OutStreamer(Streamer),
- LastMI(0), LastFn(0), Counter(~0U), SetCounter(0), PrevDLT(NULL) {
+ LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) {
DW = 0; MMI = 0;
VerboseAsm = Streamer.isVerboseAsm();
}
@@ -922,8 +922,8 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo,
// Otherwise, emit with .set (aka assignment).
MCSymbol *SetLabel =
- OutContext.GetOrCreateTemporarySymbol(Twine(MAI->getPrivateGlobalPrefix()) +
- "set" + Twine(SetCounter++));
+ OutContext.GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
+ "set" + Twine(SetCounter++));
OutStreamer.EmitAssignment(SetLabel, Diff);
OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/);
}
@@ -1337,25 +1337,12 @@ void AsmPrinter::processDebugLoc(const MachineInstr *MI,
if (!MAI || !DW || !MAI->doesSupportDebugInformation()
|| !DW->ShouldEmitDwarfDebug())
return;
- if (MI->getOpcode() == TargetOpcode::DBG_VALUE)
- return;
- DebugLoc DL = MI->getDebugLoc();
- if (DL.isUnknown())
- return;
- DILocation CurDLT = MF->getDILocation(DL);
- if (!CurDLT.getScope().Verify())
- return;
- if (!BeforePrintingInsn) {
+ if (!BeforePrintingInsn)
// After printing instruction
DW->EndScope(MI);
- } else if (CurDLT.getNode() != PrevDLT) {
- MCSymbol *L = DW->RecordSourceLine(CurDLT.getLineNumber(),
- CurDLT.getColumnNumber(),
- CurDLT.getScope().getNode());
- DW->BeginScope(MI, L);
- PrevDLT = CurDLT.getNode();
- }
+ else
+ DW->BeginScope(MI);
}
@@ -1612,7 +1599,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
- return OutContext.GetOrCreateTemporarySymbol
+ return OutContext.GetOrCreateSymbol
(Twine(MAI->getPrivateGlobalPrefix()) + "CPI" + Twine(getFunctionNumber())
+ "_" + Twine(CPID));
}
@@ -1625,7 +1612,7 @@ MCSymbol *AsmPrinter::GetJTISymbol(unsigned JTID, bool isLinkerPrivate) const {
/// GetJTSetSymbol - Return the symbol for the specified jump table .set
/// FIXME: privatize to AsmPrinter.
MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
- return OutContext.GetOrCreateTemporarySymbol
+ return OutContext.GetOrCreateSymbol
(Twine(MAI->getPrivateGlobalPrefix()) + Twine(getFunctionNumber()) + "_" +
Twine(UID) + "_set_" + Twine(MBBID));
}
@@ -1639,9 +1626,7 @@ MCSymbol *AsmPrinter::GetSymbolWithGlobalValueBase(const GlobalValue *GV,
SmallString<60> NameStr;
Mang->getNameWithPrefix(NameStr, GV, ForcePrivate);
NameStr.append(Suffix.begin(), Suffix.end());
- if (!GV->hasPrivateLinkage() && !ForcePrivate)
- return OutContext.GetOrCreateSymbol(NameStr.str());
- return OutContext.GetOrCreateTemporarySymbol(NameStr.str());
+ return OutContext.GetOrCreateSymbol(NameStr.str());
}
/// GetExternalSymbolSymbol - Return the MCSymbol for the specified
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index e97754e..e0e3ff7 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
@@ -114,10 +115,11 @@ DIE::~DIE() {
/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
///
-void DIE::addSiblingOffset() {
- DIEInteger *DI = new DIEInteger(0);
+DIEValue *DIE::addSiblingOffset(BumpPtrAllocator &A) {
+ DIEInteger *DI = new (A) DIEInteger(0);
Values.insert(Values.begin(), DI);
Abbrev.AddFirstAttribute(dwarf::DW_AT_sibling, dwarf::DW_FORM_ref4);
+ return DI;
}
#ifndef NDEBUG
@@ -277,31 +279,6 @@ void DIELabel::print(raw_ostream &O) {
#endif
//===----------------------------------------------------------------------===//
-// DIESectionOffset Implementation
-//===----------------------------------------------------------------------===//
-
-/// EmitValue - Emit delta value.
-///
-void DIESectionOffset::EmitValue(DwarfPrinter *D, unsigned Form) const {
- bool IsSmall = Form == dwarf::DW_FORM_data4;
- D->EmitSectionOffset(Label, Section, IsSmall, IsEH);
-}
-
-/// SizeOf - Determine size of delta value in bytes.
-///
-unsigned DIESectionOffset::SizeOf(const TargetData *TD, unsigned Form) const {
- if (Form == dwarf::DW_FORM_data4) return 4;
- return TD->getPointerSize();
-}
-
-#ifndef NDEBUG
-void DIESectionOffset::print(raw_ostream &O) {
- O << "Off: " << Label->getName() << "-" << Section->getName()
- << "-" << IsEH;
-}
-#endif
-
-//===----------------------------------------------------------------------===//
// DIEDelta Implementation
//===----------------------------------------------------------------------===//
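
A recurring theme in this patch, here and throughout DwarfDebug.cpp below, is moving DIE values from individually new'd and delete'd heap objects onto a BumpPtrAllocator via placement new. A minimal sketch of the idiom, assuming the placement operator new for BumpPtrAllocator declared in llvm/Support/Allocator.h (Node is a made-up type):

    #include "llvm/Support/Allocator.h"

    struct Node { int Val; explicit Node(int V) : Val(V) {} };

    static void demo() {
      llvm::BumpPtrAllocator Arena;
      // Carve the object out of the arena. There is no matching delete;
      // the arena reclaims all of its memory at once when destroyed.
      Node *N = new (Arena) Node(42);
      (void)N;
      // Caveat, visible in DwarfDebug's destructor below: the arena never
      // runs destructors, so objects with non-trivial cleanup (such as
      // DIEBlock) must have their destructor invoked explicitly.
    }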
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index c5909fa..8b27ed2 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -153,7 +153,7 @@ namespace llvm {
unsigned getOffset() const { return Offset; }
unsigned getSize() const { return Size; }
const std::vector<DIE *> &getChildren() const { return Children; }
- SmallVector<DIEValue*, 32> &getValues() { return Values; }
+ const SmallVector<DIEValue*, 32> &getValues() const { return Values; }
DIE *getParent() const { return Parent; }
void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
void setOffset(unsigned O) { Offset = O; }
@@ -171,8 +171,10 @@ namespace llvm {
unsigned getSiblingOffset() const { return Offset + Size; }
/// addSiblingOffset - Add a sibling offset field to the front of the DIE.
+ /// The caller is responsible for deleting the return value at or after the
+ /// same time it destroys this DIE.
///
- void addSiblingOffset();
+ DIEValue *addSiblingOffset(BumpPtrAllocator &A);
/// addChild - Add a child to the DIE.
///
@@ -328,38 +330,6 @@ namespace llvm {
};
//===--------------------------------------------------------------------===//
- /// DIESectionOffset - A section offset DIE.
- ///
- class DIESectionOffset : public DIEValue {
- const MCSymbol *Label;
- const MCSymbol *Section;
- bool IsEH : 1;
- public:
- DIESectionOffset(const MCSymbol *Lab, const MCSymbol *Sec,
- bool isEH = false)
- : DIEValue(isSectionOffset), Label(Lab), Section(Sec),
- IsEH(isEH) {}
-
- /// EmitValue - Emit section offset.
- ///
- virtual void EmitValue(DwarfPrinter *D, unsigned Form) const;
-
- /// SizeOf - Determine size of section offset value in bytes.
- ///
- virtual unsigned SizeOf(const TargetData *TD, unsigned Form) const;
-
- // Implement isa/cast/dyncast.
- static bool classof(const DIESectionOffset *) { return true; }
- static bool classof(const DIEValue *D) {
- return D->getType() == isSectionOffset;
- }
-
-#ifndef NDEBUG
- virtual void print(raw_ostream &O);
-#endif
- };
-
- //===--------------------------------------------------------------------===//
/// DIEDelta - A simple label difference DIE.
///
class DIEDelta : public DIEValue {
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 7153fe2..fb91d4f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -92,11 +92,11 @@ public:
/// addGlobal - Add a new global entity to the compile unit.
///
- void addGlobal(const std::string &Name, DIE *Die) { Globals[Name] = Die; }
+ void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; }
/// addGlobalType - Add a new global type to the compile unit.
///
- void addGlobalType(const std::string &Name, DIE *Die) {
+ void addGlobalType(StringRef Name, DIE *Die) {
GlobalTypes[Name] = Die;
}
@@ -149,20 +149,26 @@ class DbgVariable {
DIVariable Var; // Variable Descriptor.
unsigned FrameIndex; // Variable frame index.
const MachineInstr *DbgValueMInsn; // DBG_VALUE
+ // DbgValueLabel - DBG_VALUE is effective from this label.
+ MCSymbol *DbgValueLabel;
DbgVariable *const AbstractVar; // Abstract variable for this variable.
DIE *TheDIE;
public:
// AbsVar may be NULL.
DbgVariable(DIVariable V, unsigned I, DbgVariable *AbsVar)
- : Var(V), FrameIndex(I), DbgValueMInsn(0), AbstractVar(AbsVar), TheDIE(0) {}
+ : Var(V), FrameIndex(I), DbgValueMInsn(0),
+ DbgValueLabel(0), AbstractVar(AbsVar), TheDIE(0) {}
DbgVariable(DIVariable V, const MachineInstr *MI, DbgVariable *AbsVar)
- : Var(V), FrameIndex(0), DbgValueMInsn(MI), AbstractVar(AbsVar), TheDIE(0)
+ : Var(V), FrameIndex(0), DbgValueMInsn(MI), DbgValueLabel(0),
+ AbstractVar(AbsVar), TheDIE(0)
{}
// Accessors.
DIVariable getVariable() const { return Var; }
unsigned getFrameIndex() const { return FrameIndex; }
const MachineInstr *getDbgValue() const { return DbgValueMInsn; }
+ MCSymbol *getDbgValueLabel() const { return DbgValueLabel; }
+ void setDbgValueLabel(MCSymbol *L) { DbgValueLabel = L; }
DbgVariable *getAbstractVariable() const { return AbstractVar; }
void setDIE(DIE *D) { TheDIE = D; }
DIE *getDIE() const { return TheDIE; }
@@ -224,14 +230,14 @@ public:
void fixInstructionMarkers(DenseMap<const MachineInstr *,
unsigned> &MIIndexMap) {
- assert (getFirstInsn() && "First instruction is missing!");
+ assert(getFirstInsn() && "First instruction is missing!");
// Use the end of last child scope as end of this scope.
const SmallVector<DbgScope *, 4> &Scopes = getScopes();
const MachineInstr *LastInsn = getFirstInsn();
unsigned LIndex = 0;
if (Scopes.empty()) {
- assert (getLastInsn() && "Inner most scope does not have last insn!");
+ assert(getLastInsn() && "Inner most scope does not have last insn!");
return;
}
for (SmallVector<DbgScope *, 4>::const_iterator SI = Scopes.begin(),
@@ -295,15 +301,15 @@ DbgScope::~DbgScope() {
DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
: DwarfPrinter(OS, A, T), ModuleCU(0),
AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
- DIEValues(), SectionSourceLines(), didInitial(false), shouldEmit(false),
- CurrentFnDbgScope(0), DebugTimer(0) {
+ DIEBlocks(), SectionSourceLines(), didInitial(false), shouldEmit(false),
+ CurrentFnDbgScope(0), PrevDILoc(0), DebugTimer(0) {
NextStringPoolNumber = 0;
if (TimePassesIsEnabled)
DebugTimer = new Timer("Dwarf Debug Writer");
}
DwarfDebug::~DwarfDebug() {
- for (unsigned j = 0, M = DIEValues.size(); j < M; ++j)
- delete DIEValues[j];
+ for (unsigned j = 0, M = DIEBlocks.size(); j < M; ++j)
+ DIEBlocks[j]->~DIEBlock();
delete DebugTimer;
}
@@ -343,8 +349,7 @@ void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) {
/// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug
/// information entry.
DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
- DIEEntry *Value = new DIEEntry(Entry);
- DIEValues.push_back(Value);
+ DIEEntry *Value = new (DIEValueAllocator) DIEEntry(Entry);
return Value;
}
@@ -353,8 +358,7 @@ DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) {
void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
unsigned Form, uint64_t Integer) {
if (!Form) Form = DIEInteger::BestForm(false, Integer);
- DIEValue *Value = new DIEInteger(Integer);
- DIEValues.push_back(Value);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
Die->addValue(Attribute, Form, Value);
}
@@ -363,8 +367,7 @@ void DwarfDebug::addUInt(DIE *Die, unsigned Attribute,
void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
unsigned Form, int64_t Integer) {
if (!Form) Form = DIEInteger::BestForm(true, Integer);
- DIEValue *Value = new DIEInteger(Integer);
- DIEValues.push_back(Value);
+ DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer);
Die->addValue(Attribute, Form, Value);
}
@@ -372,8 +375,7 @@ void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
/// keeps string reference.
void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
StringRef String) {
- DIEValue *Value = new DIEString(String);
- DIEValues.push_back(Value);
+ DIEValue *Value = new (DIEValueAllocator) DIEString(String);
Die->addValue(Attribute, Form, Value);
}
@@ -381,18 +383,7 @@ void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
///
void DwarfDebug::addLabel(DIE *Die, unsigned Attribute, unsigned Form,
const MCSymbol *Label) {
- DIEValue *Value = new DIELabel(Label);
- DIEValues.push_back(Value);
- Die->addValue(Attribute, Form, Value);
-}
-
-/// addSectionOffset - Add a section offset label attribute data and value.
-///
-void DwarfDebug::addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
- const MCSymbol *Label,const MCSymbol *Section,
- bool isEH) {
- DIEValue *Value = new DIESectionOffset(Label, Section, isEH);
- DIEValues.push_back(Value);
+ DIEValue *Value = new (DIEValueAllocator) DIELabel(Label);
Die->addValue(Attribute, Form, Value);
}
@@ -400,8 +391,7 @@ void DwarfDebug::addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
///
void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
const MCSymbol *Hi, const MCSymbol *Lo) {
- DIEValue *Value = new DIEDelta(Hi, Lo);
- DIEValues.push_back(Value);
+ DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo);
Die->addValue(Attribute, Form, Value);
}
@@ -410,7 +400,7 @@ void DwarfDebug::addDelta(DIE *Die, unsigned Attribute, unsigned Form,
void DwarfDebug::addBlock(DIE *Die, unsigned Attribute, unsigned Form,
DIEBlock *Block) {
Block->ComputeSize(TD);
- DIEValues.push_back(Block);
+ DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on.
Die->addValue(Attribute, Block->BestForm(), Block);
}
@@ -457,8 +447,8 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
unsigned Line = SP->getLineNumber();
if (!SP->getContext().Verify())
return;
- unsigned FileID = GetOrCreateSourceID(SP->getContext().getDirectory(),
- SP->getContext().getFilename());
+ unsigned FileID = GetOrCreateSourceID(SP->getDirectory(),
+ SP->getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -564,7 +554,7 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
// Decode the original location, and use that as the start of the byref
// variable's location.
unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
- DIEBlock *Block = new DIEBlock();
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
if (Location.isReg()) {
if (Reg < 32) {
@@ -696,15 +686,15 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
}
// Get the offsets for the forwarding field and the variable field.
- unsigned int forwardingFieldOffset =
+ unsigned forwardingFieldOffset =
DIDerivedType(forwardingField.getNode()).getOffsetInBits() >> 3;
- unsigned int varFieldOffset =
+ unsigned varFieldOffset =
DIDerivedType(varField.getNode()).getOffsetInBits() >> 3;
// Decode the original location, and use that as the start of the byref
// variable's location.
unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
- DIEBlock *Block = new DIEBlock();
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
if (Location.isReg()) {
if (Reg < 32)
@@ -759,7 +749,7 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
const MachineLocation &Location) {
unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
- DIEBlock *Block = new DIEBlock();
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
if (Location.isReg()) {
if (Reg < 32) {
@@ -1106,7 +1096,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
addSourceLine(MemberDie, &DT);
- DIEBlock *MemLocationDie = new DIEBlock();
+ DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock();
addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
uint64_t Size = DT.getSizeInBits();
@@ -1142,7 +1132,7 @@ DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
// expression to extract appropriate offset from vtable.
// BaseAddr = ObAddr + *((*ObAddr) - Offset)
- DIEBlock *VBaseLocationDie = new DIEBlock();
+ DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock();
addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup);
addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
@@ -1208,7 +1198,7 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
unsigned VK = SP.getVirtuality();
if (VK) {
addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
- DIEBlock *Block = new DIEBlock();
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
@@ -1244,13 +1234,13 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
return SPDie;
}
-/// getUpdatedDbgScope - Find or create DbgScope assicated with the instruction.
-/// Initialize scope and update scope hierarchy.
+/// getUpdatedDbgScope - Find DbgScope associated with the instruction.
+/// Update scope hierarchy. Create abstract scope if required.
DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
- MDNode *InlinedAt) {
- assert (N && "Invalid Scope encoding!");
- assert (MI && "Missing machine instruction!");
- bool GetConcreteScope = (MI && InlinedAt);
+ MDNode *InlinedAt) {
+ assert(N && "Invalid Scope encoding!");
+ assert(MI && "Missing machine instruction!");
+ bool isAConcreteScope = InlinedAt != 0;
DbgScope *NScope = NULL;
@@ -1258,17 +1248,17 @@ DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
NScope = DbgScopeMap.lookup(InlinedAt);
else
NScope = DbgScopeMap.lookup(N);
- assert (NScope && "Unable to find working scope!");
+ assert(NScope && "Unable to find working scope!");
if (NScope->getFirstInsn())
return NScope;
DbgScope *Parent = NULL;
- if (GetConcreteScope) {
+ if (isAConcreteScope) {
DILocation IL(InlinedAt);
Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI,
IL.getOrigLocation().getNode());
- assert (Parent && "Unable to find Parent scope!");
+ assert(Parent && "Unable to find Parent scope!");
NScope->setParent(Parent);
Parent->addScope(NScope);
} else if (DIDescriptor(N).isLexicalBlock()) {
@@ -1286,7 +1276,7 @@ DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
CurrentFnDbgScope = NScope;
}
- if (GetConcreteScope) {
+ if (isAConcreteScope) {
ConcreteScopes[InlinedAt] = NScope;
getOrCreateAbstractScope(N);
}
@@ -1295,7 +1285,7 @@ DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
}
DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
- assert (N && "Invalid Scope encoding!");
+ assert(N && "Invalid Scope encoding!");
DbgScope *AScope = AbstractScopes.lookup(N);
if (AScope)
@@ -1377,7 +1367,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
MCSymbol *Start = Scope->getStartLabel();
MCSymbol *End = Scope->getEndLabel();
- if (Start == 0) return 0;
+ if (Start == 0 || End == 0) return 0;
assert(Start->isDefined() && "Invalid starting label for an inlined scope!");
assert(End->isDefined() && "Invalid end label for an inlined scope!");
@@ -1400,7 +1390,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(DbgScope *Scope) {
DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
MCSymbol *StartLabel = Scope->getStartLabel();
MCSymbol *EndLabel = Scope->getEndLabel();
- if (StartLabel == 0) return 0;
+ if (StartLabel == 0 || EndLabel == 0) return 0;
assert(StartLabel->isDefined() &&
"Invalid starting label for an inlined scope!");
@@ -1413,7 +1403,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
DISubprogram InlinedSP = getDISubprogram(DS.getNode());
DIE *OriginDIE = ModuleCU->getDIE(InlinedSP.getNode());
- assert (OriginDIE && "Unable to find Origin DIE!");
+ assert(OriginDIE && "Unable to find Origin DIE!");
addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, OriginDIE);
@@ -1477,9 +1467,9 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
DISubprogram InlinedSP = getDISubprogram(DS.getNode());
DIE *OriginSPDIE = ModuleCU->getDIE(InlinedSP.getNode());
(void) OriginSPDIE;
- assert (OriginSPDIE && "Unable to find Origin DIE for the SP!");
+ assert(OriginSPDIE && "Unable to find Origin DIE for the SP!");
DIE *AbsDIE = DV->getAbstractVariable()->getDIE();
- assert (AbsDIE && "Unable to find Origin DIE for the Variable!");
+ assert(AbsDIE && "Unable to find Origin DIE for the Variable!");
addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
dwarf::DW_FORM_ref4, AbsDIE);
}
@@ -1508,12 +1498,18 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
MachineLocation Location;
Location.set(DbgValueInsn->getOperand(0).getReg());
addAddress(VariableDie, dwarf::DW_AT_location, Location);
+ if (MCSymbol *VS = DV->getDbgValueLabel())
+ addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
+ VS);
} else if (DbgValueInsn->getOperand(0).getType() ==
MachineOperand::MO_Immediate) {
- DIEBlock *Block = new DIEBlock();
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
unsigned Imm = DbgValueInsn->getOperand(0).getImm();
addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
addBlock(VariableDie, dwarf::DW_AT_const_value, 0, Block);
+ if (MCSymbol *VS = DV->getDbgValueLabel())
+ addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
+ VS);
} else {
//FIXME : Handle other operand types.
delete VariableDie;
@@ -1523,7 +1519,8 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
} else {
MachineLocation Location;
unsigned FrameReg;
- int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg);
+ int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(),
+ FrameReg);
Location.set(FrameReg, Offset);
if (VD.hasComplexAddress())
@@ -1576,10 +1573,9 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
else
ScopeDIE = updateSubprogramScopeDIE(DS.getNode());
}
- else {
+ else
ScopeDIE = constructLexicalScopeDIE(Scope);
- if (!ScopeDIE) return NULL;
- }
+ if (!ScopeDIE) return NULL;
// Add variables to scope.
const SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
@@ -1608,7 +1604,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
/// maps as well.
-unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName) {
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){
unsigned DId;
StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
if (DI != DirectoryIdMap.end()) {
@@ -1666,15 +1662,19 @@ void DwarfDebug::constructCompileUnit(MDNode *N) {
unsigned ID = GetOrCreateSourceID(Dir, FN);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
- // FIXME: Why getting the delta between two identical labels??
- addSectionOffset(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4,
- getTempLabel("section_line"), getTempLabel("section_line"),
- false);
addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
DIUnit.getProducer());
addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data1,
DIUnit.getLanguage());
addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN);
+ addLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ getTempLabel("text_begin"));
+ addLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ getTempLabel("text_end"));
+ // DW_AT_stmt_list is an offset of line number information for this
+ // compile unit in debug_line section. It is always zero when only one
+ // compile unit is emitted in one object file.
+ addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0);
if (!Dir.empty())
addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir);
@@ -1717,13 +1717,13 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
DIDescriptor GVContext = DI_GV.getContext();
// Do not create specification DIE if context is either compile unit
// or a subprogram.
- if (DI_GV.isDefinition() && !GVContext.isCompileUnit()
- && !GVContext.isFile() && !GVContext.isSubprogram()) {
+ if (DI_GV.isDefinition() && !GVContext.isCompileUnit() &&
+ !GVContext.isFile() && !GVContext.isSubprogram()) {
// Create specification DIE.
DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable);
addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification,
dwarf::DW_FORM_ref4, VariableDie);
- DIEBlock *Block = new DIEBlock();
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
addLabel(Block, 0, dwarf::DW_FORM_udata,
Asm->Mang->getSymbol(DI_GV.getGlobal()));
@@ -1731,7 +1731,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
addUInt(VariableDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
ModuleCU->addDie(VariableSpecDIE);
} else {
- DIEBlock *Block = new DIEBlock();
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
addLabel(Block, 0, dwarf::DW_FORM_udata,
Asm->Mang->getSymbol(DI_GV.getGlobal()));
@@ -1745,7 +1745,7 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
DIType GTy = DI_GV.getType();
if (GTy.isCompositeType() && !GTy.getName().empty()) {
DIEEntry *Entry = ModuleCU->getDIEEntry(GTy.getNode());
- assert (Entry && "Missing global type!");
+ assert(Entry && "Missing global type!");
ModuleCU->addGlobalType(GTy.getName(), Entry->getEntry());
}
return;
@@ -1783,12 +1783,11 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) {
void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) {
this->M = M;
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
if (!MAI->doesSupportDebugInformation())
return;
+ TimeRegion Timer(DebugTimer);
+
DebugInfoFinder DbgFinder;
DbgFinder.processModule(*M);
@@ -1836,9 +1835,6 @@ void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) {
// Emit initial sections
emitInitial();
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
}
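
The recurring cleanup in this hunk and several below replaces paired
startTimer()/stopTimer() calls with a scoped TimeRegion, so early returns can
no longer leave the timer running. A minimal sketch of the RAII idea behind
it (an illustrative stand-in, not the exact llvm::TimeRegion implementation):

// RAII timer scope: starts on construction, stops on destruction.
// Tolerates a null timer, matching how DebugTimer may be unset.
class ScopedTimerRegion {
  Timer *T;
public:
  explicit ScopedTimerRegion(Timer *t) : T(t) { if (T) T->startTimer(); }
  ~ScopedTimerRegion() { if (T) T->stopTimer(); }
};

Every "TimeRegion Timer(DebugTimer);" below relies on exactly this: the
destructor runs on every exit path, including the early returns.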
/// endModule - Emit all Dwarf sections that should come after the content.
@@ -1847,8 +1843,7 @@ void DwarfDebug::endModule() {
if (!ModuleCU)
return;
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
+ TimeRegion Timer(DebugTimer);
// Attach DW_AT_inline attribute with inlined subprogram DIEs.
for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
@@ -1871,7 +1866,7 @@ void DwarfDebug::endModule() {
if (!NDie) continue;
addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
// FIXME - This is not the correct approach.
- // addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+    //addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
}
// Standard sections final addresses.
@@ -1932,9 +1927,6 @@ void DwarfDebug::endModule() {
delete ModuleCU;
ModuleCU = NULL; // Reset for the next Module, if any.
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
}
/// findAbstractVariable - Find abstract variable, if any, associated with Var.
@@ -1971,10 +1963,11 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var,
if (!Scope)
return NULL;
- AbsDbgVariable = new DbgVariable(Var, MI,
+ AbsDbgVariable = new DbgVariable(Var, MI,
NULL /* No more-abstract variable*/);
Scope->addVariable(AbsDbgVariable);
AbstractVariables[Var.getNode()] = AbsDbgVariable;
+ DbgValueStartMap[MI] = AbsDbgVariable;
return AbsDbgVariable;
}
@@ -2010,16 +2003,19 @@ void DwarfDebug::collectVariableInfo() {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
- if (MInsn->getOpcode() != TargetOpcode::DBG_VALUE)
+ if (!MInsn->isDebugValue())
continue;
+
// FIXME : Lift this restriction.
if (MInsn->getNumOperands() != 3)
continue;
- DIVariable DV((MDNode*)(MInsn->getOperand(MInsn->getNumOperands() - 1).getMetadata()));
+ DIVariable DV((MDNode*)(MInsn->getOperand(MInsn->getNumOperands()
+ - 1).getMetadata()));
if (DV.getTag() == dwarf::DW_TAG_arg_variable) {
// FIXME Handle inlined subroutine arguments.
DbgVariable *ArgVar = new DbgVariable(DV, MInsn, NULL);
CurrentFnDbgScope->addVariable(ArgVar);
+ DbgValueStartMap[MInsn] = ArgVar;
continue;
}
@@ -2034,19 +2030,54 @@ void DwarfDebug::collectVariableInfo() {
if (!Scope)
continue;
- DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn,
- ScopeLoc);
+ DbgVariable *AbsDbgVariable = findAbstractVariable(DV, MInsn, ScopeLoc);
DbgVariable *RegVar = new DbgVariable(DV, MInsn, AbsDbgVariable);
+ DbgValueStartMap[MInsn] = RegVar;
Scope->addVariable(RegVar);
}
}
}
-/// beginScope - Process beginning of a scope starting at Label.
-void DwarfDebug::beginScope(const MachineInstr *MI, MCSymbol *Label) {
+/// beginScope - Process beginning of a scope.
+void DwarfDebug::beginScope(const MachineInstr *MI) {
+ // Check location.
+ DebugLoc DL = MI->getDebugLoc();
+ if (DL.isUnknown())
+ return;
+ DILocation DILoc = MF->getDILocation(DL);
+ if (!DILoc.getScope().Verify())
+ return;
+
+ // Check and update last known location info.
+  if (DILoc.getNode() == PrevDILoc)
+ return;
+
+ // DBG_VALUE instruction establishes new value.
+ if (MI->isDebugValue()) {
+ DenseMap<const MachineInstr *, DbgVariable *>::iterator DI
+ = DbgValueStartMap.find(MI);
+ if (DI != DbgValueStartMap.end()) {
+ MCSymbol *Label = recordSourceLine(DILoc.getLineNumber(),
+ DILoc.getColumnNumber(),
+ DILoc.getScope().getNode());
+ PrevDILoc = DILoc.getNode();
+ DI->second->setDbgValueLabel(Label);
+ }
+ return;
+ }
+
+  // Emit a label to indicate location change. This is used for the line
+  // table even if this instruction does not start a new scope.
+ MCSymbol *Label = recordSourceLine(DILoc.getLineNumber(),
+ DILoc.getColumnNumber(),
+ DILoc.getScope().getNode());
+ PrevDILoc = DILoc.getNode();
+
+  // Update DbgScope if this instruction starts a new scope.
InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI);
if (I == DbgScopeBeginMap.end())
return;
+
ScopeVector &SD = I->second;
for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end();
SDI != SDE; ++SDI)
@@ -2055,6 +2086,19 @@ void DwarfDebug::beginScope(const MachineInstr *MI, MCSymbol *Label) {
/// endScope - Process end of a scope.
void DwarfDebug::endScope(const MachineInstr *MI) {
+ // Ignore DBG_VALUE instruction.
+ if (MI->isDebugValue())
+ return;
+
+ // Check location.
+ DebugLoc DL = MI->getDebugLoc();
+ if (DL.isUnknown())
+ return;
+ DILocation DILoc = MF->getDILocation(DL);
+ if (!DILoc.getScope().Verify())
+ return;
+
+ // Emit a label and update DbgScope if this instruction ends a scope.
InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI);
if (I == DbgScopeEndMap.end())
return;
@@ -2094,7 +2138,7 @@ void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {
}
/// extractScopeInformation - Scan machine instructions in this function
-/// and collect DbgScopes. Return true, if atleast one scope was found.
+/// and collect DbgScopes. Return true, if at least one scope was found.
bool DwarfDebug::extractScopeInformation() {
  // If scope information was extracted using .dbg intrinsics then there is no
  // need to extract this information by scanning each instruction.
@@ -2110,12 +2154,13 @@ bool DwarfDebug::extractScopeInformation() {
II != IE; ++II) {
const MachineInstr *MInsn = II;
// FIXME : Remove DBG_VALUE check.
- if (MInsn->getOpcode() == TargetOpcode::DBG_VALUE) continue;
+ if (MInsn->isDebugValue()) continue;
MIIndexMap[MInsn] = MIIndex++;
DebugLoc DL = MInsn->getDebugLoc();
if (DL.isUnknown()) continue;
DILocation DLT = MF->getDILocation(DL);
DIScope DLTScope = DLT.getScope();
+ if (!DLTScope.getNode()) continue;
// There is no need to create another DIE for compile unit. For all
// other scopes, create one DbgScope now. This will be translated
// into a scope DIE at the end.
@@ -2132,11 +2177,12 @@ bool DwarfDebug::extractScopeInformation() {
II != IE; ++II) {
const MachineInstr *MInsn = II;
// FIXME : Remove DBG_VALUE check.
- if (MInsn->getOpcode() == TargetOpcode::DBG_VALUE) continue;
+ if (MInsn->isDebugValue()) continue;
DebugLoc DL = MInsn->getDebugLoc();
if (DL.isUnknown()) continue;
DILocation DLT = MF->getDILocation(DL);
DIScope DLTScope = DLT.getScope();
+ if (!DLTScope.getNode()) continue;
// There is no need to create another DIE for compile unit. For all
// other scopes, create one DbgScope now. This will be translated
// into a scope DIE at the end.
@@ -2159,7 +2205,7 @@ bool DwarfDebug::extractScopeInformation() {
SmallVector<DbgScope *, 4> WorkList;
WorkList.push_back(CurrentFnDbgScope);
while (!WorkList.empty()) {
- DbgScope *S = WorkList.back(); WorkList.pop_back();
+ DbgScope *S = WorkList.pop_back_val();
const SmallVector<DbgScope *, 4> &Children = S->getScopes();
if (!Children.empty())
@@ -2170,7 +2216,7 @@ bool DwarfDebug::extractScopeInformation() {
if (S->isAbstractScope())
continue;
const MachineInstr *MI = S->getFirstInsn();
- assert (MI && "DbgScope does not have first instruction!");
+ assert(MI && "DbgScope does not have first instruction!");
InsnToDbgScopeMapTy::iterator IDI = DbgScopeBeginMap.find(MI);
if (IDI != DbgScopeBeginMap.end())
@@ -2179,7 +2225,7 @@ bool DwarfDebug::extractScopeInformation() {
DbgScopeBeginMap[MI].push_back(S);
MI = S->getLastInsn();
- assert (MI && "DbgScope does not have last instruction!");
+ assert(MI && "DbgScope does not have last instruction!");
IDI = DbgScopeEndMap.find(MI);
if (IDI != DbgScopeEndMap.end())
IDI->second.push_back(S);
@@ -2196,12 +2242,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
this->MF = MF;
if (!ShouldEmitDwarfDebug()) return;
-
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
if (!extractScopeInformation())
return;
+
+ TimeRegion Timer(DebugTimer);
collectVariableInfo();
@@ -2225,20 +2269,15 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
recordSourceLine(Line, Col, DLT.getScope().getNode());
}
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
}
/// endFunction - Gather and emit post-function debug information.
///
void DwarfDebug::endFunction(const MachineFunction *MF) {
if (!ShouldEmitDwarfDebug()) return;
-
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- if (DbgScopeMap.empty())
- return;
+ if (DbgScopeMap.empty()) return;
+
+ TimeRegion Timer(DebugTimer);
if (CurrentFnDbgScope) {
// Define end label for subprogram.
@@ -2271,14 +2310,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DeleteContainerSeconds(DbgScopeMap);
DbgScopeBeginMap.clear();
DbgScopeEndMap.clear();
+ DbgValueStartMap.clear();
ConcreteScopes.clear();
DeleteContainerSeconds(AbstractScopes);
AbstractScopesList.clear();
AbstractVariables.clear();
Lines.clear();
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
}
/// recordSourceLine - Register a source line with debug info. Returns the
@@ -2288,8 +2325,7 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) {
if (!MMI)
return 0;
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
+ TimeRegion Timer(DebugTimer);
StringRef Dir;
StringRef Fn;
@@ -2314,9 +2350,6 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) {
MCSymbol *Label = MMI->getContext().CreateTempSymbol();
Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
Asm->OutStreamer.EmitLabel(Label);
return Label;
}
@@ -2328,15 +2361,8 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, MDNode *S) {
/// well.
unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
const std::string &FileName) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- unsigned SrcId = GetOrCreateSourceID(DirName.c_str(), FileName.c_str());
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return SrcId;
+ TimeRegion Timer(DebugTimer);
+ return GetOrCreateSourceID(DirName.c_str(), FileName.c_str());
}
//===----------------------------------------------------------------------===//
@@ -2351,7 +2377,8 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) {
const std::vector<DIE *> &Children = Die->getChildren();
// If not last sibling and has children then add sibling offset attribute.
- if (!Last && !Children.empty()) Die->addSiblingOffset();
+ if (!Last && !Children.empty())
+ Die->addSiblingOffset(DIEValueAllocator);
// Record the abbreviation.
assignAbbrevNumber(Die->getAbbrev());
@@ -2465,7 +2492,7 @@ void DwarfDebug::emitDIE(DIE *Die) {
dwarf::TagString(Abbrev->getTag()));
EmitULEB128(AbbrevNumber);
- SmallVector<DIEValue*, 32> &Values = Die->getValues();
+ const SmallVector<DIEValue*, 32> &Values = Die->getValues();
const SmallVector<DIEAbbrevData, 8> &AbbrevData = Abbrev->getData();
// Emit the DIE attribute values.
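
A note on the "new (DIEValueAllocator) DIEBlock()" changes above: DIE values
are now carved out of a bump allocator instead of being heap-allocated one by
one. A hedged usage sketch, assuming the placement-new overload for
BumpPtrAllocator that llvm/Support/Allocator.h provides:

llvm::BumpPtrAllocator Pool;
DIEBlock *Block = new (Pool) DIEBlock();  // storage comes from the pool
// There is no per-object delete: the pool releases all of its memory at
// once. Objects that still need their destructor run are tracked on the
// side, which is presumably what the new DIEBlocks vector in DwarfDebug.h
// (below) is for.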
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index d6634e1..ad6b0c2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
@@ -98,9 +99,11 @@ class DwarfDebug : public DwarfPrinter {
/// Lines - List of source line correspondence.
std::vector<SrcLineInfo> Lines;
- /// DIEValues - A list of all the unique values in use.
- ///
- std::vector<DIEValue *> DIEValues;
+ /// DIEBlocks - A list of all the DIEBlocks in use.
+ std::vector<DIEBlock *> DIEBlocks;
+
+  /// DIEValueAllocator - All DIEValues are allocated through this allocator.
+ BumpPtrAllocator DIEValueAllocator;
/// StringPool - A String->Symbol mapping of strings used by indirect
/// references.
@@ -141,12 +144,21 @@ class DwarfDebug : public DwarfPrinter {
  /// AbstractScopes - Tracks the abstract scopes of a module. These scopes
  /// are not included in DbgScopeMap. AbstractScopes owns its DbgScope*s.
DenseMap<MDNode *, DbgScope *> AbstractScopes;
+
+ /// AbstractScopesList - Tracks abstract scopes constructed while processing
+ /// a function. This list is cleared during endFunction().
  SmallVector<DbgScope *, 4> AbstractScopesList;
  /// AbstractVariables - Collection of abstract variables. Owned by the
/// DbgScopes in AbstractScopes.
DenseMap<MDNode *, DbgVariable *> AbstractVariables;
+ /// DbgValueStartMap - Tracks starting scope of variable DIEs.
+ /// If the scope of an object begins sometime after the low pc value for the
+ /// scope most closely enclosing the object, the object entry may have a
+ /// DW_AT_start_scope attribute.
+ DenseMap<const MachineInstr *, DbgVariable *> DbgValueStartMap;
+
  /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
/// (at the end of the module) as DW_AT_inline.
SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
@@ -181,6 +193,10 @@ class DwarfDebug : public DwarfPrinter {
/// function.
DenseMap<CompileUnit *, unsigned> CompileUnitOffsets;
+ /// Previous instruction's location information. This is used to determine
+  /// label location to indicate scope boundaries in dwarf debug info.
+ mutable const MDNode *PrevDILoc;
+
/// DebugTimer - Timer for the Dwarf debug writer.
Timer *DebugTimer;
@@ -250,12 +266,6 @@ class DwarfDebug : public DwarfPrinter {
void addLabel(DIE *Die, unsigned Attribute, unsigned Form,
const MCSymbol *Label);
- /// addSectionOffset - Add a section offset label attribute data and value.
- ///
- void addSectionOffset(DIE *Die, unsigned Attribute, unsigned Form,
- const MCSymbol *Label, const MCSymbol *Section,
- bool isEH = false);
-
/// addDelta - Add a label delta attribute data and value.
///
void addDelta(DIE *Die, unsigned Attribute, unsigned Form,
@@ -545,8 +555,8 @@ public:
/// collectVariableInfo - Populate DbgScope entries with variables' info.
void collectVariableInfo();
- /// beginScope - Process beginning of a scope starting at Label.
- void beginScope(const MachineInstr *MI, MCSymbol *Label);
+ /// beginScope - Process beginning of a scope.
+ void beginScope(const MachineInstr *MI);
  /// endScope - Process end of a scope.
void endScope(const MachineInstr *MI);
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 4946b4c..8b616b0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -419,23 +419,24 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI->getOperand(I);
- if (MO.isGlobal()) {
- if (Function *F = dyn_cast<Function>(MO.getGlobal())) {
- if (SawFunc) {
- // Be conservative. If we have more than one function operand for this
- // call, then we can't make the assumption that it's the callee and
- // not a parameter to the call.
- //
- // FIXME: Determine if there's a way to say that `F' is the callee or
- // parameter.
- MarkedNoUnwind = false;
- break;
- }
-
- MarkedNoUnwind = F->doesNotThrow();
- SawFunc = true;
- }
+ if (!MO.isGlobal()) continue;
+
+ Function *F = dyn_cast<Function>(MO.getGlobal());
+ if (F == 0) continue;
+
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
}
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
}
return MarkedNoUnwind;
@@ -504,7 +505,10 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
LastLabel = LandingPad->EndLabels[P.RangeIndex];
assert(BeginLabel && LastLabel && "Invalid landing pad!");
- if (LandingPad->LandingPadLabel) {
+ if (!LandingPad->LandingPadLabel) {
+ // Create a gap.
+ PreviousIsInvoke = false;
+ } else {
// This try-range is for an invoke.
CallSiteEntry Site = {
BeginLabel,
@@ -536,9 +540,6 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
CallSites[SiteNo - 1] = Site;
}
PreviousIsInvoke = true;
- } else {
- // Create a gap.
- PreviousIsInvoke = false;
}
}
}
@@ -885,8 +886,7 @@ void DwarfException::EndModule() {
if (!shouldEmitMovesModule && !shouldEmitTableModule)
return;
- if (TimePassesIsEnabled)
- ExceptionTimer->startTimer();
+ TimeRegion Timer(ExceptionTimer);
const std::vector<Function *> Personalities = MMI->getPersonalities();
@@ -896,9 +896,6 @@ void DwarfException::EndModule() {
for (std::vector<FunctionEHFrameInfo>::iterator
I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
EmitFDE(*I);
-
- if (TimePassesIsEnabled)
- ExceptionTimer->stopTimer();
}
/// BeginFunction - Gather pre-function exception information. Assumes it's
@@ -906,9 +903,7 @@ void DwarfException::EndModule() {
void DwarfException::BeginFunction(const MachineFunction *MF) {
if (!MMI || !MAI->doesSupportExceptionHandling()) return;
- if (TimePassesIsEnabled)
- ExceptionTimer->startTimer();
-
+ TimeRegion Timer(ExceptionTimer);
this->MF = MF;
shouldEmitTable = shouldEmitMoves = false;
@@ -924,9 +919,6 @@ void DwarfException::BeginFunction(const MachineFunction *MF) {
shouldEmitTableModule |= shouldEmitTable;
shouldEmitMovesModule |= shouldEmitMoves;
-
- if (TimePassesIsEnabled)
- ExceptionTimer->stopTimer();
}
/// EndFunction - Gather and emit post-function exception information.
@@ -934,9 +926,7 @@ void DwarfException::BeginFunction(const MachineFunction *MF) {
void DwarfException::EndFunction() {
if (!shouldEmitMoves && !shouldEmitTable) return;
- if (TimePassesIsEnabled)
- ExceptionTimer->startTimer();
-
+ TimeRegion Timer(ExceptionTimer);
Asm->OutStreamer.EmitLabel(getDWLabel("eh_func_end", SubprogramCount));
// Record if this personality index uses a landing pad.
@@ -961,7 +951,4 @@ void DwarfException::EndFunction() {
!MMI->getLandingPads().empty(),
MMI->getFrameMoves(),
MF->getFunction()));
-
- if (TimePassesIsEnabled)
- ExceptionTimer->stopTimer();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
index e212696..17eb2e8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -45,14 +45,14 @@ MCSymbol *DwarfPrinter::getDWLabel(const char *Name, unsigned ID) const {
//assert(ID && "Should use getTempLabel if no ID");
if (ID == 0) return getTempLabel(Name);
- return Asm->OutContext.GetOrCreateTemporarySymbol
+ return Asm->OutContext.GetOrCreateSymbol
(Twine(MAI->getPrivateGlobalPrefix()) + Twine(Name) + Twine(ID));
}
/// getTempLabel - Return the MCSymbol corresponding to the assembler temporary
/// label with the specified name.
MCSymbol *DwarfPrinter::getTempLabel(const char *Name) const {
- return Asm->OutContext.GetOrCreateTemporarySymbol
+ return Asm->OutContext.GetOrCreateSymbol
(Twine(MAI->getPrivateGlobalPrefix()) + Name);
}
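
Both label helpers now funnel through MCContext::GetOrCreateSymbol; the
temporary-symbol variant is being retired in this revision. A hedged sketch
of the Twine-based name construction (Ctx and PrivatePrefix are assumed to
be in scope):

// Builds e.g. "Lsection_line" without materializing intermediate
// std::strings; Twine concatenation is evaluated lazily.
MCSymbol *Sym = Ctx.GetOrCreateSymbol(Twine(PrivatePrefix) + "section_line");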
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
index 9fd4c44..a2d7ab1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -73,27 +73,14 @@ void DwarfWriter::EndFunction(const MachineFunction *MF) {
MMI->EndFunction();
}
-/// RecordSourceLine - Register a source line with debug info. Returns the
-/// unique label that was emitted and which provides correspondence to
-/// the source line list.
-MCSymbol *DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col,
- MDNode *Scope) {
- return DD->recordSourceLine(Line, Col, Scope);
-}
-
-/// getRecordSourceLineCount - Count source lines.
-unsigned DwarfWriter::getRecordSourceLineCount() {
- return DD->getSourceLineCount();
-}
-
/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
/// be emitted.
bool DwarfWriter::ShouldEmitDwarfDebug() const {
return DD && DD->ShouldEmitDwarfDebug();
}
-void DwarfWriter::BeginScope(const MachineInstr *MI, MCSymbol *L) {
- DD->beginScope(MI, L);
+void DwarfWriter::BeginScope(const MachineInstr *MI) {
+ DD->beginScope(MI);
}
void DwarfWriter::EndScope(const MachineInstr *MI) {
DD->endScope(MI);
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 151e9cd..8f51940 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -972,15 +972,21 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
// MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
// optimize branches that branch to either a return block or an assert block
// into a fallthrough to the return.
- if (MBB1->empty() || MBB2->empty()) return false;
+ if (IsEmptyBlock(MBB1) || IsEmptyBlock(MBB2)) return false;
// If there is a clear successor ordering we make sure that one block
// will fall through to the next
if (MBB1->isSuccessor(MBB2)) return true;
if (MBB2->isSuccessor(MBB1)) return false;
- MachineInstr *MBB1I = --MBB1->end();
- MachineInstr *MBB2I = --MBB2->end();
+ // Neither block consists entirely of debug info (per IsEmptyBlock check),
+ // so we needn't test for falling off the beginning here.
+ MachineBasicBlock::iterator MBB1I = --MBB1->end();
+ while (MBB1I->isDebugValue())
+ --MBB1I;
+ MachineBasicBlock::iterator MBB2I = --MBB2->end();
+ while (MBB2I->isDebugValue())
+ --MBB2I;
return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
}
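
The new loop walks backwards past DBG_VALUE instructions so the call/no-call
comparison sees the last real instruction of each block. The same walk as a
hedged helper sketch (it assumes, as the comment in the hunk notes, that the
block holds at least one non-debug instruction):

static MachineBasicBlock::iterator lastRealInstr(MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator I = --MBB->end();
  while (I->isDebugValue())  // safe: the block is known not to be all-debug
    --I;
  return I;
}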
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 39fc85e..8bae9ed 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -8,19 +8,19 @@
//===----------------------------------------------------------------------===//
//
// This pass mulches exception handling code into a form adapted to code
-// generation. Required if using dwarf exception handling.
+// generation. Required if using dwarf exception handling.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "dwarfehprepare"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
-#include "llvm/CodeGen/Passes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -40,6 +40,15 @@ namespace {
// The eh.exception intrinsic.
Function *ExceptionValueIntrinsic;
+ // The eh.selector intrinsic.
+ Function *SelectorIntrinsic;
+
+ // _Unwind_Resume_or_Rethrow call.
+ Constant *URoR;
+
+ // The EH language-specific catch-all type.
+ GlobalVariable *EHCatchAllValue;
+
// _Unwind_Resume or the target equivalent.
Constant *RewindFunction;
@@ -67,18 +76,88 @@ namespace {
Instruction *CreateValueLoad(BasicBlock *BB);
/// CreateReadOfExceptionValue - Return the result of the eh.exception
- /// intrinsic by calling the intrinsic if in a landing pad, or loading
- /// it from the exception value variable otherwise.
+ /// intrinsic by calling the intrinsic if in a landing pad, or loading it
+ /// from the exception value variable otherwise.
Instruction *CreateReadOfExceptionValue(BasicBlock *BB) {
return LandingPads.count(BB) ?
CreateExceptionValueCall(BB) : CreateValueLoad(BB);
}
+    /// CleanupSelectors - Any remaining eh.selector intrinsic calls which
+    /// still use the ".llvm.eh.catch.all.value" global need to be converted
+    /// to use its initializer instead.
+ bool CleanupSelectors();
+
+ /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+ void FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels);
+
+ /// FindAllURoRInvokes - Find all URoR invokes in the function.
+ void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
+
+ /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow"
+    /// calls. The "unwind" part of these invokes jumps to a landing pad within
+ /// the current function. This is a candidate to merge the selector
+ /// associated with the URoR invoke with the one from the URoR's landing
+ /// pad.
+ bool HandleURoRInvokes();
+
+ /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
+ /// with the eh.exception call. This recursively looks past instructions
+ /// which don't change the EH pointer value, like casts or PHI nodes.
+ bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+ SmallPtrSet<IntrinsicInst*, 8> &SelCalls);
+
+    /// DoMem2RegPromotion - Take an alloca and promote it from memory to a
+    /// register.
+ bool DoMem2RegPromotion(Value *V) {
+ AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ if (!AI || !isAllocaPromotable(AI)) return false;
+
+ // Turn the alloca into a register.
+ std::vector<AllocaInst*> Allocas(1, AI);
+ PromoteMemToReg(Allocas, *DT, *DF);
+ return true;
+ }
+
+ /// PromoteStoreInst - Perform Mem2Reg on a StoreInst.
+ bool PromoteStoreInst(StoreInst *SI) {
+ if (!SI || !DT || !DF) return false;
+ if (DoMem2RegPromotion(SI->getOperand(1)))
+ return true;
+ return false;
+ }
+
+ /// PromoteEHPtrStore - Promote the storing of an EH pointer into a
+ /// register. This should get rid of the store and subsequent loads.
+ bool PromoteEHPtrStore(IntrinsicInst *II) {
+ if (!DT || !DF) return false;
+
+ bool Changed = false;
+ StoreInst *SI;
+
+ while (1) {
+ SI = 0;
+ for (Value::use_iterator
+ I = II->use_begin(), E = II->use_end(); I != E; ++I) {
+ SI = dyn_cast<StoreInst>(I);
+ if (SI) break;
+ }
+
+ if (!PromoteStoreInst(SI))
+ break;
+
+ Changed = true;
+ }
+
+      return Changed;
+ }
+
public:
static char ID; // Pass identification, replacement for typeid.
DwarfEHPrepare(const TargetLowering *tli, bool fast) :
FunctionPass(&ID), TLI(tli), CompileFast(fast),
- ExceptionValueIntrinsic(0), RewindFunction(0) {}
+ ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
+ URoR(0), EHCatchAllValue(0), RewindFunction(0) {}
virtual bool runOnFunction(Function &Fn);
@@ -105,6 +184,233 @@ FunctionPass *llvm::createDwarfEHPass(const TargetLowering *tli, bool fast) {
return new DwarfEHPrepare(tli, fast);
}
+/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups.
+void DwarfEHPrepare::
+FindAllCleanupSelectors(SmallPtrSet<IntrinsicInst*, 32> &Sels) {
+ for (Value::use_iterator
+ I = SelectorIntrinsic->use_begin(),
+ E = SelectorIntrinsic->use_end(); I != E; ++I) {
+    IntrinsicInst *SI = dyn_cast<IntrinsicInst>(I);
+ if (!SI || SI->getParent()->getParent() != F) continue;
+
+ unsigned NumOps = SI->getNumOperands();
+ if (NumOps > 4) continue;
+ bool IsCleanUp = (NumOps == 3);
+
+ if (!IsCleanUp)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getOperand(3)))
+ IsCleanUp = (CI->getZExtValue() == 0);
+
+ if (IsCleanUp)
+ Sels.insert(SI);
+ }
+}
+
+/// FindAllURoRInvokes - Find all URoR invokes in the function.
+void DwarfEHPrepare::
+FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes) {
+ for (Value::use_iterator
+ I = URoR->use_begin(),
+ E = URoR->use_end(); I != E; ++I) {
+ if (InvokeInst *II = dyn_cast<InvokeInst>(I))
+ URoRInvokes.insert(II);
+ }
+}
+
+/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still
+/// use the ".llvm.eh.catch.all.value" global need to be converted to use its
+/// initializer instead.
+bool DwarfEHPrepare::CleanupSelectors() {
+ if (!EHCatchAllValue) return false;
+
+ if (!SelectorIntrinsic) {
+ SelectorIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+ if (!SelectorIntrinsic) return false;
+ }
+
+ bool Changed = false;
+ for (Value::use_iterator
+ I = SelectorIntrinsic->use_begin(),
+ E = SelectorIntrinsic->use_end(); I != E; ++I) {
+ IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(I);
+ if (!Sel || Sel->getParent()->getParent() != F) continue;
+
+ // Index of the ".llvm.eh.catch.all.value" variable.
+ unsigned OpIdx = Sel->getNumOperands() - 1;
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Sel->getOperand(OpIdx));
+ if (GV != EHCatchAllValue) continue;
+ Sel->setOperand(OpIdx, EHCatchAllValue->getInitializer());
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+/// FindSelectorAndURoR - Find the eh.selector call associated with the
+/// eh.exception call, and indicate if there is a URoR "invoke" associated
+/// with it. This recursively looks past instructions which don't
+/// change the EH pointer value, like casts or PHI nodes.
+bool
+DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
+ SmallPtrSet<IntrinsicInst*, 8> &SelCalls) {
+ SmallPtrSet<PHINode*, 32> SeenPHIs;
+ bool Changed = false;
+
+ restart:
+ for (Value::use_iterator
+ I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) {
+ Instruction *II = dyn_cast<Instruction>(I);
+ if (!II || II->getParent()->getParent() != F) continue;
+
+ if (IntrinsicInst *Sel = dyn_cast<IntrinsicInst>(II)) {
+ if (Sel->getIntrinsicID() == Intrinsic::eh_selector)
+ SelCalls.insert(Sel);
+ } else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(II)) {
+ if (Invoke->getCalledFunction() == URoR)
+ URoRInvoke = true;
+ } else if (CastInst *CI = dyn_cast<CastInst>(II)) {
+ Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
+ if (!PromoteStoreInst(SI)) continue;
+ Changed = true;
+ SeenPHIs.clear();
+ goto restart; // Uses may have changed, restart loop.
+ } else if (PHINode *PN = dyn_cast<PHINode>(II)) {
+ if (SeenPHIs.insert(PN))
+ // Don't process a PHI node more than once.
+ Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls);
+ }
+ }
+
+ return Changed;
+}
+
+/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls. The
+/// "unwind" part of these invokes jumps to a landing pad within the current
+/// function. This is a candidate to merge the selector associated with the URoR
+/// invoke with the one from the URoR's landing pad.
+bool DwarfEHPrepare::HandleURoRInvokes() {
+ if (!DT) return CleanupSelectors(); // We require DominatorTree information.
+
+ if (!EHCatchAllValue) {
+ EHCatchAllValue =
+ F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value");
+ if (!EHCatchAllValue) return false;
+ }
+
+ if (!SelectorIntrinsic) {
+ SelectorIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector);
+ if (!SelectorIntrinsic) return false;
+ }
+
+ if (!URoR) {
+ URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
+ if (!URoR) return CleanupSelectors();
+ }
+
+ SmallPtrSet<IntrinsicInst*, 32> Sels;
+ SmallPtrSet<InvokeInst*, 32> URoRInvokes;
+ FindAllCleanupSelectors(Sels);
+ FindAllURoRInvokes(URoRInvokes);
+
+ SmallPtrSet<IntrinsicInst*, 32> SelsToConvert;
+
+ for (SmallPtrSet<IntrinsicInst*, 32>::iterator
+ SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) {
+ const BasicBlock *SelBB = (*SI)->getParent();
+ for (SmallPtrSet<InvokeInst*, 32>::iterator
+ UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) {
+ const BasicBlock *URoRBB = (*UI)->getParent();
+ if (SelBB == URoRBB || DT->dominates(SelBB, URoRBB)) {
+ SelsToConvert.insert(*SI);
+ break;
+ }
+ }
+ }
+
+ bool Changed = false;
+
+ if (Sels.size() != SelsToConvert.size()) {
+ // If we haven't been able to convert all of the clean-up selectors, then
+ // loop through the slow way to see if they still need to be converted.
+ if (!ExceptionValueIntrinsic) {
+ ExceptionValueIntrinsic =
+ Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception);
+ if (!ExceptionValueIntrinsic) return CleanupSelectors();
+ }
+
+ for (Value::use_iterator
+ I = ExceptionValueIntrinsic->use_begin(),
+ E = ExceptionValueIntrinsic->use_end(); I != E; ++I) {
+ IntrinsicInst *EHPtr = dyn_cast<IntrinsicInst>(I);
+ if (!EHPtr || EHPtr->getParent()->getParent() != F) continue;
+
+ Changed |= PromoteEHPtrStore(EHPtr);
+
+ bool URoRInvoke = false;
+ SmallPtrSet<IntrinsicInst*, 8> SelCalls;
+ Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls);
+
+ if (URoRInvoke) {
+ // This EH pointer is being used by an invoke of an URoR instruction and
+ // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we
+ // need to convert it to a 'catch-all'.
+ for (SmallPtrSet<IntrinsicInst*, 8>::iterator
+ SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) {
+ IntrinsicInst *II = *SI;
+ unsigned NumOps = II->getNumOperands();
+
+ if (NumOps <= 4) {
+ bool IsCleanUp = (NumOps == 3);
+
+ if (!IsCleanUp)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(3)))
+ IsCleanUp = (CI->getZExtValue() == 0);
+
+ if (IsCleanUp)
+ SelsToConvert.insert(II);
+ }
+ }
+ }
+ }
+ }
+
+ if (!SelsToConvert.empty()) {
+ // Convert all clean-up eh.selectors, which are associated with "invokes" of
+ // URoR calls, into catch-all eh.selectors.
+ Changed = true;
+
+ for (SmallPtrSet<IntrinsicInst*, 8>::iterator
+ SI = SelsToConvert.begin(), SE = SelsToConvert.end();
+ SI != SE; ++SI) {
+ IntrinsicInst *II = *SI;
+ SmallVector<Value*, 8> Args;
+
+ // Use the exception object pointer and the personality function
+ // from the original selector.
+ Args.push_back(II->getOperand(1)); // Exception object pointer.
+ Args.push_back(II->getOperand(2)); // Personality function.
+ Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
+
+ CallInst *NewSelector =
+ CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(),
+ "eh.sel.catch.all", II);
+
+ NewSelector->setTailCall(II->isTailCall());
+ NewSelector->setAttributes(II->getAttributes());
+ NewSelector->setCallingConv(II->getCallingConv());
+
+ II->replaceAllUsesWith(NewSelector);
+ II->eraseFromParent();
+ }
+ }
+
+ Changed |= CleanupSelectors();
+ return Changed;
+}
+
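
The clean-up test above appears twice, in FindAllCleanupSelectors and in the
slow path of HandleURoRInvokes. Factored out as a hedged sketch (a
hypothetical helper, not part of the patch):

// An eh.selector call is a clean-up if it carries no type infos at all
// (callee + exception pointer + personality, i.e. 3 operands) or if its
// lone extra operand is the constant 0.
static bool isCleanupSelector(IntrinsicInst *Sel) {
  unsigned NumOps = Sel->getNumOperands();
  if (NumOps > 4) return false;   // real catch clauses present
  if (NumOps == 3) return true;   // no filter or type info
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Sel->getOperand(3)))
    return CI->getZExtValue() == 0;
  return false;
}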
/// NormalizeLandingPads - Normalize and discover landing pads, noting them
/// in the LandingPads set. A landing pad is normal if the only CFG edges
/// that end at it are unwind edges from invoke instructions. If we inlined
@@ -422,6 +728,8 @@ bool DwarfEHPrepare::runOnFunction(Function &Fn) {
if (!CompileFast)
Changed |= PromoteStackTemporaries();
+ Changed |= HandleURoRInvokes();
+
LandingPads.clear();
return Changed;
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index e207f60..025ad05 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -303,9 +303,7 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
// otherwise mark it as ~1U so it can be nuked later.
if (ValNo->id == getNumValNums()-1) {
do {
- VNInfo *VNI = valnos.back();
valnos.pop_back();
- VNI->~VNInfo();
} while (!valnos.empty() && valnos.back()->isUnused());
} else {
ValNo->setIsUnused(true);
@@ -351,9 +349,7 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
// otherwise mark it as ~1U so it can be nuked later.
if (ValNo->id == getNumValNums()-1) {
do {
- VNInfo *VNI = valnos.back();
valnos.pop_back();
- VNI->~VNInfo();
} while (!valnos.empty() && valnos.back()->isUnused());
} else {
ValNo->setIsUnused(true);
@@ -579,9 +575,7 @@ void LiveInterval::MergeValueInAsValue(
// mark it as ~1U so it can be nuked later.
if (V1->id == getNumValNums()-1) {
do {
- VNInfo *VNI = valnos.back();
valnos.pop_back();
- VNI->~VNInfo();
} while (!valnos.empty() && valnos.back()->isUnused());
} else {
V1->setIsUnused(true);
@@ -597,7 +591,7 @@ void LiveInterval::MergeValueInAsValue(
/// used with an unknown definition value.
void LiveInterval::MergeInClobberRanges(LiveIntervals &li_,
const LiveInterval &Clobbers,
- BumpPtrAllocator &VNInfoAllocator) {
+ VNInfo::Allocator &VNInfoAllocator) {
if (Clobbers.empty()) return;
DenseMap<VNInfo*, VNInfo*> ValNoMaps;
@@ -658,14 +652,13 @@ void LiveInterval::MergeInClobberRanges(LiveIntervals &li_,
if (UnusedValNo) {
// Delete the last unused val#.
valnos.pop_back();
- UnusedValNo->~VNInfo();
}
}
void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
SlotIndex Start,
SlotIndex End,
- BumpPtrAllocator &VNInfoAllocator) {
+ VNInfo::Allocator &VNInfoAllocator) {
// Find a value # to use for the clobber ranges. If there is already a value#
// for unknown values, use it.
VNInfo *ClobberValNo =
@@ -749,9 +742,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
// ~1U so it can be nuked later.
if (V1->id == getNumValNums()-1) {
do {
- VNInfo *VNI = valnos.back();
valnos.pop_back();
- VNI->~VNInfo();
} while (valnos.back()->isUnused());
} else {
V1->setIsUnused(true);
@@ -762,7 +753,7 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
void LiveInterval::Copy(const LiveInterval &RHS,
MachineRegisterInfo *MRI,
- BumpPtrAllocator &VNInfoAllocator) {
+ VNInfo::Allocator &VNInfoAllocator) {
ranges.clear();
valnos.clear();
std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(RHS.reg);
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index b3e9216..23cff07 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -91,7 +91,7 @@ void LiveIntervals::releaseMemory() {
r2iMap_.clear();
  // Release VNInfo memory regions after all VNInfo objects are dtor'd.
- VNInfoAllocator.Reset();
+ VNInfoAllocator.DestroyAll();
while (!CloneMIs.empty()) {
MachineInstr *MI = CloneMIs.back();
CloneMIs.pop_back();
@@ -819,8 +819,9 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
unsigned ImpUse = getReMatImplicitUse(li, MI);
if (ImpUse) {
const LiveInterval &ImpLi = getInterval(ImpUse);
- for (MachineRegisterInfo::use_iterator ri = mri_->use_begin(li.reg),
- re = mri_->use_end(); ri != re; ++ri) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ ri = mri_->use_nodbg_begin(li.reg), re = mri_->use_nodbg_end();
+ ri != re; ++ri) {
MachineInstr *UseMI = &*ri;
SlotIndex UseIdx = getInstructionIndex(UseMI);
if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
@@ -1052,7 +1053,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
// all of its uses are rematerialized, simply delete it.
if (MI == ReMatOrigDefMI && CanDelete) {
DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: "
- << MI << '\n');
+ << *MI << '\n');
RemoveMachineInstrFromMaps(MI);
vrm.RemoveMachineInstrFromMaps(MI);
MI->eraseFromParent();
@@ -1520,6 +1521,12 @@ LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
MachineOperand &O = ri.getOperand();
MachineInstr *MI = &*ri;
++ri;
+ if (MI->isDebugValue()) {
+ // Remove debug info for now.
+ O.setReg(0U);
+ DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI);
+ continue;
+ }
if (O.isDef()) {
assert(MI->isImplicitDef() &&
"Register def was not rewritten?");
@@ -2012,6 +2019,8 @@ unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li,
E = mri_->reg_end(); I != E; ++I) {
MachineOperand &O = I.getOperand();
MachineInstr *MI = O.getParent();
+ if (MI->isDebugValue())
+ continue;
SlotIndex Index = getInstructionIndex(MI);
if (pli.liveAt(Index))
++NumConflicts;
@@ -2052,7 +2061,7 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
E = mri_->reg_end(); I != E; ++I) {
MachineOperand &O = I.getOperand();
MachineInstr *MI = O.getParent();
- if (SeenMIs.count(MI))
+ if (MI->isDebugValue() || SeenMIs.count(MI))
continue;
SeenMIs.insert(MI);
SlotIndex Index = getInstructionIndex(MI);
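
The Reset() to DestroyAll() switch here and in LiveStackAnalysis.cpp below
pairs with the removal of the manual VNI->~VNInfo() calls in LiveInterval.cpp
above: VNInfo::Allocator is now, by all appearances, a type-specific bump
allocator that can run destructors itself. A hedged sketch with a stand-in
payload type:

#include "llvm/Support/Allocator.h"
#include <new>

struct Payload { ~Payload() { /* observable cleanup happens here */ } };

void demo() {
  llvm::SpecificBumpPtrAllocator<Payload> Alloc;
  new (Alloc.Allocate()) Payload();  // construct in pool-provided storage
  Alloc.DestroyAll();  // runs ~Payload() for each object, then frees slabs
}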
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index d2f3775..798b9b9 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -36,7 +36,7 @@ void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
void LiveStacks::releaseMemory() {
  // Release VNInfo memory regions after all VNInfo objects are dtor'd.
- VNInfoAllocator.Reset();
+ VNInfoAllocator.DestroyAll();
S2IMap.clear();
S2RCMap.clear();
}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 519990e..ca8ecff 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -556,17 +556,21 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
if (MI->isPHI())
NumOperandsToProcess = 1;
+ // Clear kill and dead markers. LV will recompute them.
SmallVector<unsigned, 4> UseRegs;
SmallVector<unsigned, 4> DefRegs;
for (unsigned i = 0; i != NumOperandsToProcess; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.getReg() == 0)
continue;
unsigned MOReg = MO.getReg();
- if (MO.isUse())
+ if (MO.isUse()) {
+ MO.setIsKill(false);
UseRegs.push_back(MOReg);
- if (MO.isDef())
+ } else /*MO.isDef()*/ {
+ MO.setIsDead(false);
DefRegs.push_back(MOReg);
+ }
}
// Process all uses.
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index fc8ae5f..bd0ccb4 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -23,6 +23,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/raw_ostream.h"
@@ -45,9 +46,9 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix();
- return Ctx.GetOrCreateTemporarySymbol(Twine(Prefix) + "BB" +
- Twine(MF->getFunctionNumber()) + "_" +
- Twine(getNumber()));
+ return Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" +
+ Twine(MF->getFunctionNumber()) + "_" +
+ Twine(getNumber()));
}
@@ -459,54 +460,41 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
// conditional branch followed by an unconditional branch. DestA is the
// 'true' destination and DestB is the 'false' destination.
- bool MadeChange = false;
- bool AddedFallThrough = false;
+ bool Changed = false;
MachineFunction::iterator FallThru =
llvm::next(MachineFunction::iterator(this));
-
- if (isCond) {
- // If this block ends with a conditional branch that falls through to its
- // successor, set DestB as the successor.
- if (DestB == 0 && FallThru != getParent()->end()) {
+
+ if (DestA == 0 && DestB == 0) {
+ // Block falls through to successor.
+ DestA = FallThru;
+ DestB = FallThru;
+ } else if (DestA != 0 && DestB == 0) {
+ if (isCond)
+ // Block ends in conditional jump that falls through to successor.
DestB = FallThru;
- AddedFallThrough = true;
- }
} else {
- // If this is an unconditional branch with no explicit dest, it must just be
- // a fallthrough into DestA.
- if (DestA == 0 && FallThru != getParent()->end()) {
- DestA = FallThru;
- AddedFallThrough = true;
- }
+ assert(DestA && DestB && isCond &&
+ "CFG in a bad state. Cannot correct CFG edges");
}
-
+
+ // Remove superfluous edges. I.e., those which aren't destinations of this
+ // basic block, duplicate edges, or landing pads.
+ SmallPtrSet<const MachineBasicBlock*, 8> SeenMBBs;
MachineBasicBlock::succ_iterator SI = succ_begin();
- MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
while (SI != succ_end()) {
const MachineBasicBlock *MBB = *SI;
- if (MBB == DestA) {
- DestA = 0;
- ++SI;
- } else if (MBB == DestB) {
- DestB = 0;
- ++SI;
- } else if (MBB->isLandingPad() &&
- MBB != OrigDestA && MBB != OrigDestB) {
- ++SI;
- } else {
- // Otherwise, this is a superfluous edge, remove it.
+ if (!SeenMBBs.insert(MBB) ||
+ (MBB != DestA && MBB != DestB && !MBB->isLandingPad())) {
+ // This is a superfluous edge, remove it.
SI = removeSuccessor(SI);
- MadeChange = true;
+ Changed = true;
+ } else {
+ ++SI;
}
}
- if (!AddedFallThrough)
- assert(DestA == 0 && DestB == 0 && "MachineCFG is missing edges!");
- else if (isCond)
- assert(DestA == 0 && "MachineCFG is missing edges!");
-
- return MadeChange;
+ return Changed;
}
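
The rewritten loop leans on SmallPtrSet::insert returning false for an
element already in the set, which is what catches duplicate successor edges
in a single pass. A hedged mini-example (the bool return matches this
revision; later LLVM versions return a pair):

llvm::SmallPtrSet<const MachineBasicBlock *, 8> Seen;
if (!Seen.insert(MBB)) {
  // MBB was already recorded: a duplicate edge, remove it.
}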
/// findDebugLoc - find the next valid DebugLoc starting at MBBI, skipping
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 91d3635..597d51d 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -117,17 +117,15 @@ bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator E) {
unsigned LookAheadLeft = 5;
- while (LookAheadLeft--) {
+ while (LookAheadLeft) {
+ // Skip over dbg_value's.
+ while (I != E && I->isDebugValue())
+ ++I;
+
if (I == E)
// Reached end of block, register is obviously dead.
return true;
- if (I->isDebugValue()) {
- // These must not count against the limit.
- ++LookAheadLeft;
- ++I;
- continue;
- }
bool SeenDef = false;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I->getOperand(i);
@@ -143,6 +141,8 @@ bool MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
// See a def of Reg (or an alias) before encountering any use, it's
// trivially dead.
return true;
+
+ --LookAheadLeft;
++I;
}
return false;
@@ -294,8 +294,12 @@ bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) {
bool FoundCSE = VNT.count(MI);
if (!FoundCSE) {
// Look for trivial copy coalescing opportunities.
- if (PerformTrivialCoalescing(MI, MBB))
+ if (PerformTrivialCoalescing(MI, MBB)) {
+ // After coalescing MI itself may become a copy.
+ if (isCopy(MI, TII))
+ continue;
FoundCSE = VNT.count(MI);
+ }
}
// FIXME: commute commutable instructions?
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 5772b2f..f6cc71f 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -460,9 +460,7 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
SmallString<60> Name;
raw_svector_ostream(Name)
<< Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
- if (isLinkerPrivate)
- return Ctx.GetOrCreateSymbol(Name.str());
- return Ctx.GetOrCreateTemporarySymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name.str());
}
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index af48e9e..ad4f01b 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -44,6 +44,10 @@ public:
MMIAddrLabelMapCallbackPtr() : Map(0) {}
MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
+ void setPtr(BasicBlock *BB) {
+ ValueHandleBase::operator=(BB);
+ }
+
void setMap(MMIAddrLabelMap *map) { Map = map; }
virtual void deleted();
@@ -209,7 +213,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
// If New is not address taken, just move our symbol over to it.
if (NewEntry.Symbols.isNull()) {
- BBCallbacks[OldEntry.Index] = New; // Update the callback.
+ BBCallbacks[OldEntry.Index].setPtr(New); // Update the callback.
NewEntry = OldEntry; // Set New's entry.
return;
}
diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp
index acb6869..41fc204 100644
--- a/lib/CodeGen/OptimizeExts.cpp
+++ b/lib/CodeGen/OptimizeExts.cpp
@@ -73,6 +73,9 @@ FunctionPass *llvm::createOptimizeExtsPass() { return new OptimizeExts(); }
/// the source, and if the source value is preserved as a sub-register of
/// the result, then replace all reachable uses of the source with the subreg
/// of the result.
+/// Do not generate an EXTRACT that is used only in a debug use, as this
+/// changes the code. Since this code does not currently share EXTRACTs, just
+/// ignore all debug uses.
bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
bool Changed = false;
@@ -84,17 +87,17 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg);
- if (++UI == MRI->use_end())
+ MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
+ if (++UI == MRI->use_nodbg_end())
// No other uses.
return false;
// Ok, the source has other uses. See if we can replace the other uses
// with use of the result of the extension.
SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
- UI = MRI->use_begin(DstReg);
- for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
- ++UI)
+ UI = MRI->use_nodbg_begin(DstReg);
+ for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
ReachedBBs.insert(UI->getParent());
bool ExtendLife = true;
@@ -103,9 +106,9 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
// Uses that the result of the instruction can reach.
SmallVector<MachineOperand*, 8> ExtendedUses;
- UI = MRI->use_begin(SrcReg);
- for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
- ++UI) {
+ UI = MRI->use_nodbg_begin(SrcReg);
+ for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ UI != UE; ++UI) {
MachineOperand &UseMO = UI.getOperand();
MachineInstr *UseMI = &*UI;
if (UseMI == MI)
@@ -147,9 +150,9 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
// Look for PHI uses of the extended result, we don't want to extend the
  // liveness of a PHI input. It breaks all kinds of assumptions downstream.
  // A PHI use is expected to be the kill of its source values.
- UI = MRI->use_begin(DstReg);
- for (MachineRegisterInfo::use_iterator UE = MRI->use_end(); UI != UE;
- ++UI)
+ UI = MRI->use_nodbg_begin(DstReg);
+ for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
+ UI != UE; ++UI)
if (UI->isPHI())
PHIBBs.insert(UI->getParent());
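
The use_iterator to use_nodbg_iterator conversions above, like the
use_nodbg_empty() test in PHIElimination.cpp below, all serve one invariant:
optimization decisions must not change when -g adds DBG_VALUE uses of a
register. A hedged sketch of the pattern:

// Counts only real uses of Reg; DBG_VALUE operands are skipped by the
// iterator itself, so the answer is the same with and without debug info.
static bool hasMultipleRealUses(MachineRegisterInfo *MRI, unsigned Reg) {
  MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg);
  if (UI == MRI->use_nodbg_end()) return false;
  return ++UI != MRI->use_nodbg_end();
}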
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 8bbe0a7..f0057ce 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -74,7 +74,7 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
E = ImpDefs.end(); I != E; ++I) {
MachineInstr *DefMI = *I;
unsigned DefReg = DefMI->getOperand(0).getReg();
- if (MRI->use_empty(DefReg))
+ if (MRI->use_nodbg_empty(DefReg))
DefMI->eraseFromParent();
}
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 70e91aa..2d49beb 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -665,7 +665,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
/// ReconstructLiveInterval - Recompute a live interval from scratch.
void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
- BumpPtrAllocator& Alloc = LIs->getVNInfoAllocator();
+ VNInfo::Allocator& Alloc = LIs->getVNInfoAllocator();
// Clear the old ranges and valnos;
LI->clear();
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 2c69065..0ef041e 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -118,8 +118,8 @@ namespace {
bool isVirtRegModified(unsigned Reg) const {
assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!");
- assert(Reg - TargetRegisterInfo::FirstVirtualRegister < VirtRegModified.size()
- && "Illegal virtual register!");
+ assert(Reg - TargetRegisterInfo::FirstVirtualRegister <
+ VirtRegModified.size() && "Illegal virtual register!");
return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister];
}
@@ -135,15 +135,16 @@ namespace {
if (PhysRegsUseOrder.empty() ||
PhysRegsUseOrder.back() == Reg) return; // Already most recently used
- for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i)
- if (areRegsEqual(Reg, PhysRegsUseOrder[i-1])) {
- unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle
- PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
- // Add it to the end of the list
- PhysRegsUseOrder.push_back(RegMatch);
- if (RegMatch == Reg)
- return; // Found an exact match, exit early
- }
+ for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) {
+ unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle
+ if (!areRegsEqual(Reg, RegMatch)) continue;
+
+ PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1);
+ // Add it to the end of the list
+ PhysRegsUseOrder.push_back(RegMatch);
+ if (RegMatch == Reg)
+ return; // Found an exact match, exit early
+ }
}
public:
@@ -267,7 +268,7 @@ int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
RC->getAlignment());
- // Assign the slot...
+ // Assign the slot.
StackSlotForVirtReg[VirtReg] = FrameIdx;
return FrameIdx;
}
@@ -337,15 +338,19 @@ void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I,
assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!");
if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs)
spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg);
- } else {
- // If the selected register aliases any other registers, we must make
- // sure that one of the aliases isn't alive.
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet)
- if (PhysRegsUsed[*AliasSet] != -1 && // Spill aliased register.
- PhysRegsUsed[*AliasSet] != -2) // If allocatable.
- if (PhysRegsUsed[*AliasSet])
- spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
+ return;
+ }
+
+ // If the selected register aliases any other registers, we must make
+ // sure that one of the aliases isn't alive.
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register.
+ PhysRegsUsed[*AliasSet] == -2) // If allocatable.
+ continue;
+
+ if (PhysRegsUsed[*AliasSet])
+ spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet);
}
}
@@ -410,58 +415,63 @@ unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I,
// First check to see if we have a free register of the requested type...
unsigned PhysReg = NoFree ? 0 : getFreeReg(RC);
+ if (PhysReg != 0) {
+ // Assign the register.
+ assignVirtToPhysReg(VirtReg, PhysReg);
+ return PhysReg;
+ }
+
// If we didn't find an unused register, scavenge one now!
- if (PhysReg == 0) {
- assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
-
- // Loop over all of the preallocated registers from the least recently used
- // to the most recently used. When we find one that is capable of holding
- // our register, use it.
- for (unsigned i = 0; PhysReg == 0; ++i) {
- assert(i != PhysRegsUseOrder.size() &&
- "Couldn't find a register of the appropriate class!");
-
- unsigned R = PhysRegsUseOrder[i];
-
- // We can only use this register if it holds a virtual register (ie, it
- // can be spilled). Do not use it if it is an explicitly allocated
- // physical register!
- assert(PhysRegsUsed[R] != -1 &&
- "PhysReg in PhysRegsUseOrder, but is not allocated?");
- if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
- // If the current register is compatible, use it.
- if (RC->contains(R)) {
- PhysReg = R;
- break;
- } else {
- // If one of the registers aliased to the current register is
- // compatible, use it.
- for (const unsigned *AliasIt = TRI->getAliasSet(R);
- *AliasIt; ++AliasIt) {
- if (RC->contains(*AliasIt) &&
- // If this is pinned down for some reason, don't use it. For
- // example, if CL is pinned, and we run across CH, don't use
- // CH as justification for using scavenging ECX (which will
- // fail).
- PhysRegsUsed[*AliasIt] != 0 &&
-
- // Make sure the register is allocatable. Don't allocate SIL on
- // x86-32.
- PhysRegsUsed[*AliasIt] != -2) {
- PhysReg = *AliasIt; // Take an aliased register
- break;
- }
- }
- }
+ assert(!PhysRegsUseOrder.empty() && "No allocated registers??");
+
+ // Loop over all of the preallocated registers from the least recently used
+ // to the most recently used. When we find one that is capable of holding
+ // our register, use it.
+ for (unsigned i = 0; PhysReg == 0; ++i) {
+ assert(i != PhysRegsUseOrder.size() &&
+ "Couldn't find a register of the appropriate class!");
+
+ unsigned R = PhysRegsUseOrder[i];
+
+    // We can only use this register if it holds a virtual register (i.e., it
+ // can be spilled). Do not use it if it is an explicitly allocated
+ // physical register!
+ assert(PhysRegsUsed[R] != -1 &&
+ "PhysReg in PhysRegsUseOrder, but is not allocated?");
+ if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) {
+ // If the current register is compatible, use it.
+ if (RC->contains(R)) {
+ PhysReg = R;
+ break;
+ }
+
+ // If one of the registers aliased to the current register is
+ // compatible, use it.
+ for (const unsigned *AliasIt = TRI->getAliasSet(R);
+ *AliasIt; ++AliasIt) {
+ if (!RC->contains(*AliasIt)) continue;
+
+ // If this is pinned down for some reason, don't use it. For
+ // example, if CL is pinned, and we run across CH, don't use
+        // CH as justification for scavenging ECX (which will
+ // fail).
+ if (PhysRegsUsed[*AliasIt] == 0) continue;
+
+ // Make sure the register is allocatable. Don't allocate SIL on
+ // x86-32.
+ if (PhysRegsUsed[*AliasIt] == -2) continue;
+
+ PhysReg = *AliasIt; // Take an aliased register
+ break;
}
}
+ }
- assert(PhysReg && "Physical register not assigned!?!?");
+ assert(PhysReg && "Physical register not assigned!?!?");
- // At this point PhysRegsUseOrder[i] is the least recently used register of
- // compatible register class. Spill it to memory and reap its remains.
- spillPhysReg(MBB, I, PhysReg);
- }
+ // At this point PhysRegsUseOrder[i] is the least recently used register of
+ // compatible register class. Spill it to memory and reap its remains.
+ spillPhysReg(MBB, I, PhysReg);
// Now that we know which register we need to assign this to, do it now!
assignVirtToPhysReg(VirtReg, PhysReg);
@@ -543,17 +553,17 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
}
for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
*SubRegs; ++SubRegs) {
- if (!ReloadedRegs.insert(*SubRegs)) {
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- if (MI->isInlineAsm()) {
- Msg << "\nPlease check your inline asm statement for invalid "
- << "constraints:\n";
- MI->print(Msg, TM);
- }
- llvm_report_error(Msg.str());
+ if (ReloadedRegs.insert(*SubRegs)) continue;
+
+ std::string msg;
+ raw_string_ostream Msg(msg);
+ Msg << "Ran out of registers during register allocation!";
+ if (MI->isInlineAsm()) {
+ Msg << "\nPlease check your inline asm statement for invalid "
+ << "constraints:\n";
+ MI->print(Msg, TM);
}
+ llvm_report_error(Msg.str());
}
return MI;
@@ -563,7 +573,7 @@ MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI,
/// read/mod/write register, i.e. update partial register.
static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
+ MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
MO.isDef() && !MO.isDead())
return true;
@@ -575,7 +585,7 @@ static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) {
/// read/mod/write register, i.e. update partial register.
static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
+ MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() &&
!MO.isDef() && MO.isKill())
return true;
@@ -606,7 +616,7 @@ static bool precedes(MachineBasicBlock::iterator A,
/// ComputeLocalLiveness - Computes liveness of registers within a basic
/// block, setting the killed/dead flags as appropriate.
void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
- MachineRegisterInfo& MRI = MBB.getParent()->getRegInfo();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
// Keep track of the most recently seen previous use or def of each reg,
// so that we can update them with dead/kill markers.
DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > LastUseDef;
@@ -614,58 +624,60 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
I != E; ++I) {
if (I->isDebugValue())
continue;
+
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = I->getOperand(i);
+ MachineOperand &MO = I->getOperand(i);
// Uses don't trigger any flags, but we need to save
// them for later. Also, we have to process these
// _before_ processing the defs, since an instr
// uses regs before it defs them.
- if (MO.isReg() && MO.getReg() && MO.isUse()) {
- LastUseDef[MO.getReg()] = std::make_pair(I, i);
-
+ if (!MO.isReg() || !MO.getReg() || !MO.isUse())
+ continue;
+
+ LastUseDef[MO.getReg()] = std::make_pair(I, i);
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
+
+ const unsigned *Aliases = TRI->getAliasSet(MO.getReg());
+ if (Aliases == 0)
+ continue;
+
+ while (*Aliases) {
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+ alias = LastUseDef.find(*Aliases);
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue;
+ if (alias != LastUseDef.end() && alias->second.first != I)
+ LastUseDef[*Aliases] = std::make_pair(I, i);
- const unsigned* Aliases = TRI->getAliasSet(MO.getReg());
- if (Aliases) {
- while (*Aliases) {
- DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
- alias = LastUseDef.find(*Aliases);
-
- if (alias != LastUseDef.end() && alias->second.first != I)
- LastUseDef[*Aliases] = std::make_pair(I, i);
-
- ++Aliases;
- }
- }
+ ++Aliases;
}
}
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = I->getOperand(i);
+ MachineOperand &MO = I->getOperand(i);
// Defs others than 2-addr redefs _do_ trigger flag changes:
// - A def followed by a def is dead
// - A use followed by a def is a kill
- if (MO.isReg() && MO.getReg() && MO.isDef()) {
- DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
- last = LastUseDef.find(MO.getReg());
- if (last != LastUseDef.end()) {
- // Check if this is a two address instruction. If so, then
- // the def does not kill the use.
- if (last->second.first == I &&
- I->isRegTiedToUseOperand(i))
- continue;
-
- MachineOperand& lastUD =
- last->second.first->getOperand(last->second.second);
- if (lastUD.isDef())
- lastUD.setIsDead(true);
- else
- lastUD.setIsKill(true);
- }
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue;
+
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
+ last = LastUseDef.find(MO.getReg());
+ if (last != LastUseDef.end()) {
+ // Check if this is a two address instruction. If so, then
+ // the def does not kill the use.
+ if (last->second.first == I &&
+ I->isRegTiedToUseOperand(i))
+ continue;
- LastUseDef[MO.getReg()] = std::make_pair(I, i);
+ MachineOperand &lastUD =
+ last->second.first->getOperand(last->second.second);
+ if (lastUD.isDef())
+ lastUD.setIsDead(true);
+ else
+ lastUD.setIsKill(true);
}
+
+ LastUseDef[MO.getReg()] = std::make_pair(I, i);
}
}
@@ -687,9 +699,9 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
// in the block and determine if it is dead.
for (DenseMap<unsigned, std::pair<MachineInstr*, unsigned> >::iterator
I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) {
- MachineInstr* MI = I->second.first;
+ MachineInstr *MI = I->second.first;
unsigned idx = I->second.second;
- MachineOperand& MO = MI->getOperand(idx);
+ MachineOperand &MO = MI->getOperand(idx);
bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg());
@@ -712,20 +724,21 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) {
// Two cases:
// - used in another block
// - used in the same block before it is defined (loop)
- if (UI->getParent() != &MBB ||
- (MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) {
- if (UI->isDebugValue()) {
- UsedByDebugValueOnly = true;
- continue;
- }
-
- // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
- UsedInMultipleBlocks.set(MO.getReg() -
- TargetRegisterInfo::FirstVirtualRegister);
- usedOutsideBlock = true;
- UsedByDebugValueOnly = false;
- break;
+ if (UI->getParent() == &MBB &&
+ !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI)))
+ continue;
+
+ if (UI->isDebugValue()) {
+ UsedByDebugValueOnly = true;
+ continue;
}
+
+ // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone.
+ UsedInMultipleBlocks.set(MO.getReg() -
+ TargetRegisterInfo::FirstVirtualRegister);
+ usedOutsideBlock = true;
+ UsedByDebugValueOnly = false;
+ break;
}
if (UsedByDebugValueOnly)
@@ -770,11 +783,11 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
AddToPhysRegsUseOrder(Reg);
for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
*SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] != -2) {
- AddToPhysRegsUseOrder(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- }
+ if (PhysRegsUsed[*SubRegs] == -2) continue;
+
+ AddToPhysRegsUseOrder(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
}
}
@@ -813,16 +826,16 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
SmallVector<unsigned, 8> Kills;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (MO.isReg() && MO.isKill()) {
- if (!MO.isImplicit())
- Kills.push_back(MO.getReg());
- else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
- // These are extra physical register kills when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- Kills.push_back(MO.getReg());
- }
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isKill()) continue;
+
+ if (!MO.isImplicit())
+ Kills.push_back(MO.getReg());
+ else if (!isReadModWriteImplicitKill(MI, MO.getReg()))
+ // These are extra physical register kills when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ Kills.push_back(MO.getReg());
}
// If any physical regs are earlyclobber, spill any value they might
@@ -830,45 +843,45 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
// If any virtual regs are earlyclobber, allocate them now (before
// freeing inputs that are killed).
if (MI->isInlineAsm()) {
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef() && MO.isEarlyClobber() &&
- MO.getReg()) {
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- unsigned DestVirtReg = MO.getReg();
- unsigned DestPhysReg;
-
- // If DestVirtReg already has a value, use it.
- if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
- DestPhysReg = getReg(MBB, MI, DestVirtReg);
- MF->getRegInfo().setPhysRegUsed(DestPhysReg);
- markVirtRegModified(DestVirtReg);
- getVirtRegLastUse(DestVirtReg) =
- std::make_pair((MachineInstr*)0, 0);
- DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
- << " to %reg" << DestVirtReg << "\n");
- MO.setReg(DestPhysReg); // Assign the earlyclobber register
- } else {
- unsigned Reg = MO.getReg();
- if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
- // These are extra physical register defs when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
-
- MF->getRegInfo().setPhysRegUsed(Reg);
- spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(Reg);
-
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] != -2) {
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(*SubRegs);
- }
- }
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() ||
+ !MO.getReg())
+ continue;
+
+ if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg)))
+ DestPhysReg = getReg(MBB, MI, DestVirtReg);
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ getVirtRegLastUse(DestVirtReg) =
+ std::make_pair((MachineInstr*)0, 0);
+ DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n");
+ MO.setReg(DestPhysReg); // Assign the earlyclobber register
+ } else {
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] == -2) continue;
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(*SubRegs);
}
}
}
@@ -894,7 +907,7 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
//
SmallSet<unsigned, 4> ReloadedRegs;
for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand& MO = MI->getOperand(i);
+ MachineOperand &MO = MI->getOperand(i);
// here we are looking for only used operands (never def&use)
if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg()))
@@ -923,18 +936,18 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
"Silently clearing a virtual register?");
}
- if (PhysReg) {
- DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg)
- << "[%reg" << VirtReg <<"], removing it from live set\n");
- removePhysReg(PhysReg);
- for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] != -2) {
- DEBUG(dbgs() << " Last use of "
- << TRI->getName(*SubRegs) << "[%reg" << VirtReg
- <<"], removing it from live set\n");
- removePhysReg(*SubRegs);
- }
+ if (!PhysReg) continue;
+
+ DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg)
+ << "[%reg" << VirtReg <<"], removing it from live set\n");
+ removePhysReg(PhysReg);
+ for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] != -2) {
+ DEBUG(dbgs() << " Last use of "
+ << TRI->getName(*SubRegs) << "[%reg" << VirtReg
+ <<"], removing it from live set\n");
+ removePhysReg(*SubRegs);
}
}
}
@@ -942,30 +955,31 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
// Loop over all of the operands of the instruction, spilling registers that
// are defined, and marking explicit destinations in the PhysRegsUsed map.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef() && !MO.isImplicit() && MO.getReg() &&
- !MO.isEarlyClobber() &&
- TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- unsigned Reg = MO.getReg();
- if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
- // These are extra physical register defs when a sub-register
- // is defined (def of a sub-register is a read/mod/write of the
- // larger registers). Ignore.
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
-
- MF->getRegInfo().setPhysRegUsed(Reg);
- spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
- PhysRegsUsed[Reg] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(Reg);
-
- for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
- *SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] != -2) {
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- AddToPhysRegsUseOrder(*SubRegs);
- }
- }
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() ||
+ MO.isEarlyClobber() ||
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP.
+ // These are extra physical register defs when a sub-register
+ // is defined (def of a sub-register is a read/mod/write of the
+ // larger registers). Ignore.
+ if (isReadModWriteImplicitDef(MI, MO.getReg())) continue;
+
+ MF->getRegInfo().setPhysRegUsed(Reg);
+ spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg
+ PhysRegsUsed[Reg] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(Reg);
+
+ for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
+ *SubRegs; ++SubRegs) {
+ if (PhysRegsUsed[*SubRegs] == -2) continue;
+
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ AddToPhysRegsUseOrder(*SubRegs);
}
}
@@ -982,18 +996,18 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
MF->getRegInfo().setPhysRegUsed(Reg);
for (const unsigned *SubRegs = TRI->getSubRegisters(Reg);
*SubRegs; ++SubRegs) {
- if (PhysRegsUsed[*SubRegs] != -2) {
- AddToPhysRegsUseOrder(*SubRegs);
- PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
- MF->getRegInfo().setPhysRegUsed(*SubRegs);
- }
+ if (PhysRegsUsed[*SubRegs] == -2) continue;
+
+ AddToPhysRegsUseOrder(*SubRegs);
+ PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now
+ MF->getRegInfo().setPhysRegUsed(*SubRegs);
}
}
}
SmallVector<unsigned, 8> DeadDefs;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
+ MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDead())
DeadDefs.push_back(MO.getReg());
}
@@ -1004,45 +1018,46 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
// we need to scavenge a register.
//
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand& MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef() && MO.getReg() &&
- !MO.isEarlyClobber() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
- unsigned DestVirtReg = MO.getReg();
- unsigned DestPhysReg;
-
- // If DestVirtReg already has a value, use it.
- if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
- // If this is a copy try to reuse the input as the output;
- // that will make the copy go away.
- // If this is a copy, the source reg is a phys reg, and
- // that reg is available, use that phys reg for DestPhysReg.
- // If this is a copy, the source reg is a virtual reg, and
- // the phys reg that was assigned to that virtual reg is now
- // available, use that phys reg for DestPhysReg. (If it's now
- // available that means this was the last use of the source.)
- if (isCopy &&
- TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
- isPhysRegAvailable(SrcCopyReg)) {
- DestPhysReg = SrcCopyReg;
- assignVirtToPhysReg(DestVirtReg, DestPhysReg);
- } else if (isCopy &&
- TargetRegisterInfo::isVirtualRegister(SrcCopyReg) &&
- SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) &&
- MF->getRegInfo().getRegClass(DestVirtReg)->
- contains(SrcCopyPhysReg)) {
- DestPhysReg = SrcCopyPhysReg;
- assignVirtToPhysReg(DestVirtReg, DestPhysReg);
- } else
- DestPhysReg = getReg(MBB, MI, DestVirtReg);
- }
- MF->getRegInfo().setPhysRegUsed(DestPhysReg);
- markVirtRegModified(DestVirtReg);
- getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
- DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
- << " to %reg" << DestVirtReg << "\n");
- MO.setReg(DestPhysReg); // Assign the output register
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() ||
+ MO.isEarlyClobber() ||
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+
+ unsigned DestVirtReg = MO.getReg();
+ unsigned DestPhysReg;
+
+ // If DestVirtReg already has a value, use it.
+ if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) {
+ // If this is a copy try to reuse the input as the output;
+ // that will make the copy go away.
+ // If this is a copy, the source reg is a phys reg, and
+ // that reg is available, use that phys reg for DestPhysReg.
+ // If this is a copy, the source reg is a virtual reg, and
+ // the phys reg that was assigned to that virtual reg is now
+ // available, use that phys reg for DestPhysReg. (If it's now
+ // available that means this was the last use of the source.)
+ if (isCopy &&
+ TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) &&
+ isPhysRegAvailable(SrcCopyReg)) {
+ DestPhysReg = SrcCopyReg;
+ assignVirtToPhysReg(DestVirtReg, DestPhysReg);
+ } else if (isCopy &&
+ TargetRegisterInfo::isVirtualRegister(SrcCopyReg) &&
+ SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) &&
+ MF->getRegInfo().getRegClass(DestVirtReg)->
+ contains(SrcCopyPhysReg)) {
+ DestPhysReg = SrcCopyPhysReg;
+ assignVirtToPhysReg(DestVirtReg, DestPhysReg);
+ } else
+ DestPhysReg = getReg(MBB, MI, DestVirtReg);
}
+ MF->getRegInfo().setPhysRegUsed(DestPhysReg);
+ markVirtRegModified(DestVirtReg);
+ getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0);
+ DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg)
+ << " to %reg" << DestVirtReg << "\n");
+ MO.setReg(DestPhysReg); // Assign the output register
}
// If this instruction defines any registers that are immediately dead,
@@ -1059,21 +1074,20 @@ void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) {
} else if (PhysRegsUsed[PhysReg] == -2) {
// Unallocatable register dead, ignore.
continue;
- }
-
- if (PhysReg) {
- DEBUG(dbgs() << " Register " << TRI->getName(PhysReg)
- << " [%reg" << VirtReg
- << "] is never used, removing it from live set\n");
- removePhysReg(PhysReg);
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
- *AliasSet; ++AliasSet) {
- if (PhysRegsUsed[*AliasSet] != -2) {
- DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet)
- << " [%reg" << *AliasSet
- << "] is never used, removing it from live set\n");
- removePhysReg(*AliasSet);
- }
+ } else if (!PhysReg)
+ continue;
+
+ DEBUG(dbgs() << " Register " << TRI->getName(PhysReg)
+ << " [%reg" << VirtReg
+ << "] is never used, removing it from live set\n");
+ removePhysReg(PhysReg);
+ for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg);
+ *AliasSet; ++AliasSet) {
+ if (PhysRegsUsed[*AliasSet] != -2) {
+ DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet)
+ << " [%reg" << *AliasSet
+ << "] is never used, removing it from live set\n");
+ removePhysReg(*AliasSet);
}
}
}
@@ -1143,8 +1157,10 @@ bool RALocal::runOnMachineFunction(MachineFunction &Fn) {
StackSlotForVirtReg.grow(LastVirtReg);
Virt2PhysRegMap.grow(LastVirtReg);
Virt2LastUseMap.grow(LastVirtReg);
- VirtRegModified.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
- UsedInMultipleBlocks.resize(LastVirtReg+1-TargetRegisterInfo::FirstVirtualRegister);
+ VirtRegModified.resize(LastVirtReg+1 -
+ TargetRegisterInfo::FirstVirtualRegister);
+ UsedInMultipleBlocks.resize(LastVirtReg+1 -
+ TargetRegisterInfo::FirstVirtualRegister);
// Loop over all of the basic blocks, eliminating virtual register references
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
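[Editorial sketch] Nearly every hunk in RegAllocLocal.cpp above is the same mechanical transformation: a nested if-body becomes an early continue or return, so the common path loses one level of indentation without any change in behavior. A minimal standalone sketch of the pattern, with illustrative names rather than LLVM's:

    #include <vector>

    // Guard-clause refactoring in miniature: invert the condition and bail
    // out early instead of nesting the interesting work inside the if.
    static void resetUsableSlots(std::vector<int> &Slots) {
      for (unsigned i = 0, e = Slots.size(); i != e; ++i) {
        // Before: if (Slots[i] != -2) { Slots[i] = 0; }
        if (Slots[i] == -2) continue;  // -2 marks an unusable slot here.
        Slots[i] = 0;                  // The real work, no longer nested.
      }
    }
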
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index e532ade..ecc49e2 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -248,48 +248,47 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
unsigned DataLatency = SU->Latency;
for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
SUnit *UseSU = UseList[i];
- if (UseSU != SU) {
- unsigned LDataLatency = DataLatency;
- // Optionally add in a special extra latency for nodes that
- // feed addresses.
- // TODO: Do this for register aliases too.
- // TODO: Perhaps we should get rid of
- // SpecialAddressLatency and just move this into
- // adjustSchedDependency for the targets that care about
- // it.
- if (SpecialAddressLatency != 0 && !UnitLatencies) {
- MachineInstr *UseMI = UseSU->getInstr();
- const TargetInstrDesc &UseTID = UseMI->getDesc();
- int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
- assert(RegUseIndex >= 0 && "UseMI doesn's use register!");
- if ((UseTID.mayLoad() || UseTID.mayStore()) &&
- (unsigned)RegUseIndex < UseTID.getNumOperands() &&
- UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
- LDataLatency += SpecialAddressLatency;
- }
- // Adjust the dependence latency using operand def/use
- // information (if any), and then allow the target to
- // perform its own adjustments.
- const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
- if (!UnitLatencies) {
- ComputeOperandLatency(SU, UseSU, (SDep &)dep);
- ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
- }
- UseSU->addPred(dep);
+ if (UseSU == SU)
+ continue;
+ unsigned LDataLatency = DataLatency;
+ // Optionally add in a special extra latency for nodes that
+ // feed addresses.
+ // TODO: Do this for register aliases too.
+ // TODO: Perhaps we should get rid of
+ // SpecialAddressLatency and just move this into
+ // adjustSchedDependency for the targets that care about it.
+ if (SpecialAddressLatency != 0 && !UnitLatencies) {
+ MachineInstr *UseMI = UseSU->getInstr();
+ const TargetInstrDesc &UseTID = UseMI->getDesc();
+ int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+        assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+ if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+ (unsigned)RegUseIndex < UseTID.getNumOperands() &&
+ UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ LDataLatency += SpecialAddressLatency;
}
+ // Adjust the dependence latency using operand def/use
+ // information (if any), and then allow the target to
+ // perform its own adjustments.
+ const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
+ if (!UnitLatencies) {
+ ComputeOperandLatency(SU, UseSU, (SDep &)dep);
+ ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
+ }
+ UseSU->addPred(dep);
}
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
std::vector<SUnit *> &UseList = Uses[*Alias];
for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
SUnit *UseSU = UseList[i];
- if (UseSU != SU) {
- const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
- if (!UnitLatencies) {
- ComputeOperandLatency(SU, UseSU, (SDep &)dep);
- ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
- }
- UseSU->addPred(dep);
+ if (UseSU == SU)
+ continue;
+ const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
+ if (!UnitLatencies) {
+ ComputeOperandLatency(SU, UseSU, (SDep &)dep);
+ ST.adjustSchedDependency(SU, UseSU, (SDep &)dep);
}
+ UseSU->addPred(dep);
}
}
@@ -528,7 +527,8 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
MachineInstr *DefMI = Def->getInstr();
int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
if (DefIdx != -1) {
- int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), DefIdx);
+ int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(),
+ DefIdx);
if (DefCycle >= 0) {
MachineInstr *UseMI = Use->getInstr();
const unsigned UseClass = UseMI->getDesc().getSchedClass();
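[Editorial sketch] The BuildSchedGraph hunks above flatten the same dependence-building flow: skip self-edges, start from a base data latency, optionally add an address-feeding bonus, let the target adjust, then record the edge on the using node. A toy version of that flow, with hypothetical types standing in for LLVM's SUnit and SDep:

    #include <vector>

    struct Edge { int SrcId; unsigned Latency; };

    struct Node {
      int Id;
      std::vector<Edge> Preds;
      void addPred(const Edge &E) { Preds.push_back(E); }
    };

    // Stand-in for ST.adjustSchedDependency / SpecialAddressLatency above.
    static void adjustLatency(Edge &E, bool FeedsAddress) {
      if (FeedsAddress)
        E.Latency += 2;  // illustrative extra cycles for address producers
    }

    static void addDataDep(Node &Def, Node &Use, unsigned BaseLatency,
                           bool FeedsAddress) {
      if (&Def == &Use)
        return;  // self-edges are skipped, as in the hunk above
      Edge E = { Def.Id, BaseLatency };
      adjustLatency(E, FeedsAddress);
      Use.addPred(E);
    }
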
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index aa283ad..a336e0a 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5022,18 +5022,6 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
- // Try to infer better alignment information than the load already has.
- if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
- if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > LD->getAlignment())
- return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
- LD->getValueType(0),
- Chain, Ptr, LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->getMemoryVT(),
- LD->isVolatile(), LD->isNonTemporal(), Align);
- }
- }
-
// If load is not volatile and there are no uses of the loaded value (and
// the updated indexed value in case of indexed loads), change uses of the
// chain value into uses of the chain input (i.e. delete the dead load).
@@ -5099,6 +5087,18 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
}
}
+ // Try to infer better alignment information than the load already has.
+ if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > LD->getAlignment())
+ return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getSrcValue(),
+ LD->getSrcValueOffset(), LD->getMemoryVT(),
+ LD->isVolatile(), LD->isNonTemporal(), Align);
+ }
+ }
+
if (CombinerAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5250,17 +5250,6 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
SDValue Value = ST->getValue();
SDValue Ptr = ST->getBasePtr();
- // Try to infer better alignment information than the store already has.
- if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
- if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
- if (Align > ST->getAlignment())
- return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
- ST->isVolatile(), ST->isNonTemporal(), Align);
- }
- }
-
// If this is a store of a bit convert, store the input value if the
// resultant store does not need a higher alignment than the original.
if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
@@ -5351,6 +5340,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
+ // Try to infer better alignment information than the store already has.
+ if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
+ if (Align > ST->getAlignment())
+ return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+ Ptr, ST->getSrcValue(),
+ ST->getSrcValueOffset(), ST->getMemoryVT(),
+ ST->isVolatile(), ST->isNonTemporal(), Align);
+ }
+ }
+
if (CombinerAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
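[Editorial sketch] Both DAGCombiner hunks move the alignment-inference combine below the other load/store combines rather than changing it; one plausible reading is that transforms which can delete the access outright should run before a rewrite that rebuilds it. A sketch under that assumption, with all types and helpers as trivial stubs:

    struct Node { bool Dead; unsigned Align; };

    static bool tryDeleteDeadAccess(Node *N) { return N->Dead; }  // stub
    static unsigned inferBetterAlignment(Node *N) {               // stub
      return N->Align < 4 ? 4 : 0;  // 0 means no improvement found
    }
    static Node *rebuildWithAlignment(Node *N, unsigned Align) {  // stub
      N->Align = Align;
      return N;
    }

    static Node *combineMemAccess(Node *N) {
      if (tryDeleteDeadAccess(N))
        return 0;  // the access is gone; nothing left to realign
      if (unsigned Align = inferBetterAlignment(N))
        return rebuildWithAlignment(N, Align);
      return N;
    }
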
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 3fc30ff..e4e9ef4 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -340,10 +340,9 @@ bool FastISel::SelectCall(User *I) {
StaticAllocaMap.find(AI);
if (SI == StaticAllocaMap.end()) break; // VLAs.
int FI = SI->second;
- if (MMI) {
- if (MDNode *Dbg = DI->getMetadata("dbg"))
- MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg);
- }
+ if (MDNode *Dbg = DI->getDbgMetadata())
+ MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg);
+
// Building the map above is target independent. Generating DBG_VALUE
// inline is target dependent; do this now.
(void)TargetSelectInstruction(cast<Instruction>(I));
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 50f4c32..4fb2aa2 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -281,8 +281,17 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) {
GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
V = V->stripPointerCasts();
GlobalVariable *GV = dyn_cast<GlobalVariable>(V);
- assert ((GV || isa<ConstantPointerNull>(V)) &&
- "TypeInfo must be a global variable or NULL");
+
+ if (GV && GV->getName() == ".llvm.eh.catch.all.value") {
+ assert(GV->hasInitializer() &&
+ "The EH catch-all value must have an initializer");
+ Value *Init = GV->getInitializer();
+ GV = dyn_cast<GlobalVariable>(Init);
+ if (!GV) V = cast<ConstantPointerNull>(Init);
+ }
+
+ assert((GV || isa<ConstantPointerNull>(V)) &&
+ "TypeInfo must be a global variable or NULL");
return GV;
}
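[Editorial sketch] The ExtractTypeInfo change above teaches the lowering to look through a global named .llvm.eh.catch.all.value: the global itself is only an indirection, and the real type info (or a null pointer, meaning catch-all) is its initializer. The same unwrap in miniature, with toy types in place of GlobalVariable and ConstantPointerNull:

    #include <cstring>

    struct Global {
      const char *Name;
      Global *InitGV;  // initializer when it is itself a global, else 0
    };

    // A null result plays the role of ConstantPointerNull: catch-all.
    static Global *extractTypeInfo(Global *GV) {
      if (GV && std::strcmp(GV->Name, ".llvm.eh.catch.all.value") == 0)
        GV = GV->InitGV;  // look through the indirection
      return GV;
    }
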
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index fda094d3..28ba343 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -264,7 +264,8 @@ void
InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
const TargetInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug) {
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Flag &&
"Chain and flag operands should occur at end of operand list!");
@@ -295,7 +296,11 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
}
}
- MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef));
+ MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef,
+ false/*isImp*/, false/*isKill*/,
+ false/*isDead*/, false/*isUndef*/,
+ false/*isEarlyClobber*/,
+ 0/*SubReg*/, IsDebug));
}
/// AddOperand - Add the specified operand to the specified machine instr. II
@@ -305,9 +310,10 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
const TargetInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap) {
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug) {
if (Op.isMachineOpcode()) {
- AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
} else if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
MI->addOperand(MachineOperand::CreateImm(C->getSExtValue()));
} else if (ConstantFPSDNode *F = dyn_cast<ConstantFPSDNode>(Op)) {
@@ -356,7 +362,7 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
assert(Op.getValueType() != MVT::Other &&
Op.getValueType() != MVT::Flag &&
"Chain and flag operands should occur at end of operand list!");
- AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap);
+ AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, IsDebug);
}
}
@@ -498,164 +504,156 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
assert(isNew && "Node emitted out of order - early");
}
-/// EmitDbgValue - Generate any debug info that refers to this Node. Constant
-/// dbg_value is not handled here.
-void
-InstrEmitter::EmitDbgValue(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap,
- SDDbgValue *sd) {
- if (!Node->getHasDebugValue())
- return;
- if (!sd)
- return;
- assert(sd->getKind() == SDDbgValue::SDNODE);
- unsigned VReg = getVR(SDValue(sd->getSDNode(), sd->getResNo()), VRBaseMap);
- const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
- DebugLoc DL = sd->getDebugLoc();
- MachineInstr *MI;
- if (VReg) {
- MI = BuildMI(*MF, DL, II).addReg(VReg, RegState::Debug).
- addImm(sd->getOffset()).
- addMetadata(sd->getMDPtr());
- } else {
- // Insert an Undef so we can see what we dropped.
- MI = BuildMI(*MF, DL, II).addReg(0U).addImm(sd->getOffset()).
- addMetadata(sd->getMDPtr());
- }
- MBB->insert(InsertPos, MI);
-}
-
-/// EmitDbgValue - Generate debug info that does not refer to a SDNode.
-void
-InstrEmitter::EmitDbgValue(SDDbgValue *sd,
+/// EmitDbgValue - Generate machine instruction for a dbg_value node.
+///
+MachineInstr *InstrEmitter::EmitDbgValue(SDDbgValue *SD,
+ MachineBasicBlock *InsertBB,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
- if (!sd)
- return;
+ uint64_t Offset = SD->getOffset();
+ MDNode* MDPtr = SD->getMDPtr();
+ DebugLoc DL = SD->getDebugLoc();
+
const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
- uint64_t Offset = sd->getOffset();
- MDNode* mdPtr = sd->getMDPtr();
- SDDbgValue::DbgValueKind kind = sd->getKind();
- DebugLoc DL = sd->getDebugLoc();
- MachineInstr* MI;
- if (kind == SDDbgValue::CONST) {
- Value *V = sd->getConst();
+ MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
+ if (SD->getKind() == SDDbgValue::SDNODE) {
+ AddOperand(&*MIB, SDValue(SD->getSDNode(), SD->getResNo()),
+ (*MIB).getNumOperands(), &II, VRBaseMap, true /*IsDebug*/);
+ } else if (SD->getKind() == SDDbgValue::CONST) {
+ Value *V = SD->getConst();
if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- MI = BuildMI(*MF, DL, II).addImm(CI->getZExtValue()).
- addImm(Offset).addMetadata(mdPtr);
+ MIB.addImm(CI->getSExtValue());
} else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
- MI = BuildMI(*MF, DL, II).addFPImm(CF).
- addImm(Offset).addMetadata(mdPtr);
+ MIB.addFPImm(CF);
} else {
// Could be an Undef. In any case insert an Undef so we can see what we
// dropped.
- MI = BuildMI(*MF, DL, II).addReg(0U).
- addImm(Offset).addMetadata(mdPtr);
+ MIB.addReg(0U);
}
- } else if (kind == SDDbgValue::FRAMEIX) {
- unsigned FrameIx = sd->getFrameIx();
+ } else if (SD->getKind() == SDDbgValue::FRAMEIX) {
+ unsigned FrameIx = SD->getFrameIx();
// Stack address; this needs to be lowered in target-dependent fashion.
// FIXME test that the target supports this somehow; if not emit Undef.
// Create a pseudo for EmitInstrWithCustomInserter's consumption.
- MI = BuildMI(*MF, DL, II).addImm(FrameIx).
- addImm(Offset).addMetadata(mdPtr);
- MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
- InsertPos = MBB->end();
- return;
+ MIB.addImm(FrameIx).addImm(Offset).addMetadata(MDPtr);
+ abort();
+ TLI->EmitInstrWithCustomInserter(&*MIB, InsertBB, EM);
+ return 0;
} else {
// Insert an Undef so we can see what we dropped.
- MI = BuildMI(*MF, DL, II).addReg(0U).
- addImm(Offset).addMetadata(mdPtr);
+ MIB.addReg(0U);
}
- MBB->insert(InsertPos, MI);
+
+ MIB.addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
}
-/// EmitNode - Generate machine code for a node and needed dependencies.
+/// EmitMachineNode - Generate machine code for a target-specific node and
+/// needed dependencies.
///
-void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
- DenseMap<SDValue, unsigned> &VRBaseMap,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
- // If machine instruction
- if (Node->isMachineOpcode()) {
- unsigned Opc = Node->getMachineOpcode();
-
- // Handle subreg insert/extract specially
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::INSERT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG) {
- EmitSubregNode(Node, VRBaseMap);
- return;
- }
+void InstrEmitter::
+EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+ unsigned Opc = Node->getMachineOpcode();
+
+ // Handle subreg insert/extract specially
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG) {
+ EmitSubregNode(Node, VRBaseMap);
+ return;
+ }
- // Handle COPY_TO_REGCLASS specially.
- if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
- EmitCopyToRegClassNode(Node, VRBaseMap);
- return;
- }
+ // Handle COPY_TO_REGCLASS specially.
+ if (Opc == TargetOpcode::COPY_TO_REGCLASS) {
+ EmitCopyToRegClassNode(Node, VRBaseMap);
+ return;
+ }
- if (Opc == TargetOpcode::IMPLICIT_DEF)
- // We want a unique VR for each IMPLICIT_DEF use.
- return;
-
- const TargetInstrDesc &II = TII->get(Opc);
- unsigned NumResults = CountResults(Node);
- unsigned NodeOperands = CountOperands(Node);
- bool HasPhysRegOuts = (NumResults > II.getNumDefs()) &&
- II.getImplicitDefs() != 0;
+ if (Opc == TargetOpcode::IMPLICIT_DEF)
+ // We want a unique VR for each IMPLICIT_DEF use.
+ return;
+
+ const TargetInstrDesc &II = TII->get(Opc);
+ unsigned NumResults = CountResults(Node);
+ unsigned NodeOperands = CountOperands(Node);
+ bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
#ifndef NDEBUG
- unsigned NumMIOperands = NodeOperands + NumResults;
- assert((II.getNumOperands() == NumMIOperands ||
- HasPhysRegOuts || II.isVariadic()) &&
- "#operands for dag node doesn't match .td file!");
+ unsigned NumMIOperands = NodeOperands + NumResults;
+ if (II.isVariadic())
+ assert(NumMIOperands >= II.getNumOperands() &&
+ "Too few operands for a variadic node!");
+ else
+ assert(NumMIOperands >= II.getNumOperands() &&
+ NumMIOperands <= II.getNumOperands()+II.getNumImplicitDefs() &&
+ "#operands for dag node doesn't match .td file!");
#endif
- // Create the new machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
-
- // Add result register values for things that are defined by this
- // instruction.
- if (NumResults)
- CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
-
- // Emit all of the actual operands of this instruction, adding them to the
- // instruction as appropriate.
- bool HasOptPRefs = II.getNumDefs() > NumResults;
- assert((!HasOptPRefs || !HasPhysRegOuts) &&
- "Unable to cope with optional defs and phys regs defs!");
- unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
- for (unsigned i = NumSkip; i != NodeOperands; ++i)
- AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
- VRBaseMap);
-
- // Transfer all of the memory reference descriptions of this instruction.
- MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
- cast<MachineSDNode>(Node)->memoperands_end());
-
- if (II.usesCustomInsertionHook()) {
- // Insert this instruction into the basic block using a target
- // specific inserter which may returns a new basic block.
- MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
- InsertPos = MBB->end();
- } else {
- MBB->insert(InsertPos, MI);
- }
+ // Create the new machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II);
+
+ // Add result register values for things that are defined by this
+ // instruction.
+ if (NumResults)
+ CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap);
+
+ // Emit all of the actual operands of this instruction, adding them to the
+ // instruction as appropriate.
+ bool HasOptPRefs = II.getNumDefs() > NumResults;
+ assert((!HasOptPRefs || !HasPhysRegOuts) &&
+ "Unable to cope with optional defs and phys regs defs!");
+ unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0;
+ for (unsigned i = NumSkip; i != NodeOperands; ++i)
+ AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+ VRBaseMap);
+
+ // Transfer all of the memory reference descriptions of this instruction.
+ MI->setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
+ cast<MachineSDNode>(Node)->memoperands_end());
+
+ if (II.usesCustomInsertionHook()) {
+ // Insert this instruction into the basic block using a target
+    // specific inserter which may return a new basic block.
+ MBB = TLI->EmitInstrWithCustomInserter(MI, MBB, EM);
+ InsertPos = MBB->end();
+ return;
+ }
+
+ MBB->insert(InsertPos, MI);
- // Additional results must be an physical register def.
- if (HasPhysRegOuts) {
- for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
- unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
- if (Node->hasAnyUseOfValue(i))
- EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
- // If there are no uses, mark the register as dead now, so that
- // MachineLICM/Sink can see that it's dead. Don't do this if the
- // node has a Flag value, for the benefit of targets still using
- // Flag for values in physregs.
- else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
- MI->addRegisterDead(Reg, TRI);
- }
+  // Additional results must be physical register defs.
+ if (HasPhysRegOuts) {
+ for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
+ unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+ if (Node->hasAnyUseOfValue(i))
+ EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
+ // If there are no uses, mark the register as dead now, so that
+ // MachineLICM/Sink can see that it's dead. Don't do this if the
+ // node has a Flag value, for the benefit of targets still using
+ // Flag for values in physregs.
+ else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ MI->addRegisterDead(Reg, TRI);
}
- return;
}
+
+ // If the instruction has implicit defs and the node doesn't, mark the
+ // implicit def as dead. If the node has any flag outputs, we don't do this
+ // because we don't know what implicit defs are being used by flagged nodes.
+ if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ if (const unsigned *IDList = II.getImplicitDefs()) {
+ for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
+ i != e; ++i)
+ MI->addRegisterDead(IDList[i-II.getNumDefs()], TRI);
+ }
+ return;
+}
+/// EmitSpecialNode - Generate machine code for a target-independent node and
+/// needed dependencies.
+void InstrEmitter::
+EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) {
switch (Node->getOpcode()) {
default:
#ifndef NDEBUG
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index eefcd73..baabb75 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -64,7 +64,8 @@ class InstrEmitter {
void AddRegisterOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
const TargetInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug = false);
/// AddOperand - Add the specified operand to the specified machine instr. II
/// specifies the instruction information for the node, and IIOpNum is the
@@ -73,7 +74,8 @@ class InstrEmitter {
void AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
const TargetInstrDesc *II,
- DenseMap<SDValue, unsigned> &VRBaseMap);
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ bool IsDebug = false);
/// EmitSubregNode - Generate machine code for subreg nodes.
///
@@ -98,22 +100,23 @@ public:
/// MachineInstr.
static unsigned CountOperands(SDNode *Node);
- /// EmitDbgValue - Generate any debug info that refers to this Node. Constant
- /// dbg_value is not handled here.
- void EmitDbgValue(SDNode *Node,
- DenseMap<SDValue, unsigned> &VRBaseMap,
- SDDbgValue* sd);
-
-
- /// EmitDbgValue - Generate a constant DBG_VALUE. No node is involved.
- void EmitDbgValue(SDDbgValue* sd,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+ /// EmitDbgValue - Generate machine instruction for a dbg_value node.
+ ///
+ MachineInstr *EmitDbgValue(SDDbgValue *SD,
+ MachineBasicBlock *InsertBB,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
/// EmitNode - Generate machine code for a node and needed dependencies.
///
void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap,
- DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
+ if (Node->isMachineOpcode())
+ EmitMachineNode(Node, IsClone, IsCloned, VRBaseMap, EM);
+ else
+ EmitSpecialNode(Node, IsClone, IsCloned, VRBaseMap);
+ }
/// getBlock - Return the current basic block.
MachineBasicBlock *getBlock() { return MBB; }
@@ -124,6 +127,13 @@ public:
/// InstrEmitter - Construct an InstrEmitter and set it to start inserting
/// at the given position in the given block.
InstrEmitter(MachineBasicBlock *mbb, MachineBasicBlock::iterator insertpos);
+
+private:
+ void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM);
+ void EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap);
};
}
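[Editorial sketch] The header change above splits node emission while keeping the public entry point: EmitNode becomes an inline dispatcher over two private workers, so callers are untouched. The shape of that refactoring on a throwaway class:

    class Emitter {
    public:
      // Stable public entry point; callers need not know about the split.
      void emit(bool IsMachineOp) {
        if (IsMachineOp)
          emitMachine();
        else
          emitSpecial();
      }

    private:
      void emitMachine() { /* target-specific path */ }
      void emitSpecial() { /* target-independent path */ }
    };
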
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index dbbd753..7638ea2 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -47,10 +47,12 @@ private:
uint64_t Offset;
DebugLoc DL;
unsigned Order;
+ bool Invalid;
public:
// Constructor for non-constants.
SDDbgValue(MDNode *mdP, SDNode *N, unsigned R, uint64_t off, DebugLoc dl,
- unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O) {
+ unsigned O) : mdPtr(mdP), Offset(off), DL(dl), Order(O),
+ Invalid(false) {
kind = SDNODE;
u.s.Node = N;
u.s.ResNo = R;
@@ -58,14 +60,14 @@ public:
// Constructor for constants.
SDDbgValue(MDNode *mdP, Value *C, uint64_t off, DebugLoc dl, unsigned O) :
- mdPtr(mdP), Offset(off), DL(dl), Order(O) {
+ mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
kind = CONST;
u.Const = C;
}
// Constructor for frame indices.
SDDbgValue(MDNode *mdP, unsigned FI, uint64_t off, DebugLoc dl, unsigned O) :
- mdPtr(mdP), Offset(off), DL(dl), Order(O) {
+ mdPtr(mdP), Offset(off), DL(dl), Order(O), Invalid(false) {
kind = FRAMEIX;
u.FrameIx = FI;
}
@@ -97,6 +99,12 @@ public:
// Returns the SDNodeOrder. This is the order of the preceding node in the
// input.
unsigned getOrder() { return Order; }
+
+ // setIsInvalidated / isInvalidated - Setter / getter of the "Invalidated"
+  // property. An SDDbgValue is invalid if the SDNode that produces the value is
+ // deleted.
+ void setIsInvalidated() { Invalid = true; }
+ bool isInvalidated() { return Invalid; }
};
} // end llvm namespace
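[Editorial sketch] The new Invalid flag lets SelectionDAG mark a debug value dead when its defining node is deallocated, instead of eagerly erasing it from a side table; consumers simply skip flagged entries. A minimal standalone version of the idiom (illustrative names):

    #include <vector>

    struct DbgValue {
      int Payload;
      bool Invalid;
      explicit DbgValue(int P) : Payload(P), Invalid(false) {}
      void setIsInvalidated() { Invalid = true; }
      bool isInvalidated() const { return Invalid; }
    };

    static int sumLive(const std::vector<DbgValue> &DVs) {
      int Sum = 0;
      for (unsigned i = 0, e = DVs.size(); i != e; ++i)
        if (!DVs[i].isInvalidated())  // dead entries stay put, just skipped
          Sum += DVs[i].Payload;
      return Sum;
    }
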
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index c13565a..e7ab2f0 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -23,6 +23,7 @@
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
@@ -407,19 +408,65 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
}
}
+namespace {
+ struct OrderSorter {
+ bool operator()(const std::pair<unsigned, MachineInstr*> &A,
+ const std::pair<unsigned, MachineInstr*> &B) {
+ return A.first < B.first;
+ }
+ };
+}
+
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ SmallSet<unsigned, 8> &Seen) {
+ unsigned Order = DAG->GetOrdering(N);
+ if (!Order || !Seen.insert(Order))
+ return;
+
+ MachineBasicBlock *BB = Emitter.getBlock();
+ if (BB->empty() || BB->back().isPHI()) {
+ // Did not insert any instruction.
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+ return;
+ }
+
+ Orders.push_back(std::make_pair(Order, &BB->back()));
+ if (!N->getHasDebugValue())
+ return;
+ // Opportunistically insert immediate dbg_value uses, i.e. those with source
+  // order number right after N.
+ MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
+ SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
+ for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
+ if (DVs[i]->isInvalidated())
+ continue;
+ unsigned DVOrder = DVs[i]->getOrder();
+ if (DVOrder == ++Order) {
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], BB, VRBaseMap, EM);
+ Orders.push_back(std::make_pair(DVOrder, DbgMI));
+ BB->insert(InsertPos, DbgMI);
+ DVs[i]->setIsInvalidated();
+ }
+ }
+}
+
+
/// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGSDNodes::
EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
InstrEmitter Emitter(BB, InsertPos);
DenseMap<SDValue, unsigned> VRBaseMap;
DenseMap<SUnit*, unsigned> CopyVRBaseMap;
-
- // For now, any constant debug info nodes go at the beginning.
- for (SDDbgInfo::ConstDbgIterator I = DAG->DbgConstBegin(),
- E = DAG->DbgConstEnd(); I!=E; I++) {
- Emitter.EmitDbgValue(*I, EM);
- delete *I;
- }
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> Orders;
+ SmallSet<unsigned, 8> Seen;
+ bool HasDbg = DAG->hasDebugValues();
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
SUnit *SU = Sequence[i];
@@ -442,22 +489,80 @@ EmitSchedule(DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) {
N = N->getFlaggedNode())
FlaggedNodes.push_back(N);
while (!FlaggedNodes.empty()) {
+ SDNode *N = FlaggedNodes.back();
Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap, EM);
- if (FlaggedNodes.back()->getHasDebugValue())
- if (SDDbgValue *sd = DAG->GetDbgInfo(FlaggedNodes.back())) {
- Emitter.EmitDbgValue(FlaggedNodes.back(), VRBaseMap, sd);
- delete sd;
- }
+      // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(N, DAG, Emitter, EM, VRBaseMap, Orders, Seen);
FlaggedNodes.pop_back();
}
Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap, EM);
- if (SU->getNode()->getHasDebugValue())
- if (SDDbgValue *sd = DAG->GetDbgInfo(SU->getNode())) {
- Emitter.EmitDbgValue(SU->getNode(), VRBaseMap, sd);
- delete sd;
+    // Remember the source order of the inserted instruction.
+ if (HasDbg)
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, EM, VRBaseMap, Orders,
+ Seen);
+ }
+
+  // Insert all the dbg_values that have not already been inserted, in
+  // source order.
+ if (HasDbg) {
+ MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin();
+ while (BBBegin != BB->end() && BBBegin->isPHI())
+ ++BBBegin;
+
+ // Sort the source order instructions and use the order to insert debug
+ // values.
+ std::sort(Orders.begin(), Orders.end(), OrderSorter());
+
+ SDDbgInfo::DbgIterator DI = DAG->DbgBegin();
+ SDDbgInfo::DbgIterator DE = DAG->DbgEnd();
+ // Now emit the rest according to source order.
+ unsigned LastOrder = 0;
+ MachineInstr *LastMI = 0;
+ for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) {
+ unsigned Order = Orders[i].first;
+ MachineInstr *MI = Orders[i].second;
+      // Insert all SDDbgValues whose order(s) are before "Order".
+ if (!MI)
+ continue;
+ MachineBasicBlock *MIBB = MI->getParent();
+#ifndef NDEBUG
+ unsigned LastDIOrder = 0;
+#endif
+ for (; DI != DE &&
+ (*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
+#ifndef NDEBUG
+ assert((*DI)->getOrder() >= LastDIOrder &&
+ "SDDbgValue nodes must be in source order!");
+ LastDIOrder = (*DI)->getOrder();
+#endif
+ if ((*DI)->isInvalidated())
+ continue;
+ MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, MIBB, VRBaseMap, EM);
+ if (!LastOrder)
+ // Insert to start of the BB (after PHIs).
+ BB->insert(BBBegin, DbgMI);
+ else {
+ MachineBasicBlock::iterator Pos = MI;
+ MIBB->insert(llvm::next(Pos), DbgMI);
+ }
}
+ LastOrder = Order;
+ LastMI = MI;
+ }
+    // Add trailing DbgValues before the terminator. FIXME: May want to add
+ // some of them before one or more conditional branches?
+ while (DI != DE) {
+ MachineBasicBlock *InsertBB = Emitter.getBlock();
+ MachineBasicBlock::iterator Pos= Emitter.getBlock()->getFirstTerminator();
+ if (!(*DI)->isInvalidated()) {
+ MachineInstr *DbgMI= Emitter.EmitDbgValue(*DI, InsertBB, VRBaseMap, EM);
+ InsertBB->insert(Pos, DbgMI);
+ }
+ ++DI;
+ }
}
BB = Emitter.getBlock();
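[Editorial sketch] EmitSchedule above records each emitted instruction as a (source order, instruction) pair, sorts the pairs, and then walks the remaining dbg_values in order, placing each one after the last instruction whose order precedes it (or at the block start). The placement logic in isolation, on toy data where instructions are just ids:

    #include <algorithm>
    #include <utility>
    #include <vector>

    struct OrderLess {
      bool operator()(const std::pair<unsigned, int> &A,
                      const std::pair<unsigned, int> &B) const {
        return A.first < B.first;
      }
    };

    // Returns the id of the instruction a debug entry with order DbgOrder
    // should follow, or -1 to place it at the start of the block.
    static int placeAfter(std::vector<std::pair<unsigned, int> > Orders,
                          unsigned DbgOrder) {
      std::sort(Orders.begin(), Orders.end(), OrderLess());
      int Last = -1;
      for (unsigned i = 0, e = Orders.size(); i != e; ++i) {
        if (Orders[i].first >= DbgOrder)
          break;
        Last = Orders[i].second;
      }
      return Last;
    }
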
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ed9146d..0ba65ab 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -598,8 +598,10 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
// Remove the ordering of this node.
Ordering->remove(N);
- // And its entry in the debug info table, if any.
- DbgInfo->remove(N);
+ // If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
+ SmallVector<SDDbgValue*, 2> &DbgVals = DbgInfo->getSDDbgValues(N);
+ for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
+ DbgVals[i]->setIsInvalidated();
}
/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
@@ -811,6 +813,7 @@ void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
SelectionDAG::~SelectionDAG() {
allnodes_clear();
delete Ordering;
+ DbgInfo->clear();
delete DbgInfo;
}
@@ -839,6 +842,7 @@ void SelectionDAG::clear() {
Root = getEntryNode();
delete Ordering;
Ordering = new SDNodeOrdering();
+ DbgInfo->clear();
delete DbgInfo;
DbgInfo = new SDDbgInfo();
}
@@ -3128,11 +3132,17 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
if (Str.empty()) {
if (VT.isInteger())
return DAG.getConstant(0, VT);
- unsigned NumElts = VT.getVectorNumElements();
- MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
- DAG.getConstant(0,
- EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts)));
+ else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
+ VT.getSimpleVT().SimpleTy == MVT::f64)
+ return DAG.getConstantFP(0.0, VT);
+ else if (VT.isVector()) {
+ unsigned NumElts = VT.getVectorNumElements();
+ MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElts)));
+ } else
+ llvm_unreachable("Expected type!");
}
assert(!VT.isVector() && "Can't handle vector type here!");
@@ -3180,51 +3190,33 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) {
return false;
}
-/// MeetsMaxMemopRequirement - Determines if the number of memory ops required
-/// to replace the memset / memcpy is below the threshold. It also returns the
-/// types of the sequence of memory ops to perform memset / memcpy.
-static
-bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
- SDValue Dst, SDValue Src,
- unsigned Limit, uint64_t Size, unsigned &Align,
- std::string &Str, bool &isSrcStr,
- SelectionDAG &DAG,
- const TargetLowering &TLI) {
- isSrcStr = isMemSrcFromString(Src, Str);
- bool isSrcConst = isa<ConstantSDNode>(Src);
- EVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG);
- bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(VT);
- if (VT != MVT::Other) {
- const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
- unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
- // If source is a string constant, this will require an unaligned load.
- if (NewAlign > Align && (isSrcConst || AllowUnalign)) {
- if (Dst.getOpcode() != ISD::FrameIndex) {
- // Can't change destination alignment. It requires a unaligned store.
- if (AllowUnalign)
- VT = MVT::Other;
- } else {
- int FI = cast<FrameIndexSDNode>(Dst)->getIndex();
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- if (MFI->isFixedObjectIndex(FI)) {
- // Can't change destination alignment. It requires a unaligned store.
- if (AllowUnalign)
- VT = MVT::Other;
- } else {
- // Give the stack frame object a larger alignment if needed.
- if (MFI->getObjectAlignment(FI) < NewAlign)
- MFI->setObjectAlignment(FI, NewAlign);
- Align = NewAlign;
- }
- }
- }
- }
+/// FindOptimalMemOpLowering - Determines the optimal series of memory ops
+/// to replace the memset / memcpy. Returns true if the number of memory ops
+/// is below the threshold. It returns, by reference, the types of the
+/// sequence of memory ops used to perform the memset / memcpy.
+static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool SafeToUseFP,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
+ "Expecting memcpy / memset source to meet alignment requirement!");
+  // If 'SrcAlign' is zero, that means the memory operation does not need to
+  // load the value, i.e. it is a memset or a memcpy from a constant string.
+  // Otherwise, it's the inferred alignment of the source. 'DstAlign', on the
+  // other hand, is the specified alignment of the memory operation. If it is
+  // zero, that means it's possible to change the alignment of the destination.
+ EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, SafeToUseFP, DAG);
if (VT == MVT::Other) {
- if (TLI.allowsUnalignedMemoryAccesses(MVT::i64)) {
+ VT = TLI.getPointerTy();
+ const Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ if (DstAlign >= TLI.getTargetData()->getABITypeAlignment(Ty) ||
+ TLI.allowsUnalignedMemoryAccesses(VT)) {
VT = MVT::i64;
} else {
- switch (Align & 7) {
+ switch (DstAlign & 7) {
case 0: VT = MVT::i64; break;
case 4: VT = MVT::i32; break;
case 2: VT = MVT::i16; break;
@@ -3246,7 +3238,7 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
unsigned VTSize = VT.getSizeInBits() / 8;
while (VTSize > Size) {
// For now, only use non-vector load / store's for the left-over pieces.
- if (VT.isVector()) {
+ if (VT.isVector() || VT.isFloatingPoint()) {
VT = MVT::i64;
while (!TLI.isTypeLegal(VT))
VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
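
As a rough model of what FindOptimalMemOpLowering returns, the code greedily covers Size with the widest store the alignment permits and falls back to narrower integer types for the left-over pieces, as in the VTSize loop above. A simplified sketch using byte widths in place of EVTs (the switch mirrors the DstAlign & 7 one):

    #include <cstdio>
    #include <vector>

    int main() {
      unsigned long long Size = 15; // bytes to set / copy
      unsigned DstAlign = 4;        // destination alignment in bytes

      unsigned Width;               // widest usable store width
      switch (DstAlign & 7) {
      case 0: Width = 8; break;
      case 4: Width = 4; break;
      case 2: Width = 2; break;
      default: Width = 1; break;
      }

      std::vector<unsigned> MemOps;
      while (Size) {
        while (Width > Size)
          Width /= 2;               // shrink for the left-over pieces
        MemOps.push_back(Width);
        Size -= Width;
      }
      for (unsigned i = 0; i != MemOps.size(); ++i)
        std::printf("store %u byte(s)\n", MemOps[i]); // 4, 4, 4, 2, 1
    }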
@@ -3269,11 +3261,11 @@ bool MeetsMaxMemopRequirement(std::vector<EVT> &MemOps,
}
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain, SDValue Dst,
- SDValue Src, uint64_t Size,
- unsigned Align, bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff){
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Expand memcpy to a series of load and store ops if the size operand falls
@@ -3282,15 +3274,33 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
uint64_t Limit = -1ULL;
if (!AlwaysInline)
Limit = TLI.getMaxStoresPerMemcpy();
- unsigned DstAlign = Align; // Destination alignment can change.
+ bool DstAlignCanChange = false;
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
std::string Str;
- bool CopyFromStr;
- if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
- Str, CopyFromStr, DAG, TLI))
+ bool CopyFromStr = isMemSrcFromString(Src, Str);
+ bool isZeroStr = CopyFromStr && Str.empty();
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align),
+ (isZeroStr ? 0 : SrcAlign), true, DAG, TLI))
return SDValue();
+ if (DstAlignCanChange) {
+ const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
- bool isZeroStr = CopyFromStr && Str.empty();
SmallVector<SDValue, 8> OutChains;
unsigned NumMemOps = MemOps.size();
uint64_t SrcOff = 0, DstOff = 0;
@@ -3299,16 +3309,17 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value, Store;
- if (CopyFromStr && (isZeroStr || !VT.isVector())) {
+ if (CopyFromStr &&
+ (isZeroStr || (VT.isInteger() && !VT.isVector()))) {
// It's unlikely a store of a vector immediate can be done in a single
// instruction. It would require a load from a constantpool first.
- // We also handle store a vector with all zero's.
+ // We only handle zero vectors here.
// FIXME: Handle other cases where store of vector immediate is done in
// a single instruction.
Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, false, false, DstAlign);
+ DstSV, DstSVOff + DstOff, false, false, Align);
} else {
// The type might not be legal for the target. This should only happen
// if the type is smaller than a legal type, as on PPC, so the right
@@ -3319,11 +3330,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
assert(NVT.bitsGE(VT));
Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, VT, false, false, Align);
+ SrcSV, SrcSVOff + SrcOff, VT, false, false,
+ MinAlign(SrcAlign, SrcOff));
Store = DAG.getTruncStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
DstSV, DstSVOff + DstOff, VT, false, false,
- DstAlign);
+ Align);
}
OutChains.push_back(Store);
SrcOff += VTSize;
@@ -3335,11 +3347,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
}
static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain, SDValue Dst,
- SDValue Src, uint64_t Size,
- unsigned Align, bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff){
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+                                        unsigned Align, bool AlwaysInline,
+ const Value *DstSV, uint64_t DstSVOff,
+ const Value *SrcSV, uint64_t SrcSVOff) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Expand memmove to a series of load and store ops if the size operand falls
@@ -3348,15 +3360,32 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
uint64_t Limit = -1ULL;
if (!AlwaysInline)
Limit = TLI.getMaxStoresPerMemmove();
- unsigned DstAlign = Align; // Destination alignment can change.
- std::string Str;
- bool CopyFromStr;
- if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, Limit, Size, DstAlign,
- Str, CopyFromStr, DAG, TLI))
+ bool DstAlignCanChange = false;
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ if (Align > SrcAlign)
+ SrcAlign = Align;
+
+ if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
+ (DstAlignCanChange ? 0 : Align),
+ SrcAlign, true, DAG, TLI))
return SDValue();
- uint64_t SrcOff = 0, DstOff = 0;
+ if (DstAlignCanChange) {
+ const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+ uint64_t SrcOff = 0, DstOff = 0;
SmallVector<SDValue, 8> LoadValues;
SmallVector<SDValue, 8> LoadChains;
SmallVector<SDValue, 8> OutChains;
@@ -3368,7 +3397,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, false, false, Align);
+ SrcSV, SrcSVOff + SrcOff, false, false, SrcAlign);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -3383,7 +3412,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Store = DAG.getStore(Chain, dl, LoadValues[i],
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, false, false, DstAlign);
+ DstSV, DstSVOff + DstOff, false, false, Align);
OutChains.push_back(Store);
DstOff += VTSize;
}
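
One property of getMemmoveLoadsAndStores worth calling out: it collects every load into LoadValues/LoadChains before emitting any store, because unlike memcpy the source and destination may overlap, so no store may clobber bytes that are still to be read. The same idea as a plain scalar sketch (hypothetical helper, at most 16 chunks):

    #include <cstdio>

    // Copy n 8-byte chunks, completing all loads before any store; this is
    // what makes the copy safe when dst and src overlap.
    static void inline_memmove8(unsigned long long *dst,
                                const unsigned long long *src, unsigned n) {
      unsigned long long tmp[16];
      for (unsigned i = 0; i != n; ++i) tmp[i] = src[i];  // all loads first
      for (unsigned i = 0; i != n; ++i) dst[i] = tmp[i];  // then all stores
    }

    int main() {
      unsigned long long buf[4] = { 1, 2, 3, 4 };
      inline_memmove8(buf + 1, buf, 3); // overlapping move within buf
      std::printf("%llu %llu %llu %llu\n", buf[0], buf[1], buf[2], buf[3]);
      // prints: 1 1 2 3
    }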
@@ -3393,24 +3422,40 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
}
static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
- SDValue Chain, SDValue Dst,
- SDValue Src, uint64_t Size,
- unsigned Align,
- const Value *DstSV, uint64_t DstSVOff) {
+ SDValue Chain, SDValue Dst,
+ SDValue Src, uint64_t Size,
+ unsigned Align,
+ const Value *DstSV, uint64_t DstSVOff) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Expand memset to a series of load/store ops if the size operand
// falls below a certain threshold.
std::vector<EVT> MemOps;
- std::string Str;
- bool CopyFromStr;
- if (!MeetsMaxMemopRequirement(MemOps, Dst, Src, TLI.getMaxStoresPerMemset(),
- Size, Align, Str, CopyFromStr, DAG, TLI))
+ bool DstAlignCanChange = false;
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
+ if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ DstAlignCanChange = true;
+ bool IsZero = isa<ConstantSDNode>(Src) &&
+ cast<ConstantSDNode>(Src)->isNullValue();
+ if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
+ Size, (DstAlignCanChange ? 0 : Align), 0,
+ IsZero, DAG, TLI))
return SDValue();
+ if (DstAlignCanChange) {
+ const Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext());
+ unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (NewAlign > Align) {
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ Align = NewAlign;
+ }
+ }
+
SmallVector<SDValue, 8> OutChains;
uint64_t DstOff = 0;
-
unsigned NumMemOps = MemOps.size();
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
@@ -3441,10 +3486,9 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
if (ConstantSize->isNullValue())
return Chain;
- SDValue Result =
- getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
- ConstantSize->getZExtValue(),
- Align, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+    SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
+                                             ConstantSize->getZExtValue(), Align,
+ false, DstSV, DstSVOff, SrcSV, SrcSVOff);
if (Result.getNode())
return Result;
}
@@ -4846,6 +4890,26 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
return NULL;
}
+/// getDbgValue - Creates an SDDbgValue node.
+///
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, SDNode *N, unsigned R, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, N, R, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, Value *C, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, C, Off, DL, O);
+}
+
+SDDbgValue *
+SelectionDAG::getDbgValue(MDNode *MDPtr, unsigned FI, uint64_t Off,
+ DebugLoc DL, unsigned O) {
+ return new (Allocator) SDDbgValue(MDPtr, FI, Off, DL, O);
+}
+
namespace {
/// RAUWUpdateListener - Helper for ReplaceAllUsesWith - When the node
@@ -5241,24 +5305,12 @@ unsigned SelectionDAG::GetOrdering(const SDNode *SD) const {
return Ordering->getOrder(SD);
}
-/// AssignDbgInfo - Assign debug info to the SDNode.
-void SelectionDAG::AssignDbgInfo(SDNode* SD, SDDbgValue* db) {
- assert(SD && "Trying to assign dbg info to a null node!");
- DbgInfo->add(SD, db);
- SD->setHasDebugValue(true);
-}
-
-/// RememberDbgInfo - Remember debug info which is not assigned to an SDNode.
-void SelectionDAG::RememberDbgInfo(SDDbgValue* db) {
- DbgInfo->add(db);
-}
-
-/// GetDbgInfo - Get the debug info, if any, for the SDNode.
-SDDbgValue* SelectionDAG::GetDbgInfo(const SDNode *SD) {
- assert(SD && "Trying to get the order of a null node!");
- if (SD->getHasDebugValue())
- return DbgInfo->getSDDbgValue(SD);
- return 0;
+/// AddDbgValue - Add a dbg_value SDNode. If SD is non-null, that means the
+/// value is produced by SD.
+void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD) {
+ DbgInfo->add(DB, SD);
+ if (SD)
+ SD->setHasDebugValue(true);
}
//===----------------------------------------------------------------------===//
@@ -6094,8 +6146,20 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
// If this is a GlobalAddress + cst, return the alignment.
GlobalValue *GV;
int64_t GVOffset = 0;
- if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset))
- return MinAlign(GV->getAlignment(), GVOffset);
+ if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) {
+    // If GV has a specified alignment, use it. Otherwise, use the preferred
+    // alignment.
+ unsigned Align = GV->getAlignment();
+ if (!Align) {
+ if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ if (GVar->hasInitializer()) {
+ const TargetData *TD = TLI.getTargetData();
+ Align = TD->getPreferredAlignment(GVar);
+ }
+ }
+ }
+ return MinAlign(Align, GVOffset);
+ }
// If this is a direct reference to a stack slot, use information about the
// stack slot's alignment.
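
The MinAlign(Align, GVOffset) at the end of the hunk is what keeps the inference sound for interior pointers: even if the global is 16-byte aligned, an access at GV+4 can only be assumed 4-byte aligned. LLVM's MinAlign is the largest power of two dividing both arguments; a self-contained sketch:

    #include <cstdio>

    // Largest power of two dividing both A and B, i.e. the lowest set bit
    // of A | B (mirrors llvm::MinAlign).
    static unsigned long long MinAlign(unsigned long long A, unsigned long long B) {
      return (A | B) & (1 + ~(A | B));
    }

    int main() {
      unsigned long long GVAlign = 16; // alignment of the global itself
      unsigned long long Off[] = { 0, 4, 6, 32 };
      for (int i = 0; i != 4; ++i)
        std::printf("align(GV+%llu) = %llu\n", Off[i], MinAlign(GVAlign, Off[i]));
      // prints 16, 4, 2, 16
    }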
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 12096b9..922c6e8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3800,7 +3800,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
int FI = SI->second;
if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo())
- if (MDNode *Dbg = DI.getMetadata("dbg"))
+ if (MDNode *Dbg = DI.getDbgMetadata())
MMI->setVariableDbgInfo(Variable, FI, Dbg);
return 0;
}
@@ -3824,22 +3824,19 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
// debug info exists.
++SDNodeOrder;
if (isa<ConstantInt>(V) || isa<ConstantFP>(V)) {
- SDDbgValue* dv = new SDDbgValue(Variable, V, Offset, dl, SDNodeOrder);
- DAG.RememberDbgInfo(dv);
+ DAG.AddDbgValue(DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder));
} else {
SDValue &N = NodeMap[V];
- if (N.getNode()) {
- SDDbgValue *dv = new SDDbgValue(Variable, N.getNode(),
- N.getResNo(), Offset, dl, SDNodeOrder);
- DAG.AssignDbgInfo(N.getNode(), dv);
- } else {
+ if (N.getNode())
+ DAG.AddDbgValue(DAG.getDbgValue(Variable, N.getNode(),
+ N.getResNo(), Offset, dl, SDNodeOrder),
+ N.getNode());
+ else
// We may expand this to cover more cases. One case where we have no
// data available is an unreferenced parameter; we need this fallback.
- SDDbgValue* dv = new SDDbgValue(Variable,
+ DAG.AddDbgValue(DAG.getDbgValue(Variable,
UndefValue::get(V->getType()),
- Offset, dl, SDNodeOrder);
- DAG.RememberDbgInfo(dv);
- }
+ Offset, dl, SDNodeOrder));
}
// Build a debug info table entry.
@@ -3855,7 +3852,7 @@ SelectionDAGBuilder::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
return 0; // VLAs.
int FI = SI->second;
if (MachineModuleInfo *MMI = DAG.getMachineModuleInfo())
- if (MDNode *Dbg = DI.getMetadata("dbg"))
+ if (MDNode *Dbg = DI.getDbgMetadata())
MMI->setVariableDbgInfo(Variable, FI, Dbg);
return 0;
}
@@ -6054,8 +6051,10 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
}
if (!I->use_empty()) {
- SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues,
- SDB->getCurDebugLoc());
+ SDValue Res;
+ if (!ArgValues.empty())
+ Res = DAG.getMergeValues(&ArgValues[0], NumValues,
+ SDB->getCurDebugLoc());
SDB->setValue(I, Res);
// If this argument is live outside of the entry block, insert a copy from
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index cbbe431..ea96b21 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -61,6 +61,7 @@
using namespace llvm;
STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumDAGIselRetries, "Number of times dag isel has to try another path");
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
@@ -365,23 +366,23 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is
/// attached to this instruction.
-static void SetDebugLoc(unsigned MDDbgKind, Instruction *I,
- SelectionDAGBuilder *SDB,
+static void SetDebugLoc(Instruction *I, SelectionDAGBuilder *SDB,
FastISel *FastIS, MachineFunction *MF) {
- if (MDNode *Dbg = I->getMetadata(MDDbgKind)) {
- DILocation DILoc(Dbg);
- DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
+ MDNode *Dbg = I->getDbgMetadata();
+ if (Dbg == 0) return;
+
+ DILocation DILoc(Dbg);
+ DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
- SDB->setCurDebugLoc(Loc);
+ SDB->setCurDebugLoc(Loc);
- if (FastIS)
- FastIS->setCurDebugLoc(Loc);
+ if (FastIS)
+ FastIS->setCurDebugLoc(Loc);
- // If the function doesn't have a default debug location yet, set
- // it. This is kind of a hack.
- if (MF->getDefaultDebugLoc().isUnknown())
- MF->setDefaultDebugLoc(Loc);
- }
+ // If the function doesn't have a default debug location yet, set
+ // it. This is kind of a hack.
+ if (MF->getDefaultDebugLoc().isUnknown())
+ MF->setDefaultDebugLoc(Loc);
}
/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown.
@@ -396,12 +397,11 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
BasicBlock::iterator End,
bool &HadTailCall) {
SDB->setCurrentBasicBlock(BB);
- unsigned MDDbgKind = LLVMBB->getContext().getMDKindID("dbg");
// Lower all of the non-terminator instructions. If a call is emitted
// as a tail call, cease emitting nodes for this block.
for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
- SetDebugLoc(MDDbgKind, I, SDB, 0, MF);
+ SetDebugLoc(I, SDB, 0, MF);
if (!isa<TerminatorInst>(I)) {
SDB->visit(*I);
@@ -424,7 +424,7 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
HandlePHINodesInSuccessorBlocks(LLVMBB);
// Lower the terminator after the copies are emitted.
- SetDebugLoc(MDDbgKind, LLVMBB->getTerminator(), SDB, 0, MF);
+ SetDebugLoc(LLVMBB->getTerminator(), SDB, 0, MF);
SDB->visit(*LLVMBB->getTerminator());
ResetDebugLoc(SDB, 0);
}
@@ -842,9 +842,6 @@ void SelectionDAGISel::DoInstructionSelection() {
DEBUG(errs() << "===== Instruction selection ends:\n");
PostprocessISelDAG();
-
- // FIXME: This shouldn't be needed, remove it.
- CurDAG->RemoveDeadNodes();
}
@@ -865,8 +862,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
#endif
);
- unsigned MDDbgKind = Fn.getContext().getMDKindID("dbg");
-
// Iterate over all basic blocks in the function.
for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
BasicBlock *LLVMBB = &*I;
@@ -964,7 +959,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
break;
}
- SetDebugLoc(MDDbgKind, BI, SDB, FastIS, &MF);
+ SetDebugLoc(BI, SDB, FastIS, &MF);
// Try to select the instruction with FastISel.
if (FastIS->SelectInstruction(BI)) {
@@ -1592,8 +1587,9 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
- // If the node became dead, delete it.
- if (ChainNode->use_empty())
+ // If the node became dead and we haven't already seen it, delete it.
+ if (ChainNode->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
NowDeadNodes.push_back(ChainNode);
}
}
@@ -1614,8 +1610,9 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
InputFlag, &ISU);
- // If the node became dead, delete it.
- if (FRN->use_empty())
+ // If the node became dead and we haven't already seen it, delete it.
+ if (FRN->use_empty() &&
+ !std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
NowDeadNodes.push_back(FRN);
}
}
@@ -1810,9 +1807,9 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
// It is possible we're using MorphNodeTo to replace a node with no
// normal results with one that has a normal result (or we could be
// adding a chain) and the input could have flags and chains as well.
- // In this case we need to shifting the operands down.
+ // In this case we need to shift the operands down.
// FIXME: This is a horrible hack and broken in obscure cases, no worse
- // than the old isel though. We should sink this into MorphNodeTo.
+ // than the old isel though.
int OldFlagResultNo = -1, OldChainResultNo = -1;
unsigned NTMNumResults = Node->getNumValues();
@@ -1888,7 +1885,9 @@ CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
ALWAYS_INLINE static bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
- return N->getOpcode() == MatcherTable[MatcherIndex++];
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ return N->getOpcode() == Opc;
}
ALWAYS_INLINE static bool
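
The CheckOpcode change above (and the matching ones in SelectCodeCommon below) widens matcher-table opcodes from one byte to two, stored low byte first. The decode in isolation:

    #include <cstdio>

    int main() {
      // Opcode 0x1234 encoded little-endian, as the matcher table now stores it.
      const unsigned char MatcherTable[] = { 0x34, 0x12 };
      unsigned MatcherIndex = 0;
      unsigned short Opc = MatcherTable[MatcherIndex++];
      Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
      std::printf("Opc = 0x%x\n", Opc); // 0x1234
    }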
@@ -2142,7 +2141,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (CaseSize == 0) break;
// Get the opcode, add the index to the table.
- unsigned Opc = MatcherTable[Idx++];
+ uint16_t Opc = MatcherTable[Idx++];
+ Opc |= (unsigned short)MatcherTable[Idx++] << 8;
if (Opc >= OpcodeOffset.size())
OpcodeOffset.resize((Opc+1)*2);
OpcodeOffset[Opc] = Idx;
@@ -2181,6 +2181,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
FailIndex = MatcherIndex+NumToSkip;
+ unsigned MatcherIndexOfPredicate = MatcherIndex;
+ (void)MatcherIndexOfPredicate; // silence warning.
+
// If we can't evaluate this predicate without pushing a scope (e.g. if
// it is a 'MoveParent') or if the predicate succeeds on this node, we
// push the scope and evaluate the full predicate chain.
@@ -2190,9 +2193,10 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (!Result)
break;
- DEBUG(errs() << " Skipped scope entry at index " << MatcherIndex
- << " continuing at " << FailIndex << "\n");
-
+ DEBUG(errs() << " Skipped scope entry (due to false predicate) at "
+ << "index " << MatcherIndexOfPredicate
+ << ", continuing at " << FailIndex << "\n");
+ ++NumDAGIselRetries;
// Otherwise, we know that this case of the Scope is guaranteed to fail,
// move to the next case.
@@ -2298,8 +2302,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
if (CaseSize == 0) break;
+ uint16_t Opc = MatcherTable[MatcherIndex++];
+ Opc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+
// If the opcode matches, then we will execute this case.
- if (CurNodeOpcode == MatcherTable[MatcherIndex++])
+ if (CurNodeOpcode == Opc)
break;
// Otherwise, skip over this case.
@@ -2428,6 +2435,35 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
continue;
}
+ case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
+ case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1
+ // These are space-optimized forms of OPC_EmitMergeInputChains.
+ assert(InputChain.getNode() == 0 &&
+ "EmitMergeInputChains should be the first chain producing node");
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
+    assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (InputChain.getNode() == 0)
+ break; // Failed to merge.
+ continue;
+ }
+
case OPC_EmitMergeInputChains: {
assert(InputChain.getNode() == 0 &&
"EmitMergeInputChains should be the first chain producing node");
@@ -2646,14 +2682,10 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
SDValue Res = RecordedNodes[ResSlot];
- // FIXME2: Eliminate this horrible hack by fixing the 'Gen' program
- // after (parallel) on input patterns are removed. This would also
- // allow us to stop encoding #results in OPC_CompleteMatch's table
- // entry.
- if (NodeToMatch->getNumValues() <= i ||
- NodeToMatch->getValueType(i) == MVT::Other ||
- NodeToMatch->getValueType(i) == MVT::Flag)
- break;
+ assert(i < NodeToMatch->getNumValues() &&
+ NodeToMatch->getValueType(i) != MVT::Other &&
+ NodeToMatch->getValueType(i) != MVT::Flag &&
+ "Invalid number of results to complete!");
assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
NodeToMatch->getValueType(i) == MVT::iPTR ||
Res.getValueType() == MVT::iPTR ||
@@ -2685,6 +2717,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// another child to try in the current 'Scope', otherwise pop it until we
// find a case to check.
DEBUG(errs() << " Match failed at index " << CurrentOpcodeIndex << "\n");
+ ++NumDAGIselRetries;
while (1) {
if (MatchScopes.empty()) {
CannotYetSelect(NodeToMatch);
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f7ef2d6..ea2ff2f 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -186,11 +186,13 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2";
Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2";
Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2";
- Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfi8";
- Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfi16";
+ Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi";
+ Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi";
Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi";
Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi";
Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti";
+ Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi";
+ Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi";
Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi";
Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi";
Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti";
@@ -200,11 +202,13 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi";
Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi";
Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti";
- Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfi8";
- Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfi16";
+ Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi";
+ Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi";
Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi";
Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi";
Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti";
+ Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi";
+ Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi";
Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi";
Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi";
Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti";
@@ -314,6 +318,10 @@ RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::i128)
return FPTOSINT_F32_I128;
} else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOSINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOSINT_F64_I16;
if (RetVT == MVT::i32)
return FPTOSINT_F64_I32;
if (RetVT == MVT::i64)
@@ -353,6 +361,10 @@ RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) {
if (RetVT == MVT::i128)
return FPTOUINT_F32_I128;
} else if (OpVT == MVT::f64) {
+ if (RetVT == MVT::i8)
+ return FPTOUINT_F64_I8;
+ if (RetVT == MVT::i16)
+ return FPTOUINT_F64_I16;
if (RetVT == MVT::i32)
return FPTOUINT_F64_I32;
if (RetVT == MVT::i64)
@@ -475,7 +487,6 @@ TargetLowering::TargetLowering(TargetMachine &tm,TargetLoweringObjectFile *tlof)
memset(LoadExtActions, 0, sizeof(LoadExtActions));
memset(TruncStoreActions, 0, sizeof(TruncStoreActions));
memset(IndexedModeActions, 0, sizeof(IndexedModeActions));
- memset(ConvertActions, 0, sizeof(ConvertActions));
memset(CondCodeActions, 0, sizeof(CondCodeActions));
// Set default actions for various operations.
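
The libcall renames follow the libgcc machine-mode suffix convention: qi, hi, si, di and ti name 8-, 16-, 32-, 64- and 128-bit integer modes, so __fixsfqi converts float to int8 and __fixunsdfhi converts double to uint16. How such a name decomposes, as an illustrative helper (not an LLVM API):

    #include <cstdio>
    #include <string>

    // Assemble a libgcc-style conversion name: __fix[uns]<fp mode><int mode>.
    static std::string FPToIntName(bool Unsigned, const char *FPMode,
                                   unsigned Bits) {
      const char *IntMode = Bits == 8 ? "qi" : Bits == 16 ? "hi" :
                            Bits == 32 ? "si" : Bits == 64 ? "di" : "ti";
      return std::string("__fix") + (Unsigned ? "uns" : "") + FPMode + IntMode;
    }

    int main() {
      std::printf("%s\n", FPToIntName(false, "sf", 8).c_str());  // __fixsfqi
      std::printf("%s\n", FPToIntName(true, "df", 16).c_str());  // __fixunsdfhi
    }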
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 97e858f..15ca374 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -59,11 +59,6 @@ DisableCrossClassJoin("disable-cross-class-join",
cl::desc("Avoid coalescing cross register class copies"),
cl::init(false), cl::Hidden);
-static cl::opt<bool>
-PhysJoinTweak("tweak-phys-join-heuristics",
- cl::desc("Tweak heuristics for joining phys reg with vr"),
- cl::init(false), cl::Hidden);
-
static RegisterPass<SimpleRegisterCoalescing>
X("simple-register-coalescing", "Simple Register Coalescing");
@@ -1445,7 +1440,6 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg);
const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg);
const TargetRegisterClass *NewRC = NULL;
- MachineBasicBlock *CopyMBB = CopyMI->getParent();
unsigned RealDstReg = 0;
unsigned RealSrcReg = 0;
if (isExtSubReg || isInsSubReg || isSubRegToReg) {
@@ -1646,8 +1640,8 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
else if (RealSrcReg)
SavedLI.reset(li_->dupInterval(&DstInt));
- // Check if it is necessary to propagate "isDead" property.
if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) {
+ // Check if it is necessary to propagate "isDead" property.
MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false);
bool isDead = mopd->isDead();
@@ -1656,60 +1650,42 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// these are not spillable! If the destination interval uses are far away,
// think twice about coalescing them!
if (!isDead && (SrcIsPhys || DstIsPhys)) {
- // If the copy is in a loop, take care not to coalesce aggressively if the
- // src is coming in from outside the loop (or the dst is out of the loop).
- // If it's not in a loop, then determine whether to join them base purely
- // by the length of the interval.
- if (PhysJoinTweak) {
- if (SrcIsPhys) {
- if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
- mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg);
- ++numAborts;
- DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
- } else {
- if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) {
- mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg);
- ++numAborts;
- DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
- }
- } else {
- // If the virtual register live interval is long but it has low use
- // density, do not join them, instead mark the physical register as its
- // allocation preference.
- LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
- LiveInterval &JoinPInt = SrcIsPhys ? SrcInt : DstInt;
- unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
- unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
-
- // Don't join with physregs that have a ridiculous number of live
- // ranges. The data structure performance is really bad when that
- // happens.
- if (JoinPInt.ranges.size() > 1000) {
- mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
- ++numAborts;
- DEBUG(dbgs() << "\tPhysical register too complicated, abort!\n");
- return false;
- }
+      // If the virtual register live interval is long but it has low use
+      // density, do not join them; instead, mark the physical register as its
+      // allocation preference.
+ LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
+ LiveInterval &JoinPInt = SrcIsPhys ? SrcInt : DstInt;
+ unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
+ unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
+
+ // Don't join with physregs that have a ridiculous number of live
+ // ranges. The data structure performance is really bad when that
+ // happens.
+ if (JoinPInt.ranges.size() > 1000) {
+ mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
+ ++numAborts;
+ DEBUG(dbgs()
+ << "\tPhysical register live interval too complicated, abort!\n");
+ return false;
+ }
- const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
- unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
- unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
- float Ratio = 1.0 / Threshold;
- if (Length > Threshold &&
- (((float)std::distance(mri_->use_nodbg_begin(JoinVReg),
- mri_->use_nodbg_end()) / Length) < Ratio)) {
- mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
- ++numAborts;
- DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
+ const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg);
+ unsigned Threshold = allocatableRCRegs_[RC].count() * 2;
+ unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+ float Ratio = 1.0 / Threshold;
+ if (Length > Threshold &&
+ (((float)std::distance(mri_->use_nodbg_begin(JoinVReg),
+ mri_->use_nodbg_end()) / Length) < Ratio)) {
+        // Before giving up on coalescing, if the source is defined by a
+        // trivial computation, try rematerializing it.
+ if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI))
+ return true;
+
+ mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg);
+ ++numAborts;
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
}
}
}
@@ -1720,16 +1696,15 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// been modified, so we can use this information below to update aliases.
bool Swapped = false;
// If SrcInt is implicitly defined, it's safe to coalesce.
- bool isEmpty = SrcInt.empty();
- if (isEmpty && !CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) {
- // Only coalesce an empty interval (defined by implicit_def) with
- // another interval which has a valno defined by the CopyMI and the CopyMI
- // is a kill of the implicit def.
- DEBUG(dbgs() << "Not profitable!\n");
- return false;
- }
-
- if (!isEmpty && !JoinIntervals(DstInt, SrcInt, Swapped)) {
+ if (SrcInt.empty()) {
+ if (!CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) {
+      // Only coalesce an empty interval (defined by implicit_def) with
+      // another interval which has a valno defined by the CopyMI, where the
+      // CopyMI is a kill of the implicit def.
+ DEBUG(dbgs() << "Not profitable!\n");
+ return false;
+ }
+ } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) {
// Coalescing failed.
// If definition of source is defined by trivial computation, try
@@ -2800,7 +2775,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
if (MO.isDead())
continue;
if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !mri_->use_empty(Reg)) {
+ !mri_->use_nodbg_empty(Reg)) {
isDead = false;
break;
}
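
The physreg-join heuristic kept above reduces to a use-density test: with Threshold set to twice the number of allocatable registers in the class, the join is rejected (after first trying rematerialization) when the virtual interval spans more than Threshold instructions and its non-debug uses per instruction fall below 1/Threshold. The arithmetic with concrete numbers:

    #include <cstdio>

    int main() {
      unsigned AllocatableRegs = 16;        // say, a 16-register class
      unsigned Threshold = AllocatableRegs * 2;
      unsigned Length = 100;                // approximate instruction count
      unsigned Uses = 2;                    // non-debug uses of the vreg
      float Ratio = 1.0f / Threshold;       // 0.03125
      bool Abort = Length > Threshold && (float)Uses / Length < Ratio;
      std::printf("abort join: %s\n", Abort ? "yes" : "no");
      // yes: density 0.02 is below 0.03125
    }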
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index b62cca3..d6bdb10 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -406,7 +406,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
// Add information about the stub reference to ELFMMI so that the stub
// gets emitted by the asmprinter.
- MCSymbol *SSym = getContext().GetOrCreateTemporarySymbol(Name.str());
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
if (StubSym.getPointer() == 0) {
MCSymbol *Sym = Mang->getSymbol(GV);
@@ -759,7 +759,7 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
- MCSymbol *SSym = getContext().GetOrCreateTemporarySymbol(Name.str());
+ MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (StubSym.getPointer() == 0) {
MCSymbol *Sym = Mang->getSymbol(GV);
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index c840b39..c288ae0 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -188,8 +188,9 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
// Find the instruction that kills SavedReg.
MachineInstr *KillMI = NULL;
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SavedReg),
- UE = MRI->use_end(); UI != UE; ++UI) {
+ for (MachineRegisterInfo::use_nodbg_iterator
+ UI = MRI->use_nodbg_begin(SavedReg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
MachineOperand &UseMO = UI.getOperand();
if (!UseMO.isKill())
continue;
@@ -280,8 +281,8 @@ TwoAddressInstructionPass::isProfitableToReMat(unsigned Reg,
MachineInstr *MI, MachineInstr *DefMI,
MachineBasicBlock *MBB, unsigned Loc) {
bool OtherUse = false;
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
- UE = MRI->use_end(); UI != UE; ++UI) {
+ for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg),
+ UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
MachineOperand &UseMO = UI.getOperand();
MachineInstr *UseMI = UseMO.getParent();
MachineBasicBlock *UseMBB = UseMI->getParent();
@@ -927,6 +928,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
mi = nmi;
continue;
}
+
const TargetInstrDesc &TID = mi->getDesc();
bool FirstTied = true;
@@ -1101,7 +1103,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
// Some remat'ed instructions are dead.
int VReg = ReMatRegs.find_first();
while (VReg != -1) {
- if (MRI->use_empty(VReg)) {
+ if (MRI->use_nodbg_empty(VReg)) {
MachineInstr *DefMI = MRI->getVRegDef(VReg);
DefMI->eraseFromParent();
}
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 44d5311..0b7fde7 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -572,6 +572,9 @@ static bool InvalidateRegDef(MachineBasicBlock::iterator I,
static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
BitVector &RegKills,
std::vector<MachineOperand*> &KillOps) {
+ // These do not affect kill info at all.
+ if (MI.isDebugValue())
+ return;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef())
@@ -987,10 +990,17 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
SmallVector<unsigned, 4> Kills;
// Take a look at 2 instructions at most.
- for (unsigned Count = 0; Count < 2; ++Count) {
+ unsigned Count = 0;
+ while (Count < 2) {
if (MII == MBB.begin())
break;
MachineInstr *PrevMI = prior(MII);
+ MII = PrevMI;
+
+ if (PrevMI->isDebugValue())
+ continue; // Skip over dbg_value instructions.
+ ++Count;
+
for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = PrevMI->getOperand(i);
if (!MO.isReg() || MO.getReg() == 0)
@@ -1019,8 +1029,6 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII,
for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS)
Uses.set(*AS);
}
-
- MII = PrevMI;
}
return 0;
@@ -1210,6 +1218,9 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
std::vector<MachineOperand*> &KillOps) {
MachineBasicBlock::iterator NextMII = llvm::next(MII);
+ // Skip over dbg_value instructions.
+ while (NextMII != MBB->end() && NextMII->isDebugValue())
+ NextMII = llvm::next(NextMII);
if (NextMII == MBB->end())
return false;
@@ -1274,6 +1285,9 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
VRM->RemoveMachineInstrFromMaps(&NextMI);
MBB->erase(&NextMI);
++NumModRefUnfold;
+ // Skip over dbg_value instructions.
+ while (NextMII != MBB->end() && NextMII->isDebugValue())
+ NextMII = llvm::next(NextMII);
if (NextMII == MBB->end())
break;
} while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM));
@@ -1619,7 +1633,7 @@ TransferDeadness(unsigned Reg, BitVector &RegKills,
for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg),
RE = MRI->reg_end(); RI != RE; ++RI) {
MachineInstr *UDMI = &*RI;
- if (UDMI->getParent() != MBB)
+ if (UDMI->isDebugValue() || UDMI->getParent() != MBB)
continue;
DenseMap<MachineInstr*, unsigned>::iterator DI = DistanceMap.find(UDMI);
if (DI == DistanceMap.end())
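
A pattern repeated throughout these VirtRegRewriter fixes: every scan over machine instructions must step over dbg_value instructions so that the presence of debug info cannot change rewriting decisions, as in the two-instruction window of FindFreeRegister above. The pattern in isolation (plain structs standing in for MachineInstr):

    #include <cstdio>

    struct Instr { bool IsDebugValue; int Id; };

    int main() {
      // Real instructions 1, 3, 5 interleaved with dbg_values 2 and 4.
      Instr MBB[] = { {false, 1}, {true, 2}, {false, 3}, {true, 4}, {false, 5} };
      int MII = 5;              // "iterator" one past the end of the block
      unsigned Count = 0;
      while (Count < 2) {       // look at 2 *real* instructions at most
        if (MII == 0) break;
        Instr *PrevMI = &MBB[--MII];
        if (PrevMI->IsDebugValue)
          continue;             // dbg_value must not consume the window
        ++Count;
        std::printf("examine instr %d\n", PrevMI->Id); // 5, then 3
      }
    }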
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index b2e2a04..da21c2d 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -66,10 +66,39 @@ ExecutionEngine::~ExecutionEngine() {
delete Modules[i];
}
+namespace {
+// This class automatically deletes the memory block when the GlobalVariable is
+// destroyed.
+class GVMemoryBlock : public CallbackVH {
+ GVMemoryBlock(const GlobalVariable *GV)
+ : CallbackVH(const_cast<GlobalVariable*>(GV)) {}
+
+public:
+  // Returns the address at which the GlobalVariable should be written. The
+  // GVMemoryBlock object is allocated immediately before that address.
+ static char *Create(const GlobalVariable *GV, const TargetData& TD) {
+ const Type *ElTy = GV->getType()->getElementType();
+ size_t GVSize = (size_t)TD.getTypeAllocSize(ElTy);
+ void *RawMemory = ::operator new(
+ TargetData::RoundUpAlignment(sizeof(GVMemoryBlock),
+ TD.getPreferredAlignment(GV))
+ + GVSize);
+ new(RawMemory) GVMemoryBlock(GV);
+ return static_cast<char*>(RawMemory) + sizeof(GVMemoryBlock);
+ }
+
+ virtual void deleted() {
+ // We allocated with operator new and with some extra memory hanging off the
+ // end, so don't just delete this. I'm not sure if this is actually
+ // required.
+ this->~GVMemoryBlock();
+ ::operator delete(this);
+ }
+};
+} // anonymous namespace
+
char* ExecutionEngine::getMemoryForGV(const GlobalVariable* GV) {
- const Type *ElTy = GV->getType()->getElementType();
- size_t GVSize = (size_t)getTargetData()->getTypeAllocSize(ElTy);
- return new char[GVSize];
+ return GVMemoryBlock::Create(GV, *getTargetData());
}
/// removeModule - Remove a Module from the list of modules.
@@ -221,35 +250,55 @@ const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) {
return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : 0;
}
-// CreateArgv - Turn a vector of strings into a nice argv style array of
-// pointers to null terminated strings.
-//
-static void *CreateArgv(LLVMContext &C, ExecutionEngine *EE,
- const std::vector<std::string> &InputArgv) {
+namespace {
+class ArgvArray {
+ char *Array;
+ std::vector<char*> Values;
+public:
+ ArgvArray() : Array(NULL) {}
+ ~ArgvArray() { clear(); }
+ void clear() {
+ delete[] Array;
+ Array = NULL;
+ for (size_t I = 0, E = Values.size(); I != E; ++I) {
+ delete[] Values[I];
+ }
+ Values.clear();
+ }
+  /// Turn a vector of strings into a nice argv-style array of pointers to
+  /// null-terminated strings.
+ void *reset(LLVMContext &C, ExecutionEngine *EE,
+ const std::vector<std::string> &InputArgv);
+};
+} // anonymous namespace
+void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
+ const std::vector<std::string> &InputArgv) {
+ clear(); // Free the old contents.
unsigned PtrSize = EE->getTargetData()->getPointerSize();
- char *Result = new char[(InputArgv.size()+1)*PtrSize];
+ Array = new char[(InputArgv.size()+1)*PtrSize];
- DEBUG(dbgs() << "JIT: ARGV = " << (void*)Result << "\n");
+ DEBUG(dbgs() << "JIT: ARGV = " << (void*)Array << "\n");
const Type *SBytePtr = Type::getInt8PtrTy(C);
for (unsigned i = 0; i != InputArgv.size(); ++i) {
unsigned Size = InputArgv[i].size()+1;
char *Dest = new char[Size];
+ Values.push_back(Dest);
DEBUG(dbgs() << "JIT: ARGV[" << i << "] = " << (void*)Dest << "\n");
std::copy(InputArgv[i].begin(), InputArgv[i].end(), Dest);
Dest[Size-1] = 0;
- // Endian safe: Result[i] = (PointerTy)Dest;
- EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Result+i*PtrSize),
+ // Endian safe: Array[i] = (PointerTy)Dest;
+ EE->StoreValueToMemory(PTOGV(Dest), (GenericValue*)(Array+i*PtrSize),
SBytePtr);
}
// Null terminate it
EE->StoreValueToMemory(PTOGV(0),
- (GenericValue*)(Result+InputArgv.size()*PtrSize),
+ (GenericValue*)(Array+InputArgv.size()*PtrSize),
SBytePtr);
- return Result;
+ return Array;
}
@@ -353,11 +402,13 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
llvm_report_error("Invalid number of arguments of main() supplied");
}
+ ArgvArray CArgv;
+ ArgvArray CEnv;
if (NumArgs) {
GVArgs.push_back(GVArgc); // Arg #0 = argc.
if (NumArgs > 1) {
// Arg #1 = argv.
- GVArgs.push_back(PTOGV(CreateArgv(Fn->getContext(), this, argv)));
+ GVArgs.push_back(PTOGV(CArgv.reset(Fn->getContext(), this, argv)));
assert(!isTargetNullPtr(this, GVTOP(GVArgs[1])) &&
"argv[0] was null after CreateArgv");
if (NumArgs > 2) {
@@ -365,7 +416,7 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
for (unsigned i = 0; envp[i]; ++i)
EnvVars.push_back(envp[i]);
// Arg #2 = envp.
- GVArgs.push_back(PTOGV(CreateArgv(Fn->getContext(), this, EnvVars)));
+ GVArgs.push_back(PTOGV(CEnv.reset(Fn->getContext(), this, EnvVars)));
}
}
}
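
ArgvArray does for the JIT'd main what a hosted C runtime does at startup: allocate one pointer slot per string plus a terminating null slot, copy each string into its own buffer, and retain ownership of both so everything is freed when the ArgvArray goes out of scope. A host-side sketch of the same layout (plain char** instead of target stores through StoreValueToMemory):

    #include <cstdio>
    #include <cstring>
    #include <string>
    #include <vector>

    int main() {
      std::vector<std::string> InputArgv;
      InputArgv.push_back("prog");
      InputArgv.push_back("-v");

      // One slot per argument plus the terminating null pointer.
      std::vector<char*> Array(InputArgv.size() + 1, (char*)0);
      for (size_t i = 0; i != InputArgv.size(); ++i) {
        char *Dest = new char[InputArgv[i].size() + 1];
        std::memcpy(Dest, InputArgv[i].c_str(), InputArgv[i].size() + 1);
        Array[i] = Dest;
      }

      for (char **P = &Array[0]; *P; ++P)
        std::printf("argv[%ld] = %s\n", (long)(P - &Array[0]), *P);
      for (size_t i = 0; i != InputArgv.size(); ++i)
        delete[] Array[i]; // ArgvArray's clear() does this in its destructor
    }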
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 7b061d3..3ba783b 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -265,6 +265,8 @@ GenericValue Interpreter::callExternalFunction(Function *F,
if (RF == RawFunctions->end()) {
RawFn = (RawFunc)(intptr_t)
sys::DynamicLibrary::SearchForAddressOfSymbol(F->getName());
+ if (!RawFn)
+ RawFn = (RawFunc)(intptr_t)getPointerToGlobalIfAvailable(F);
if (RawFn != 0)
RawFunctions->insert(std::make_pair(F, RawFn)); // Cache for later
} else {
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 2025463..7a23aec 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -623,24 +623,10 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
AddEncodingComment(Inst);
// Show the MCInst if enabled.
- if (ShowInst) {
- raw_ostream &OS = GetCommentOS();
- OS << "<MCInst #" << Inst.getOpcode();
-
- StringRef InstName;
- if (InstPrinter)
- InstName = InstPrinter->getOpcodeName(Inst.getOpcode());
- if (!InstName.empty())
- OS << ' ' << InstName;
-
- for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) {
- OS << "\n ";
- Inst.getOperand(i).print(OS, &MAI);
- }
- OS << ">\n";
- }
+ if (ShowInst)
+ Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
- // If we have an AsmPrinter, use that to print, otherwise dump the MCInst.
+ // If we have an AsmPrinter, use that to print, otherwise print the MCInst.
if (InstPrinter)
InstPrinter->printInst(&Inst);
else
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index beecf7e..03b8bd3 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -19,19 +19,26 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmBackend.h"
-// FIXME: Gross.
-#include "../Target/X86/X86FixupKinds.h"
-
#include <vector>
using namespace llvm;
+namespace {
+namespace stats {
STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
+STATISTIC(EvaluateFixup, "Number of evaluated fixups");
+STATISTIC(FragmentLayouts, "Number of fragment layouts");
+STATISTIC(ObjectBytes, "Number of emitted object file bytes");
+STATISTIC(RelaxationSteps, "Number of assembler layout and relaxation steps");
+STATISTIC(RelaxedInstructions, "Number of relaxed instructions");
+STATISTIC(SectionLayouts, "Number of section layouts");
+}
+}
// FIXME FIXME FIXME: There are a number of places in this file where we convert
// what is a 64-bit assembler value used for computation into a value in the
@@ -40,13 +47,103 @@ STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
/* *** */
+void MCAsmLayout::UpdateForSlide(MCFragment *F, int SlideAmount) {
+  // We shouldn't have to do anything special to support negative slides; they
+  // are a perfectly valid thing to do as long as other parts of the system
+  // can guarantee convergence.
+ assert(SlideAmount >= 0 && "Negative slides not yet supported");
+
+ // Update the layout by simply recomputing the layout for the entire
+ // file. This is trivially correct, but very slow.
+ //
+ // FIXME-PERF: This is O(N^2), but will be eliminated once we get smarter.
+
+ // Layout the concrete sections and fragments.
+ MCAssembler &Asm = getAssembler();
+ uint64_t Address = 0;
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) {
+ // Skip virtual sections.
+ if (Asm.getBackend().isVirtualSection(it->getSection()))
+ continue;
+
+    // Layout the section fragments and compute the section size.
+ Address = Asm.LayoutSection(*it, *this, Address);
+ }
+
+ // Layout the virtual sections.
+ for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) {
+ if (!Asm.getBackend().isVirtualSection(it->getSection()))
+ continue;
+
+    // Layout the section fragments and compute the section size.
+ Address = Asm.LayoutSection(*it, *this, Address);
+ }
+}
+
+uint64_t MCAsmLayout::getFragmentAddress(const MCFragment *F) const {
+ assert(F->getParent() && "Missing section()!");
+ return getSectionAddress(F->getParent()) + getFragmentOffset(F);
+}
+
+uint64_t MCAsmLayout::getFragmentEffectiveSize(const MCFragment *F) const {
+  assert(F->EffectiveSize != ~UINT64_C(0) && "Effective size not set!");
+ return F->EffectiveSize;
+}
+
+void MCAsmLayout::setFragmentEffectiveSize(MCFragment *F, uint64_t Value) {
+ F->EffectiveSize = Value;
+}
+
+uint64_t MCAsmLayout::getFragmentOffset(const MCFragment *F) const {
+  assert(F->Offset != ~UINT64_C(0) && "Offset not set!");
+ return F->Offset;
+}
+
+void MCAsmLayout::setFragmentOffset(MCFragment *F, uint64_t Value) {
+ F->Offset = Value;
+}
+
+uint64_t MCAsmLayout::getSymbolAddress(const MCSymbolData *SD) const {
+ assert(SD->getFragment() && "Invalid getAddress() on undefined symbol!");
+ return getFragmentAddress(SD->getFragment()) + SD->getOffset();
+}
+
+uint64_t MCAsmLayout::getSectionAddress(const MCSectionData *SD) const {
+ assert(SD->Address != ~UINT64_C(0) && "Address not set!");
+ return SD->Address;
+}
+
+void MCAsmLayout::setSectionAddress(MCSectionData *SD, uint64_t Value) {
+ SD->Address = Value;
+}
+
+uint64_t MCAsmLayout::getSectionSize(const MCSectionData *SD) const {
+  assert(SD->Size != ~UINT64_C(0) && "Size not set!");
+ return SD->Size;
+}
+void MCAsmLayout::setSectionSize(MCSectionData *SD, uint64_t Value) {
+ SD->Size = Value;
+}
+
+uint64_t MCAsmLayout::getSectionFileSize(const MCSectionData *SD) const {
+ assert(SD->FileSize != ~UINT64_C(0) && "File size not set!");
+ return SD->FileSize;
+}
+void MCAsmLayout::setSectionFileSize(MCSectionData *SD, uint64_t Value) {
+ SD->FileSize = Value;
+}
+
+ /// @}
+
+/* *** */
+
MCFragment::MCFragment() : Kind(FragmentType(~0)) {
}
MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
: Kind(_Kind),
Parent(_Parent),
- FileSize(~UINT64_C(0))
+ EffectiveSize(~UINT64_C(0))
{
if (Parent)
Parent->getFragmentList().push_back(this);
@@ -55,11 +152,6 @@ MCFragment::MCFragment(FragmentType _Kind, MCSectionData *_Parent)
MCFragment::~MCFragment() {
}
-uint64_t MCFragment::getAddress() const {
- assert(getParent() && "Missing Section!");
- return getParent()->getAddress() + Offset;
-}
-
/* *** */
MCSectionData::MCSectionData() : Section(0) {}
@@ -95,7 +187,7 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment,
MCAssembler::MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend,
MCCodeEmitter &_Emitter, raw_ostream &_OS)
: Context(_Context), Backend(_Backend), Emitter(_Emitter),
- OS(_OS), SubsectionsViaSymbols(false)
+ OS(_OS), RelaxAll(false), SubsectionsViaSymbols(false)
{
}
@@ -104,7 +196,6 @@ MCAssembler::~MCAssembler() {
static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm,
const MCAsmFixup &Fixup,
- const MCDataFragment *DF,
const MCValue Target,
const MCSection *BaseSection) {
// The effective fixup address is
@@ -141,8 +232,8 @@ static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm,
}
static bool isScatteredFixupFullyResolved(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
const MCAsmFixup &Fixup,
- const MCDataFragment *DF,
const MCValue Target,
const MCSymbolData *BaseSymbol) {
// The effective fixup address is
@@ -162,7 +253,7 @@ static bool isScatteredFixupFullyResolved(const MCAssembler &Asm,
if (A->getKind() != MCSymbolRefExpr::VK_None)
return false;
- A_Base = Asm.getAtom(&Asm.getSymbolData(A->getSymbol()));
+ A_Base = Asm.getAtom(Layout, &Asm.getSymbolData(A->getSymbol()));
if (!A_Base)
return false;
}
@@ -172,7 +263,7 @@ static bool isScatteredFixupFullyResolved(const MCAssembler &Asm,
if (B->getKind() != MCSymbolRefExpr::VK_None)
return false;
- B_Base = Asm.getAtom(&Asm.getSymbolData(B->getSymbol()));
+ B_Base = Asm.getAtom(Layout, &Asm.getSymbolData(B->getSymbol()));
if (!B_Base)
return false;
}
@@ -200,9 +291,13 @@ bool MCAssembler::isSymbolLinkerVisible(const MCSymbolData *SD) const {
SD->getFragment()->getParent()->getSection());
}
-const MCSymbolData *MCAssembler::getAtomForAddress(const MCSectionData *Section,
+// FIXME-PERF: This routine is really slow.
+const MCSymbolData *MCAssembler::getAtomForAddress(const MCAsmLayout &Layout,
+ const MCSectionData *Section,
uint64_t Address) const {
const MCSymbolData *Best = 0;
+ uint64_t BestAddress = 0;
+
for (MCAssembler::const_symbol_iterator it = symbol_begin(),
ie = symbol_end(); it != ie; ++it) {
// Ignore non-linker visible symbols.
@@ -215,15 +310,19 @@ const MCSymbolData *MCAssembler::getAtomForAddress(const MCSectionData *Section,
// Otherwise, find the closest symbol preceding this address (ties are
// resolved in favor of the last defined symbol).
- if (it->getAddress() <= Address &&
- (!Best || it->getAddress() >= Best->getAddress()))
+ uint64_t SymbolAddress = Layout.getSymbolAddress(it);
+ if (SymbolAddress <= Address && (!Best || SymbolAddress >= BestAddress)) {
Best = it;
+ BestAddress = SymbolAddress;
+ }
}
return Best;
}
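
The scan above is a straight linear search (hence the FIXME-PERF) for the
closest symbol at or before the query address, with ties resolved in favor of
later entries via the >= comparison. A standalone model of the same loop over
a plain vector of hypothetical Sym records:

    #include <cstdint>
    #include <vector>

    struct Sym { uint64_t Address; };

    // Return the symbol with the greatest address <= Address, preferring
    // later entries on ties; null if nothing precedes Address.
    const Sym *findAtom(const std::vector<Sym> &Syms, uint64_t Address) {
      const Sym *Best = 0;
      uint64_t BestAddress = 0;
      for (size_t i = 0, e = Syms.size(); i != e; ++i) {
        uint64_t SymbolAddress = Syms[i].Address;
        if (SymbolAddress <= Address &&
            (!Best || SymbolAddress >= BestAddress)) {
          Best = &Syms[i];
          BestAddress = SymbolAddress;
        }
      }
      return Best;
    }
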
-const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
+// FIXME-PERF: This routine is really slow.
+const MCSymbolData *MCAssembler::getAtom(const MCAsmLayout &Layout,
+ const MCSymbolData *SD) const {
// Linker visible symbols define atoms.
if (isSymbolLinkerVisible(SD))
return SD;
@@ -233,12 +332,15 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const {
return 0;
// Otherwise, search by address.
- return getAtomForAddress(SD->getFragment()->getParent(), SD->getAddress());
+ return getAtomForAddress(Layout, SD->getFragment()->getParent(),
+ Layout.getSymbolAddress(SD));
}
-bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup,
- MCDataFragment *DF,
+bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout,
+ const MCAsmFixup &Fixup, const MCFragment *DF,
MCValue &Target, uint64_t &Value) const {
+ ++stats::EvaluateFixup;
+
if (!Fixup.Value->EvaluateAsRelocatable(Target, &Layout))
llvm_report_error("expected relocatable expression");
@@ -253,13 +355,13 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup,
bool IsResolved = true;
if (const MCSymbolRefExpr *A = Target.getSymA()) {
if (A->getSymbol().isDefined())
- Value += getSymbolData(A->getSymbol()).getAddress();
+ Value += Layout.getSymbolAddress(&getSymbolData(A->getSymbol()));
else
IsResolved = false;
}
if (const MCSymbolRefExpr *B = Target.getSymB()) {
if (B->getSymbol().isDefined())
- Value -= getSymbolData(B->getSymbol()).getAddress();
+ Value -= Layout.getSymbolAddress(&getSymbolData(B->getSymbol()));
else
IsResolved = false;
}
@@ -273,55 +375,90 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup,
const MCSymbolData *BaseSymbol = 0;
if (IsPCRel) {
BaseSymbol = getAtomForAddress(
- DF->getParent(), DF->getAddress() + Fixup.Offset);
+ Layout, DF->getParent(), Layout.getFragmentAddress(DF)+Fixup.Offset);
if (!BaseSymbol)
IsResolved = false;
}
if (IsResolved)
- IsResolved = isScatteredFixupFullyResolved(*this, Fixup, DF, Target,
+ IsResolved = isScatteredFixupFullyResolved(*this, Layout, Fixup, Target,
BaseSymbol);
} else {
const MCSection *BaseSection = 0;
if (IsPCRel)
BaseSection = &DF->getParent()->getSection();
- IsResolved = isScatteredFixupFullyResolvedSimple(*this, Fixup, DF, Target,
+ IsResolved = isScatteredFixupFullyResolvedSimple(*this, Fixup, Target,
BaseSection);
}
}
if (IsPCRel)
- Value -= DF->getAddress() + Fixup.Offset;
+ Value -= Layout.getFragmentAddress(DF) + Fixup.Offset;
return IsResolved;
}
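
Stripped of the symbol-table lookups, the value computed above is the target's
constant plus addr(A) minus addr(B), less the fixup's own address when the
fixup is PC-relative. A sketch with the lookups replaced by plain parameters
(all names hypothetical):

    #include <cstdint>

    uint64_t evaluateFixupValue(int64_t Constant,
                                bool HasA, uint64_t AddrA,
                                bool HasB, uint64_t AddrB,
                                bool IsPCRel,
                                uint64_t FragmentAddress,
                                uint64_t FixupOffset) {
      uint64_t Value = Constant;
      if (HasA) Value += AddrA;              // + address of symbol A
      if (HasB) Value -= AddrB;              // - address of symbol B
      if (IsPCRel)                           // relative to the fixup itself
        Value -= FragmentAddress + FixupOffset;
      return Value;
    }
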
-void MCAssembler::LayoutSection(MCSectionData &SD) {
- MCAsmLayout Layout(*this);
- uint64_t Address = SD.getAddress();
+uint64_t MCAssembler::LayoutSection(MCSectionData &SD,
+ MCAsmLayout &Layout,
+ uint64_t StartAddress) {
+ bool IsVirtual = getBackend().isVirtualSection(SD.getSection());
+
+ ++stats::SectionLayouts;
+
+ // Align this section if necessary by adding padding bytes to the previous
+ // section. It is safe to adjust this out-of-band, because no symbol or
+ // fragment is allowed to point past the end of the section at any time.
+ if (uint64_t Pad = OffsetToAlignment(StartAddress, SD.getAlignment())) {
+ // Unless this section is virtual (where we are allowed to adjust the offset
+ // freely), the padding goes in the previous section.
+ if (!IsVirtual) {
+ // Find the previous non-virtual section.
+ iterator it = &SD;
+ assert(it != begin() && "Invalid initial section address!");
+ for (--it; getBackend().isVirtualSection(it->getSection()); --it) ;
+ Layout.setSectionFileSize(&*it, Layout.getSectionFileSize(&*it) + Pad);
+ }
+
+ StartAddress += Pad;
+ }
+
+ // Set the aligned section address.
+ Layout.setSectionAddress(&SD, StartAddress);
+ uint64_t Address = StartAddress;
for (MCSectionData::iterator it = SD.begin(), ie = SD.end(); it != ie; ++it) {
MCFragment &F = *it;
- F.setOffset(Address - SD.getAddress());
+ ++stats::FragmentLayouts;
+
+ uint64_t FragmentOffset = Address - StartAddress;
+ Layout.setFragmentOffset(&F, FragmentOffset);
// Evaluate fragment size.
+ uint64_t EffectiveSize = 0;
switch (F.getKind()) {
case MCFragment::FT_Align: {
MCAlignFragment &AF = cast<MCAlignFragment>(F);
- uint64_t Size = OffsetToAlignment(Address, AF.getAlignment());
- if (Size > AF.getMaxBytesToEmit())
- AF.setFileSize(0);
- else
- AF.setFileSize(Size);
+ EffectiveSize = OffsetToAlignment(Address, AF.getAlignment());
+ if (EffectiveSize > AF.getMaxBytesToEmit())
+ EffectiveSize = 0;
break;
}
case MCFragment::FT_Data:
- case MCFragment::FT_Fill:
- F.setFileSize(F.getMaxFileSize());
+ EffectiveSize = cast<MCDataFragment>(F).getContents().size();
+ break;
+
+ case MCFragment::FT_Fill: {
+ MCFillFragment &FF = cast<MCFillFragment>(F);
+ EffectiveSize = FF.getValueSize() * FF.getCount();
+ break;
+ }
+
+ case MCFragment::FT_Inst:
+ EffectiveSize = cast<MCInstFragment>(F).getInstSize();
break;
case MCFragment::FT_Org: {
@@ -332,12 +469,12 @@ void MCAssembler::LayoutSection(MCSectionData &SD) {
llvm_report_error("expected assembly-time absolute expression");
// FIXME: We need a way to communicate this error.
- int64_t Offset = TargetLocation - F.getOffset();
+ int64_t Offset = TargetLocation - FragmentOffset;
if (Offset < 0)
llvm_report_error("invalid .org offset '" + Twine(TargetLocation) +
- "' (at offset '" + Twine(F.getOffset()) + "'");
+ "' (at offset '" + Twine(FragmentOffset) + "'");
- F.setFileSize(Offset);
+ EffectiveSize = Offset;
break;
}
@@ -346,114 +483,66 @@ void MCAssembler::LayoutSection(MCSectionData &SD) {
// Align the fragment offset; it is safe to adjust the offset freely since
// this is only in virtual sections.
+ //
+ // FIXME: We shouldn't be doing this here.
Address = RoundUpToAlignment(Address, ZFF.getAlignment());
- F.setOffset(Address - SD.getAddress());
+ Layout.setFragmentOffset(&F, Address - StartAddress);
- // FIXME: This is misnamed.
- F.setFileSize(ZFF.getSize());
+ EffectiveSize = ZFF.getSize();
break;
}
}
- Address += F.getFileSize();
+ Layout.setFragmentEffectiveSize(&F, EffectiveSize);
+ Address += EffectiveSize;
}
// Set the section sizes.
- SD.setSize(Address - SD.getAddress());
- if (getBackend().isVirtualSection(SD.getSection()))
- SD.setFileSize(0);
+ Layout.setSectionSize(&SD, Address - StartAddress);
+ if (IsVirtual)
+ Layout.setSectionFileSize(&SD, 0);
else
- SD.setFileSize(Address - SD.getAddress());
-}
-
-/// WriteNopData - Write optimal nops to the output file for the \arg Count
-/// bytes. This returns the number of bytes written. It may return 0 if
-/// the \arg Count is more than the maximum optimal nops.
-///
-/// FIXME this is X86 32-bit specific and should move to a better place.
-static uint64_t WriteNopData(uint64_t Count, MCObjectWriter *OW) {
- static const uint8_t Nops[16][16] = {
- // nop
- {0x90},
- // xchg %ax,%ax
- {0x66, 0x90},
- // nopl (%[re]ax)
- {0x0f, 0x1f, 0x00},
- // nopl 0(%[re]ax)
- {0x0f, 0x1f, 0x40, 0x00},
- // nopl 0(%[re]ax,%[re]ax,1)
- {0x0f, 0x1f, 0x44, 0x00, 0x00},
- // nopw 0(%[re]ax,%[re]ax,1)
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
- // nopl 0L(%[re]ax)
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
- // nopl 0L(%[re]ax,%[re]ax,1)
- {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
- // nopw 0L(%[re]ax,%[re]ax,1)
- {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
- // nopw %cs:0L(%[re]ax,%[re]ax,1)
- {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
- // nopl 0(%[re]ax,%[re]ax,1)
- // nopw 0(%[re]ax,%[re]ax,1)
- {0x0f, 0x1f, 0x44, 0x00, 0x00,
- 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
- // nopw 0(%[re]ax,%[re]ax,1)
- // nopw 0(%[re]ax,%[re]ax,1)
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
- 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
- // nopw 0(%[re]ax,%[re]ax,1)
- // nopl 0L(%[re]ax) */
- {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
- 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
- // nopl 0L(%[re]ax)
- // nopl 0L(%[re]ax)
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
- 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
- // nopl 0L(%[re]ax)
- // nopl 0L(%[re]ax,%[re]ax,1)
- {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
- 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
- };
-
- if (Count > 15)
- return 0;
-
- for (uint64_t i = 0; i < Count; i++)
- OW->Write8(uint8_t(Nops[Count - 1][i]));
+ Layout.setSectionFileSize(&SD, Address - StartAddress);
- return Count;
+ return Address;
}
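
OffsetToAlignment is a real LLVM support helper; the standalone version below
assumes its usual power-of-two form, and shows how the pad charged to the
previous non-virtual section above is derived:

    #include <cassert>
    #include <cstdint>

    // Bytes needed to round Value up to the next multiple of Align
    // (Align must be a power of two in this sketch).
    static uint64_t offsetToAlignment(uint64_t Value, uint64_t Align) {
      assert(Align && (Align & (Align - 1)) == 0 && "expected power of two");
      return (Align - (Value & (Align - 1))) & (Align - 1);
    }

    // offsetToAlignment(13, 8) == 3: a section starting at 13 with 8-byte
    // alignment lands at 16, and the 3 pad bytes are added to the previous
    // non-virtual section's file size.
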
/// WriteFragmentData - Write the \arg F data to the output file.
-static void WriteFragmentData(const MCFragment &F, MCObjectWriter *OW) {
+static void WriteFragmentData(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment &F, MCObjectWriter *OW) {
uint64_t Start = OW->getStream().tell();
(void) Start;
- ++EmittedFragments;
+ ++stats::EmittedFragments;
// FIXME: Embed in fragments instead?
+ uint64_t FragmentSize = Layout.getFragmentEffectiveSize(&F);
switch (F.getKind()) {
case MCFragment::FT_Align: {
MCAlignFragment &AF = cast<MCAlignFragment>(F);
- uint64_t Count = AF.getFileSize() / AF.getValueSize();
+ uint64_t Count = FragmentSize / AF.getValueSize();
// FIXME: This error shouldn't actually occur (the front end should emit
// multiple .align directives to enforce the semantics it wants), but is
// severe enough that we want to report it. How to handle this?
- if (Count * AF.getValueSize() != AF.getFileSize())
+ if (Count * AF.getValueSize() != FragmentSize)
llvm_report_error("undefined .align directive, value size '" +
Twine(AF.getValueSize()) +
"' is not a divisor of padding size '" +
- Twine(AF.getFileSize()) + "'");
+ Twine(FragmentSize) + "'");
// See if we are aligning with nops, and if so do that first to try to fill
// the Count bytes. Then if that did not fill any bytes or there are any
// bytes left to fill use the Value and ValueSize to fill the rest.
+ // If we are aligning with nops, ask the target to emit the right data.
if (AF.getEmitNops()) {
- uint64_t NopByteCount = WriteNopData(Count, OW);
- Count -= NopByteCount;
+ if (!Asm.getBackend().WriteNopData(Count, OW))
+ llvm_report_error("unable to write nop sequence of " +
+ Twine(Count) + " bytes");
+ break;
}
+ // Otherwise, write out in multiples of the value size.
for (uint64_t i = 0; i != Count; ++i) {
switch (AF.getValueSize()) {
default:
@@ -468,7 +557,9 @@ static void WriteFragmentData(const MCFragment &F, MCObjectWriter *OW) {
}
case MCFragment::FT_Data: {
- OW->WriteBytes(cast<MCDataFragment>(F).getContents().str());
+ MCDataFragment &DF = cast<MCDataFragment>(F);
+ assert(FragmentSize == DF.getContents().size() && "Invalid size!");
+ OW->WriteBytes(DF.getContents().str());
break;
}
@@ -487,10 +578,14 @@ static void WriteFragmentData(const MCFragment &F, MCObjectWriter *OW) {
break;
}
+ case MCFragment::FT_Inst:
+ llvm_unreachable("unexpected inst fragment after lowering");
+ break;
+
case MCFragment::FT_Org: {
MCOrgFragment &OF = cast<MCOrgFragment>(F);
- for (uint64_t i = 0, e = OF.getFileSize(); i != e; ++i)
+ for (uint64_t i = 0, e = FragmentSize; i != e; ++i)
OW->Write8(uint8_t(OF.getValue()));
break;
@@ -502,14 +597,18 @@ static void WriteFragmentData(const MCFragment &F, MCObjectWriter *OW) {
}
}
- assert(OW->getStream().tell() - Start == F.getFileSize());
+ assert(OW->getStream().tell() - Start == FragmentSize);
}
void MCAssembler::WriteSectionData(const MCSectionData *SD,
+ const MCAsmLayout &Layout,
MCObjectWriter *OW) const {
+ uint64_t SectionSize = Layout.getSectionSize(SD);
+ uint64_t SectionFileSize = Layout.getSectionFileSize(SD);
+
// Ignore virtual sections.
if (getBackend().isVirtualSection(SD->getSection())) {
- assert(SD->getFileSize() == 0);
+ assert(SectionFileSize == 0 && "Invalid size for section!");
return;
}
@@ -518,13 +617,13 @@ void MCAssembler::WriteSectionData(const MCSectionData *SD,
for (MCSectionData::const_iterator it = SD->begin(),
ie = SD->end(); it != ie; ++it)
- WriteFragmentData(*it, OW);
+ WriteFragmentData(*this, Layout, *it, OW);
// Add section padding.
- assert(SD->getFileSize() >= SD->getSize() && "Invalid section sizes!");
- OW->WriteZeros(SD->getFileSize() - SD->getSize());
+ assert(SectionFileSize >= SectionSize && "Invalid section sizes!");
+ OW->WriteZeros(SectionFileSize - SectionSize);
- assert(OW->getStream().tell() - Start == SD->getFileSize());
+ assert(OW->getStream().tell() - Start == SectionFileSize);
}
void MCAssembler::Finish() {
@@ -532,15 +631,35 @@ void MCAssembler::Finish() {
llvm::errs() << "assembler backend - pre-layout\n--\n";
dump(); });
+ // Assign section and fragment ordinals; all subsequent backend code is
+ // responsible for updating these in place.
+ unsigned SectionIndex = 0;
+ unsigned FragmentIndex = 0;
+ for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
+ it->setOrdinal(SectionIndex++);
+
+ for (MCSectionData::iterator it2 = it->begin(),
+ ie2 = it->end(); it2 != ie2; ++it2)
+ it2->setOrdinal(FragmentIndex++);
+ }
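+
The numbering above is what lets the Mach-O writer later in this patch replace
its O(N) "find the section index" scans with getOrdinal() + 1. A toy model of
the numbering pass (stand-in types, not the MC classes):

    #include <vector>

    struct Fragment { unsigned Ordinal; };
    struct Section  { unsigned Ordinal; std::vector<Fragment> Fragments; };

    void assignOrdinals(std::vector<Section> &Sections) {
      unsigned SectionIndex = 0, FragmentIndex = 0;
      for (size_t i = 0, e = Sections.size(); i != e; ++i) {
        Sections[i].Ordinal = SectionIndex++;
        for (size_t j = 0, f = Sections[i].Fragments.size(); j != f; ++j)
          Sections[i].Fragments[j].Ordinal = FragmentIndex++;
      }
    }
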
+
// Layout until everything fits.
- while (LayoutOnce())
+ MCAsmLayout Layout(*this);
+ while (LayoutOnce(Layout))
continue;
DEBUG_WITH_TYPE("mc-dump", {
- llvm::errs() << "assembler backend - post-layout\n--\n";
+ llvm::errs() << "assembler backend - post-relaxation\n--\n";
+ dump(); });
+
+ // Finalize the layout, including fragment lowering.
+ FinishLayout(Layout);
+
+ DEBUG_WITH_TYPE("mc-dump", {
+ llvm::errs() << "assembler backend - final-layout\n--\n";
dump(); });
- // FIXME: Factor out MCObjectWriter.
+ uint64_t StartOffset = OS.tell();
llvm::OwningPtr<MCObjectWriter> Writer(getBackend().createObjectWriter(OS));
if (!Writer)
llvm_report_error("unable to create object writer!");
@@ -550,9 +669,6 @@ void MCAssembler::Finish() {
Writer->ExecutePostLayoutBinding(*this);
// Evaluate and apply the fixups, generating relocation entries as necessary.
- //
- // FIXME: Share layout object.
- MCAsmLayout Layout(*this);
for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) {
for (MCSectionData::iterator it2 = it->begin(),
ie2 = it->end(); it2 != ie2; ++it2) {
@@ -571,7 +687,7 @@ void MCAssembler::Finish() {
// The fixup was unresolved, we need a relocation. Inform the object
// writer of the relocation, and give it an opportunity to adjust the
// fixup value if need be.
- Writer->RecordRelocation(*this, *DF, Fixup, Target, FixedValue);
+ Writer->RecordRelocation(*this, Layout, DF, Fixup, Target,FixedValue);
}
getBackend().ApplyFixup(Fixup, *DF, FixedValue);
@@ -580,17 +696,17 @@ void MCAssembler::Finish() {
}
// Write the object file.
- Writer->WriteObject(*this);
+ Writer->WriteObject(*this, Layout);
OS.flush();
-}
-bool MCAssembler::FixupNeedsRelaxation(MCAsmFixup &Fixup, MCDataFragment *DF) {
- // FIXME: Share layout object.
- MCAsmLayout Layout(*this);
+ stats::ObjectBytes += OS.tell() - StartOffset;
+}
- // Currently we only need to relax X86::reloc_pcrel_1byte.
- if (unsigned(Fixup.Kind) != X86::reloc_pcrel_1byte)
- return false;
+bool MCAssembler::FixupNeedsRelaxation(const MCAsmFixup &Fixup,
+ const MCFragment *DF,
+ const MCAsmLayout &Layout) const {
+ if (getRelaxAll())
+ return true;
// If we cannot resolve the fixup value, it requires relaxation.
MCValue Target;
@@ -602,135 +718,141 @@ bool MCAssembler::FixupNeedsRelaxation(MCAsmFixup &Fixup, MCDataFragment *DF) {
return int64_t(Value) != int64_t(int8_t(Value));
}
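
The final test above uses a truncate-and-sign-extend round trip to decide
whether the resolved value fits in a 1-byte PC-relative field. The same check
in isolation:

    #include <cstdint>

    // True iff Value survives a round trip through int8_t, i.e. it is
    // representable as a signed 8-bit displacement.
    static bool fitsInSigned8(uint64_t Value) {
      return int64_t(Value) == int64_t(int8_t(Value));
    }

    // fitsInSigned8(127) == true, fitsInSigned8(128) == false,
    // fitsInSigned8(uint64_t(-128)) == true.
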
-bool MCAssembler::LayoutOnce() {
+bool MCAssembler::FragmentNeedsRelaxation(const MCInstFragment *IF,
+ const MCAsmLayout &Layout) const {
+ // If this inst doesn't ever need relaxation, ignore it. This occurs either
+ // because we are intentionally pushing out inst fragments, or because we
+ // relaxed a previous instruction to one that no longer needs relaxation.
+ if (!getBackend().MayNeedRelaxation(IF->getInst(), IF->getFixups()))
+ return false;
+
+ for (MCInstFragment::const_fixup_iterator it = IF->fixup_begin(),
+ ie = IF->fixup_end(); it != ie; ++it)
+ if (FixupNeedsRelaxation(*it, IF, Layout))
+ return true;
+
+ return false;
+}
+
+bool MCAssembler::LayoutOnce(MCAsmLayout &Layout) {
+ ++stats::RelaxationSteps;
+
// Layout the concrete sections and fragments.
uint64_t Address = 0;
- MCSectionData *Prev = 0;
for (iterator it = begin(), ie = end(); it != ie; ++it) {
- MCSectionData &SD = *it;
-
// Skip virtual sections.
- if (getBackend().isVirtualSection(SD.getSection()))
+ if (getBackend().isVirtualSection(it->getSection()))
continue;
- // Align this section if necessary by adding padding bytes to the previous
- // section.
- if (uint64_t Pad = OffsetToAlignment(Address, it->getAlignment())) {
- assert(Prev && "Missing prev section!");
- Prev->setFileSize(Prev->getFileSize() + Pad);
- Address += Pad;
- }
-
// Lay out the section's fragments and compute its size.
- SD.setAddress(Address);
- LayoutSection(SD);
- Address += SD.getFileSize();
-
- Prev = &SD;
+ Address = LayoutSection(*it, Layout, Address);
}
// Layout the virtual sections.
for (iterator it = begin(), ie = end(); it != ie; ++it) {
- MCSectionData &SD = *it;
-
- if (!getBackend().isVirtualSection(SD.getSection()))
+ if (!getBackend().isVirtualSection(it->getSection()))
continue;
- // Align this section if necessary by adding padding bytes to the previous
- // section.
- if (uint64_t Pad = OffsetToAlignment(Address, it->getAlignment()))
- Address += Pad;
-
- SD.setAddress(Address);
- LayoutSection(SD);
- Address += SD.getSize();
+ // Lay out the section's fragments and compute its size.
+ Address = LayoutSection(*it, Layout, Address);
}
- // Scan the fixups in order and relax any that don't fit.
+ // Scan for fragments that need relaxation.
+ bool WasRelaxed = false;
for (iterator it = begin(), ie = end(); it != ie; ++it) {
MCSectionData &SD = *it;
for (MCSectionData::iterator it2 = SD.begin(),
ie2 = SD.end(); it2 != ie2; ++it2) {
- MCDataFragment *DF = dyn_cast<MCDataFragment>(it2);
- if (!DF)
+ // Check if this is an instruction fragment that needs relaxation.
+ MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
+ if (!IF || !FragmentNeedsRelaxation(IF, Layout))
continue;
- for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(),
- ie3 = DF->fixup_end(); it3 != ie3; ++it3) {
- MCAsmFixup &Fixup = *it3;
-
- // Check whether we need to relax this fixup.
- if (!FixupNeedsRelaxation(Fixup, DF))
- continue;
-
- // Relax the instruction.
- //
- // FIXME: This is a huge temporary hack which just looks for x86
- // branches; the only thing we need to relax on x86 is
- // 'X86::reloc_pcrel_1byte'. Once we have MCInst fragments, this will be
- // replaced by a TargetAsmBackend hook (most likely tblgen'd) to relax
- // an individual MCInst.
- SmallVectorImpl<char> &C = DF->getContents();
- uint64_t PrevOffset = Fixup.Offset;
- unsigned Amt = 0;
-
- // jcc instructions
- if (unsigned(C[Fixup.Offset-1]) >= 0x70 &&
- unsigned(C[Fixup.Offset-1]) <= 0x7f) {
- C[Fixup.Offset] = C[Fixup.Offset-1] + 0x10;
- C[Fixup.Offset-1] = char(0x0f);
- ++Fixup.Offset;
- Amt = 4;
-
- // jmp rel8
- } else if (C[Fixup.Offset-1] == char(0xeb)) {
- C[Fixup.Offset-1] = char(0xe9);
- Amt = 3;
-
- } else
- llvm_unreachable("unknown 1 byte pcrel instruction!");
-
- Fixup.Value = MCBinaryExpr::Create(
- MCBinaryExpr::Sub, Fixup.Value,
- MCConstantExpr::Create(3, getContext()),
- getContext());
- C.insert(C.begin() + Fixup.Offset, Amt, char(0));
- Fixup.Kind = MCFixupKind(X86::reloc_pcrel_4byte);
-
- // Update the remaining fixups, which have slid.
- //
- // FIXME: This is bad for performance, but will be eliminated by the
- // move to MCInst specific fragments.
- ++it3;
- for (; it3 != ie3; ++it3)
- it3->Offset += Amt;
-
- // Update all the symbols for this fragment, which may have slid.
- //
- // FIXME: This is really really bad for performance, but will be
- // eliminated by the move to MCInst specific fragments.
- for (MCAssembler::symbol_iterator it = symbol_begin(),
- ie = symbol_end(); it != ie; ++it) {
- MCSymbolData &SD = *it;
-
- if (it->getFragment() != DF)
- continue;
-
- if (SD.getOffset() > PrevOffset)
- SD.setOffset(SD.getOffset() + Amt);
- }
-
- // Restart layout.
- //
- // FIXME: This is O(N^2), but will be eliminated once we have a smart
- // MCAsmLayout object.
- return true;
+ ++stats::RelaxedInstructions;
+
+ // FIXME-PERF: We could immediately lower out instructions if we can tell
+ // they are fully resolved, to avoid retesting on later passes.
+
+ // Relax the fragment.
+
+ MCInst Relaxed;
+ getBackend().RelaxInstruction(IF, Relaxed);
+
+ // Encode the new instruction.
+ //
+ // FIXME-PERF: If it matters, we could let the target do this. It can
+ // probably do so more efficiently in many cases.
+ SmallVector<MCFixup, 4> Fixups;
+ SmallString<256> Code;
+ raw_svector_ostream VecOS(Code);
+ getEmitter().EncodeInstruction(Relaxed, VecOS, Fixups);
+ VecOS.flush();
+
+ // Update the instruction fragment.
+ int SlideAmount = Code.size() - IF->getInstSize();
+ IF->setInst(Relaxed);
+ IF->getCode() = Code;
+ IF->getFixups().clear();
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ MCFixup &F = Fixups[i];
+ IF->getFixups().push_back(MCAsmFixup(F.getOffset(), *F.getValue(),
+ F.getKind()));
}
+
+ // Update the layout, and remember that we relaxed. If we are relaxing
+ // everything, we can skip this step since nothing will depend on updating
+ // the values.
+ if (!getRelaxAll())
+ Layout.UpdateForSlide(IF, SlideAmount);
+ WasRelaxed = true;
}
}
- return false;
+ return WasRelaxed;
+}
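+
Finish drives this routine to a fixed point with "while (LayoutOnce(Layout))
continue;"; termination holds because relaxation only ever grows an encoding,
so each instruction relaxes at most once. A toy model of that pass structure,
with fragment sizes standing in for fragments:

    #include <vector>

    // Pretend every 1-byte form grows to a 4-byte form when relaxed.
    static bool layoutOnce(std::vector<unsigned> &Sizes) {
      bool WasRelaxed = false;
      for (size_t i = 0, e = Sizes.size(); i != e; ++i) {
        if (Sizes[i] >= 4)
          continue;          // already in its long form
        Sizes[i] = 4;        // relax to the long encoding
        WasRelaxed = true;
      }
      return WasRelaxed;
    }

    static void relaxToFixedPoint(std::vector<unsigned> &Sizes) {
      while (layoutOnce(Sizes))
        continue;
    }
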
+
+void MCAssembler::FinishLayout(MCAsmLayout &Layout) {
+ // Lower out any instruction fragments, to simplify the fixup application and
+ // output.
+ //
+ // FIXME-PERF: We don't have to do this, but the assumption is that it is
+ // cheap (we will mostly end up eliminating fragments and appending onto data
+ // fragments), so the extra complexity downstream isn't worth it. Evaluate
+ // this assumption.
+ for (iterator it = begin(), ie = end(); it != ie; ++it) {
+ MCSectionData &SD = *it;
+
+ for (MCSectionData::iterator it2 = SD.begin(),
+ ie2 = SD.end(); it2 != ie2; ++it2) {
+ MCInstFragment *IF = dyn_cast<MCInstFragment>(it2);
+ if (!IF)
+ continue;
+
+ // Create a new data fragment for the instruction.
+ //
+ // FIXME-PERF: Reuse previous data fragment if possible.
+ MCDataFragment *DF = new MCDataFragment();
+ SD.getFragmentList().insert(it2, DF);
+
+ // Update the data fragment's layout data.
+ //
+ // FIXME: Add MCAsmLayout utility for this.
+ DF->setParent(IF->getParent());
+ DF->setOrdinal(IF->getOrdinal());
+ Layout.setFragmentOffset(DF, Layout.getFragmentOffset(IF));
+ Layout.setFragmentEffectiveSize(DF, Layout.getFragmentEffectiveSize(IF));
+
+ // Copy in the data and the fixups.
+ DF->getContents().append(IF->getCode().begin(), IF->getCode().end());
+ for (unsigned i = 0, e = IF->getFixups().size(); i != e; ++i)
+ DF->getFixups().push_back(IF->getFixups()[i]);
+
+ // Delete the instruction fragment and update the iterator.
+ SD.getFragmentList().erase(IF);
+ it2 = DF;
+ }
+ }
}
// Debugging methods
@@ -749,7 +871,7 @@ void MCFragment::dump() {
raw_ostream &OS = llvm::errs();
OS << "<MCFragment " << (void*) this << " Offset:" << Offset
- << " FileSize:" << FileSize;
+ << " EffectiveSize:" << EffectiveSize;
OS << ">";
}
@@ -801,6 +923,17 @@ void MCFillFragment::dump() {
<< " Count:" << getCount() << ">";
}
+void MCInstFragment::dump() {
+ raw_ostream &OS = llvm::errs();
+
+ OS << "<MCInstFragment ";
+ this->MCFragment::dump();
+ OS << "\n ";
+ OS << " Inst:";
+ getInst().dump_pretty(OS);
+ OS << ">";
+}
+
void MCOrgFragment::dump() {
raw_ostream &OS = llvm::errs();
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 37e8282..e02cbc7 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -23,9 +23,12 @@ MCContext::~MCContext() {
// we don't need to free them here.
}
-MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name, bool isTemporary) {
+MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
assert(!Name.empty() && "Normal symbols cannot be unnamed!");
+ // Determine whether this is an assembler temporary or normal label.
+ bool isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix());
+
// Do the lookup and get the entire StringMapEntry. We want access to the
// key if we are creating the entry.
StringMapEntry<MCSymbol*> &Entry = Symbols.GetOrCreateValue(Name);
@@ -38,24 +41,17 @@ MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name, bool isTemporary) {
return Result;
}
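
With the isTemporary flag gone, temporariness is inferred purely from the
target's private-global prefix ("L" on Darwin, ".L" on typical ELF targets).
A minimal sketch of the prefix test (a hypothetical helper, not the
MCContext API):

    #include <string>

    static bool isTemporaryName(const std::string &Name,
                                const std::string &PrivatePrefix) {
      // Matches StringRef::startswith: compare the leading characters only.
      return Name.compare(0, PrivatePrefix.size(), PrivatePrefix) == 0;
    }

    // isTemporaryName("Ltmp0", "L") == true
    // isTemporaryName("_main", "L") == false
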
-MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name, bool isTemporary) {
+MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
SmallString<128> NameSV;
Name.toVector(NameSV);
- return GetOrCreateSymbol(NameSV.str(), isTemporary);
+ return GetOrCreateSymbol(NameSV.str());
}
MCSymbol *MCContext::CreateTempSymbol() {
- return GetOrCreateTemporarySymbol(Twine(MAI.getPrivateGlobalPrefix()) +
- "tmp" + Twine(NextUniqueID++));
-}
-
-MCSymbol *MCContext::GetOrCreateTemporarySymbol(const Twine &Name) {
- SmallString<128> NameSV;
- Name.toVector(NameSV);
- return GetOrCreateTemporarySymbol(NameSV.str());
+ return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+ "tmp" + Twine(NextUniqueID++));
}
-
MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
return Symbols.lookup(Name);
}
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 2759944..bc670ab 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -7,7 +7,9 @@
//
//===----------------------------------------------------------------------===//
+#define DEBUG_TYPE "mcexpr"
#include "llvm/MC/MCExpr.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCAssembler.h"
@@ -19,6 +21,12 @@
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
+namespace {
+namespace stats {
+STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations");
+}
+}
+
void MCExpr::print(raw_ostream &OS) const {
switch (getKind()) {
case MCExpr::Target:
@@ -146,12 +154,6 @@ const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, VariantKind Kind,
return Create(Ctx.GetOrCreateSymbol(Name), Kind, Ctx);
}
-const MCSymbolRefExpr *MCSymbolRefExpr::CreateTemp(StringRef Name,
- VariantKind Kind,
- MCContext &Ctx) {
- return Create(Ctx.GetOrCreateTemporarySymbol(Name), Kind, Ctx);
-}
-
StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
switch (Kind) {
default:
@@ -194,6 +196,12 @@ void MCTargetExpr::Anchor() {}
bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const {
MCValue Value;
+ // Fast path constants.
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(this)) {
+ Res = CE->getValue();
+ return true;
+ }
+
if (!EvaluateAsRelocatable(Value, Layout) || !Value.isAbsolute())
return false;
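
The added fast path short-circuits bare constants before the general
relocatable evaluation runs. A standalone model of the shape of that check,
using a tiny hypothetical expression hierarchy rather than MCExpr:

    #include <cstdint>

    struct Expr { virtual ~Expr() {} };
    struct ConstExpr : Expr { int64_t Value; };

    bool evaluateAsAbsolute(const Expr *E, int64_t &Res) {
      // Fast path: a bare constant needs none of the relocation machinery.
      if (const ConstExpr *CE = dynamic_cast<const ConstExpr *>(E)) {
        Res = CE->Value;
        return true;
      }
      return false; // the real code falls through to EvaluateAsRelocatable
    }
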
@@ -225,6 +233,8 @@ static bool EvaluateSymbolicAdd(const MCValue &LHS,const MCSymbolRefExpr *RHS_A,
bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
const MCAsmLayout *Layout) const {
+ ++stats::MCExprEvaluate;
+
switch (getKind()) {
case Target:
return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout);
@@ -252,8 +262,8 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res,
Layout->getAssembler().getSymbolData(Res.getSymA()->getSymbol());
MCSymbolData &B =
Layout->getAssembler().getSymbolData(Res.getSymB()->getSymbol());
- Res = MCValue::get(+ A.getFragment()->getAddress() + A.getOffset()
- - B.getFragment()->getAddress() - B.getOffset()
+ Res = MCValue::get(+ Layout->getSymbolAddress(&A)
+ - Layout->getSymbolAddress(&B)
+ Res.getConstant());
}
diff --git a/lib/MC/MCInst.cpp b/lib/MC/MCInst.cpp
index 0634c9f..de142dc 100644
--- a/lib/MC/MCInst.cpp
+++ b/lib/MC/MCInst.cpp
@@ -9,6 +9,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInstPrinter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -43,6 +44,22 @@ void MCInst::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
OS << ">";
}
+void MCInst::dump_pretty(raw_ostream &OS, const MCAsmInfo *MAI,
+ const MCInstPrinter *Printer,
+ StringRef Separator) const {
+ OS << "<MCInst #" << getOpcode();
+
+ // Show the instruction opcode name if we have access to a printer.
+ if (Printer)
+ OS << ' ' << Printer->getOpcodeName(getOpcode());
+
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ OS << Separator;
+ getOperand(i).print(OS, MAI);
+ }
+ OS << ">\n";
+}
+
void MCInst::dump() const {
print(dbgs(), 0);
dbgs() << "\n";
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 9504392..120f837 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -18,6 +18,8 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetAsmBackend.h"
+
using namespace llvm;
namespace {
@@ -57,6 +59,15 @@ private:
return 0;
}
+ /// Get a data fragment to write into, creating a new one if the current
+ /// fragment is not a data fragment.
+ MCDataFragment *getOrCreateDataFragment() const {
+ MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
+ if (!F)
+ F = new MCDataFragment(CurSectionData);
+ return F;
+ }
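+
getOrCreateDataFragment factors out the reuse-or-append pattern that the
callers below previously spelled out by hand. A self-contained sketch of the
same idea over stand-in fragment types:

    #include <memory>
    #include <vector>

    struct Fragment { virtual ~Fragment() {} };
    struct DataFragment : Fragment { std::vector<char> Contents; };

    // Reuse the trailing fragment when it is already a data fragment,
    // otherwise append a fresh one.
    DataFragment *getOrCreateDataFragment(
        std::vector<std::unique_ptr<Fragment> > &List) {
      if (!List.empty())
        if (DataFragment *DF = dynamic_cast<DataFragment *>(List.back().get()))
          return DF;
      List.push_back(std::unique_ptr<Fragment>(new DataFragment()));
      return static_cast<DataFragment *>(List.back().get());
    }
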
+
public:
MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
raw_ostream &_OS, MCCodeEmitter *_Emitter)
@@ -64,6 +75,8 @@ public:
CurSectionData(0) {}
~MCMachOStreamer() {}
+ MCAssembler &getAssembler() { return Assembler; }
+
const MCExpr *AddValueSymbols(const MCExpr *Value) {
switch (Value->getKind()) {
case MCExpr::Target: assert(0 && "Can't handle target exprs yet!");
@@ -150,11 +163,11 @@ void MCMachOStreamer::SwitchSection(const MCSection *Section) {
void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) {
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
- // FIXME: We should also use offsets into Fill fragments.
- MCDataFragment *F = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
- if (!F)
- F = new MCDataFragment(CurSectionData);
-
+ // FIXME: This is wasteful; we don't necessarily need to create a data
+ // fragment. Instead, we should mark the symbol as pointing into the data
+ // fragment if one exists; otherwise, we should just queue the label and set
+ // its fragment pointer when we emit the next fragment.
+ MCDataFragment *F = getOrCreateDataFragment();
MCSymbolData &SD = Assembler.getOrCreateSymbolData(*Symbol);
assert(!SD.getFragment() && "Unexpected fragment on symbol data!");
SD.setFragment(F);
@@ -307,17 +320,12 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
}
void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
- MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
- if (!DF)
- DF = new MCDataFragment(CurSectionData);
- DF->getContents().append(Data.begin(), Data.end());
+ getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end());
}
void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
unsigned AddrSpace) {
- MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
- if (!DF)
- DF = new MCDataFragment(CurSectionData);
+ MCDataFragment *DF = getOrCreateDataFragment();
// Avoid fixups when possible.
int64_t AbsValue;
@@ -349,8 +357,7 @@ void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
- // FIXME the 0x90 is the default x86 1 byte nop opcode.
- new MCAlignFragment(ByteAlignment, 0x90, 1, MaxBytesToEmit,
+ new MCAlignFragment(ByteAlignment, 0, 1, MaxBytesToEmit,
true /* EmitNops */, CurSectionData);
// Update the maximum alignment on the current section if necessary.
@@ -371,20 +378,54 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) {
CurSectionData->setHasInstructions(true);
+ // FIXME-PERF: The common case is that we don't need to relax; encode
+ // directly onto the data fragment's buffer.
+
SmallVector<MCFixup, 4> Fixups;
SmallString<256> Code;
raw_svector_ostream VecOS(Code);
Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups);
VecOS.flush();
- // Add the fixups and data.
- MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
- if (!DF)
- DF = new MCDataFragment(CurSectionData);
+ // FIXME: Eliminate this copy.
+ SmallVector<MCAsmFixup, 4> AsmFixups;
for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
MCFixup &F = Fixups[i];
- DF->addFixup(MCAsmFixup(DF->getContents().size()+F.getOffset(),
- *F.getValue(), F.getKind()));
+ AsmFixups.push_back(MCAsmFixup(F.getOffset(), *F.getValue(),
+ F.getKind()));
+ }
+
+ // See if we might need to relax this instruction, if so it needs its own
+ // fragment.
+ //
+ // FIXME-PERF: Support target hook to do a fast path that avoids the encoder,
+ // when we can immediately tell that we will get something which might need
+ // relaxation (and compute its size).
+ //
+ // FIXME-PERF: We should also be smart about immediately relaxing
+ // instructions which we can already show will never fit (we can do a very
+ // good job of this before the first relaxation pass, because we have total
+ // knowledge about undefined symbols at that point). Even now, though, we can
+ // do a decent job, especially on Darwin, where scattering means that we will
+ // often know that we can never fully resolve a fixup.
+ if (Assembler.getBackend().MayNeedRelaxation(Inst, AsmFixups)) {
+ MCInstFragment *IF = new MCInstFragment(Inst, CurSectionData);
+
+ // Add the fixups and data.
+ //
+ // FIXME: Revisit this design decision when relaxation is done, we may be
+ // able to get away with not storing any extra data in the MCInst.
+ IF->getCode() = Code;
+ IF->getFixups() = AsmFixups;
+
+ return;
+ }
+
+ // Add the fixups and data.
+ MCDataFragment *DF = getOrCreateDataFragment();
+ for (unsigned i = 0, e = AsmFixups.size(); i != e; ++i) {
+ AsmFixups[i].Offset += DF->getContents().size();
+ DF->addFixup(AsmFixups[i]);
}
DF->getContents().append(Code.begin(), Code.end());
}
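
Note the rebasing step at the end of the non-relaxable path: the encoder
produces instruction-relative fixup offsets, which must be shifted by the
current size of the data fragment before the encoded bytes are appended. The
same step in isolation (hypothetical types):

    #include <cstdint>
    #include <vector>

    struct Fixup { uint64_t Offset; };

    void appendEncoded(std::vector<char> &FragContents,
                       std::vector<Fixup> &FragFixups,
                       const std::vector<char> &Code,
                       std::vector<Fixup> Fixups) {
      for (size_t i = 0, e = Fixups.size(); i != e; ++i) {
        Fixups[i].Offset += FragContents.size(); // rebase onto the fragment
        FragFixups.push_back(Fixups[i]);
      }
      FragContents.insert(FragContents.end(), Code.begin(), Code.end());
    }
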
@@ -394,6 +435,10 @@ void MCMachOStreamer::Finish() {
}
MCStreamer *llvm::createMachOStreamer(MCContext &Context, TargetAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *CE) {
- return new MCMachOStreamer(Context, TAB, OS, CE);
+ raw_ostream &OS, MCCodeEmitter *CE,
+ bool RelaxAll) {
+ MCMachOStreamer *S = new MCMachOStreamer(Context, TAB, OS, CE);
+ if (RelaxAll)
+ S->getAssembler().setRelaxAll(true);
+ return S;
}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 4ec5247..24616b4 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -238,9 +238,7 @@ bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) {
}
MCSymbol *AsmParser::CreateSymbol(StringRef Name) {
- // If the label starts with L it is an assembler temporary label.
- if (Name.startswith("L"))
- return Ctx.GetOrCreateTemporarySymbol(Name);
+ // FIXME: Inline into callers.
return Ctx.GetOrCreateSymbol(Name);
}
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index 24c89ef..f6e9636 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -26,7 +26,11 @@ MCSection::~MCSection() {
MCSectionCOFF *MCSectionCOFF::
Create(StringRef Name, bool IsDirective, SectionKind K, MCContext &Ctx) {
- return new (Ctx) MCSectionCOFF(Name, IsDirective, K);
+ char *NameCopy = static_cast<char*>(
+ Ctx.Allocate(Name.size(), /*Alignment=*/1));
+ memcpy(NameCopy, Name.data(), Name.size());
+ return new (Ctx) MCSectionCOFF(StringRef(NameCopy, Name.size()),
+ IsDirective, K);
}
void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index 4b08c22..e073eb5 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
@@ -271,12 +272,14 @@ public:
assert(OS.tell() - Start == SegmentLoadCommandSize);
}
- void WriteSection(const MCAssembler &Asm, const MCSectionData &SD,
- uint64_t FileOffset, uint64_t RelocationsStart,
- unsigned NumRelocations) {
+ void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCSectionData &SD, uint64_t FileOffset,
+ uint64_t RelocationsStart, unsigned NumRelocations) {
+ uint64_t SectionSize = Layout.getSectionSize(&SD);
+
// The offset is unused for virtual sections.
if (Asm.getBackend().isVirtualSection(SD.getSection())) {
- assert(SD.getFileSize() == 0 && "Invalid file size!");
+ assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
FileOffset = 0;
}
@@ -292,11 +295,11 @@ public:
WriteBytes(Section.getSectionName(), 16);
WriteBytes(Section.getSegmentName(), 16);
if (Is64Bit) {
- Write64(SD.getAddress()); // address
- Write64(SD.getSize()); // size
+ Write64(Layout.getSectionAddress(&SD)); // address
+ Write64(SectionSize); // size
} else {
- Write32(SD.getAddress()); // address
- Write32(SD.getSize()); // size
+ Write32(Layout.getSectionAddress(&SD)); // address
+ Write32(SectionSize); // size
}
Write32(FileOffset);
@@ -372,7 +375,7 @@ public:
assert(OS.tell() - Start == DysymtabLoadCommandSize);
}
- void WriteNlist(MachSymbolData &MSD) {
+ void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) {
MCSymbolData &Data = *MSD.SymbolData;
const MCSymbol &Symbol = Data.getSymbol();
uint8_t Type = 0;
@@ -403,7 +406,7 @@ public:
if (Symbol.isAbsolute()) {
llvm_unreachable("FIXME: Not yet implemented!");
} else {
- Address = Data.getAddress();
+ Address = Layout.getSymbolAddress(&Data);
}
} else if (Data.isCommon()) {
// Common symbols are encoded with the size in the address
@@ -437,8 +440,22 @@ public:
Write32(Address);
}
- void RecordX86_64Relocation(const MCAssembler &Asm,
- const MCDataFragment &Fragment,
+ // FIXME: We really need to improve the relocation validation. Basically, we
+ // want to implement a separate computation which evaluates the relocation
+ // entry as the linker would, and verifies that the resultant fixup value is
+ // exactly what the encoder wanted. This will catch several classes of
+ // problems:
+ //
+ // - Relocation entry bugs, the two algorithms are unlikely to have the same
+ // exact bug.
+ //
+ // - Relaxation issues, where we forget to relax something.
+ //
+ // - Input errors, where something cannot be correctly encoded. 'as' allows
+ // these through in many cases.
+
+ void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
const MCAsmFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
@@ -446,7 +463,7 @@ public:
unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
// See <reloc.h>.
- uint32_t Address = Fragment.getOffset() + Fixup.Offset;
+ uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
int64_t Value = 0;
unsigned Index = 0;
unsigned IsExtern = 0;
@@ -480,11 +497,11 @@ public:
} else if (Target.getSymB()) { // A - B + constant
const MCSymbol *A = &Target.getSymA()->getSymbol();
MCSymbolData &A_SD = Asm.getSymbolData(*A);
- const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+ const MCSymbolData *A_Base = Asm.getAtom(Layout, &A_SD);
const MCSymbol *B = &Target.getSymB()->getSymbol();
MCSymbolData &B_SD = Asm.getSymbolData(*B);
- const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
+ const MCSymbolData *B_Base = Asm.getAtom(Layout, &B_SD);
// Neither symbol can be modified.
if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
@@ -507,8 +524,8 @@ public:
if (A_Base == B_Base)
llvm_report_error("unsupported relocation with identical base");
- Value += A_SD.getAddress() - A_Base->getAddress();
- Value -= B_SD.getAddress() - B_Base->getAddress();
+ Value += Layout.getSymbolAddress(&A_SD) - Layout.getSymbolAddress(A_Base);
+ Value -= Layout.getSymbolAddress(&B_SD) - Layout.getSymbolAddress(B_Base);
Index = A_Base->getIndex();
IsExtern = 1;
@@ -521,7 +538,7 @@ public:
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
- Relocations[Fragment.getParent()].push_back(MRE);
+ Relocations[Fragment->getParent()].push_back(MRE);
Index = B_Base->getIndex();
IsExtern = 1;
@@ -529,7 +546,7 @@ public:
} else {
const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
MCSymbolData &SD = Asm.getSymbolData(*Symbol);
- const MCSymbolData *Base = Asm.getAtom(&SD);
+ const MCSymbolData *Base = Asm.getAtom(Layout, &SD);
// x86_64 almost always uses external relocations, except when there is no
// symbol to use as a base address (a local symbol with no preceding
@@ -540,19 +557,12 @@ public:
// Add the local offset, if needed.
if (Base != &SD)
- Value += SD.getAddress() - Base->getAddress();
+ Value += Layout.getSymbolAddress(&SD) - Layout.getSymbolAddress(Base);
} else {
- // The index is the section ordinal.
- //
- // FIXME: O(N)
- Index = 1;
- MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
- for (; it != ie; ++it, ++Index)
- if (&*it == SD.getFragment()->getParent())
- break;
- assert(it != ie && "Unable to find section index!");
+ // The index is the section ordinal (1-based).
+ Index = SD.getFragment()->getParent()->getOrdinal() + 1;
IsExtern = 0;
- Value += SD.getAddress();
+ Value += Layout.getSymbolAddress(&SD);
if (IsPCRel)
Value -= Address + (1 << Log2Size);
@@ -602,9 +612,16 @@ public:
}
}
} else {
- if (Modifier == MCSymbolRefExpr::VK_GOT)
+ if (Modifier == MCSymbolRefExpr::VK_GOT) {
Type = RIT_X86_64_GOT;
- else if (Modifier != MCSymbolRefExpr::VK_None)
+ } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+ // GOTPCREL is allowed as a modifier on non-PCrel instructions, in
+ // which case all we do is set the PCrel bit in the relocation entry;
+ // this is used with exception handling, for example. The source is
+ // required to include any necessary offset directly.
+ Type = RIT_X86_64_GOT;
+ IsPCRel = 1;
+ } else if (Modifier != MCSymbolRefExpr::VK_None)
llvm_report_error("unsupported symbol modifier in relocation");
else
Type = RIT_X86_64_Unsigned;
@@ -622,14 +639,15 @@ public:
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
- Relocations[Fragment.getParent()].push_back(MRE);
+ Relocations[Fragment->getParent()].push_back(MRE);
}
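
The Word1 packing repeated through these relocation writers follows the
Mach-O relocation_info layout from <mach-o/reloc.h>: a 24-bit symbol or
section index, then the pcrel, length (log2 of the fixup size), extern, and
type bit fields. The packing as one helper, for reference:

    #include <cstdint>

    static uint32_t packRelocWord1(uint32_t Index, bool IsPCRel,
                                   uint32_t Log2Size, bool IsExtern,
                                   uint32_t Type) {
      return (Index << 0) |                 // r_symbolnum (24 bits)
             (uint32_t(IsPCRel) << 24) |    // r_pcrel
             (Log2Size << 25) |             // r_length
             (uint32_t(IsExtern) << 27) |   // r_extern
             (Type << 28);                  // r_type (4 bits)
    }
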
void RecordScatteredRelocation(const MCAssembler &Asm,
- const MCFragment &Fragment,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
const MCAsmFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
- uint32_t Address = Fragment.getOffset() + Fixup.Offset;
+ uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind);
unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
unsigned Type = RIT_Vanilla;
@@ -642,7 +660,7 @@ public:
llvm_report_error("symbol '" + A->getName() +
"' can not be undefined in a subtraction expression");
- uint32_t Value = A_SD->getAddress();
+ uint32_t Value = Layout.getSymbolAddress(A_SD);
uint32_t Value2 = 0;
if (const MCSymbolRefExpr *B = Target.getSymB()) {
@@ -658,7 +676,7 @@ public:
// relocation types from the linker's point of view; this is done solely
// for pedantic compatibility with 'as'.
Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference;
- Value2 = B_SD->getAddress();
+ Value2 = Layout.getSymbolAddress(B_SD);
}
// Relocations are written out in reverse order, so the PAIR comes first.
@@ -670,7 +688,7 @@ public:
(IsPCRel << 30) |
RF_Scattered);
MRE.Word1 = Value2;
- Relocations[Fragment.getParent()].push_back(MRE);
+ Relocations[Fragment->getParent()].push_back(MRE);
}
MachRelocationEntry MRE;
@@ -680,14 +698,14 @@ public:
(IsPCRel << 30) |
RF_Scattered);
MRE.Word1 = Value;
- Relocations[Fragment.getParent()].push_back(MRE);
+ Relocations[Fragment->getParent()].push_back(MRE);
}
- void RecordRelocation(const MCAssembler &Asm, const MCDataFragment &Fragment,
- const MCAsmFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) {
+ void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCAsmFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
if (Is64Bit) {
- RecordX86_64Relocation(Asm, Fragment, Fixup, Target, FixedValue);
+ RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
return;
}
@@ -702,12 +720,12 @@ public:
if (Target.getSymB() ||
(Target.getSymA() && !Target.getSymA()->getSymbol().isUndefined() &&
Offset)) {
- RecordScatteredRelocation(Asm, Fragment, Fixup, Target, FixedValue);
+ RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,Target,FixedValue);
return;
}
// See <reloc.h>.
- uint32_t Address = Fragment.getOffset() + Fixup.Offset;
+ uint32_t Address = Layout.getFragmentOffset(Fragment) + Fixup.Offset;
uint32_t Value = 0;
unsigned Index = 0;
unsigned IsExtern = 0;
@@ -729,16 +747,9 @@ public:
Index = SD->getIndex();
Value = 0;
} else {
- // The index is the section ordinal.
- //
- // FIXME: O(N)
- Index = 1;
- MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end();
- for (; it != ie; ++it, ++Index)
- if (&*it == SD->getFragment()->getParent())
- break;
- assert(it != ie && "Unable to find section index!");
- Value = SD->getAddress();
+ // The index is the section ordinal (1-based).
+ Index = SD->getFragment()->getParent()->getOrdinal() + 1;
+ Value = Layout.getSymbolAddress(SD);
}
Type = RIT_Vanilla;
@@ -752,7 +763,7 @@ public:
(Log2Size << 25) |
(IsExtern << 27) |
(Type << 28));
- Relocations[Fragment.getParent()].push_back(MRE);
+ Relocations[Fragment->getParent()].push_back(MRE);
}
void BindIndirectSymbols(MCAssembler &Asm) {
@@ -920,7 +931,7 @@ public:
UndefinedSymbolData);
}
- void WriteObject(const MCAssembler &Asm) {
+ void WriteObject(const MCAssembler &Asm, const MCAsmLayout &Layout) {
unsigned NumSections = Asm.size();
// The section data starts after the header, the segment load command (and
@@ -948,16 +959,17 @@ public:
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it) {
const MCSectionData &SD = *it;
+ uint64_t Address = Layout.getSectionAddress(&SD);
+ uint64_t Size = Layout.getSectionSize(&SD);
+ uint64_t FileSize = Layout.getSectionFileSize(&SD);
- VMSize = std::max(VMSize, SD.getAddress() + SD.getSize());
+ VMSize = std::max(VMSize, Address + Size);
if (Asm.getBackend().isVirtualSection(SD.getSection()))
continue;
- SectionDataSize = std::max(SectionDataSize,
- SD.getAddress() + SD.getSize());
- SectionDataFileSize = std::max(SectionDataFileSize,
- SD.getAddress() + SD.getFileSize());
+ SectionDataSize = std::max(SectionDataSize, Address + Size);
+ SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
}
// The section data is padded to 4 bytes.
@@ -978,8 +990,8 @@ public:
ie = Asm.end(); it != ie; ++it) {
std::vector<MachRelocationEntry> &Relocs = Relocations[it];
unsigned NumRelocs = Relocs.size();
- uint64_t SectionStart = SectionDataStart + it->getAddress();
- WriteSection(Asm, *it, SectionStart, RelocTableEnd, NumRelocs);
+ uint64_t SectionStart = SectionDataStart + Layout.getSectionAddress(it);
+ WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
RelocTableEnd += NumRelocs * RelocationInfoSize;
}
@@ -1020,7 +1032,7 @@ public:
// Write the actual section data.
for (MCAssembler::const_iterator it = Asm.begin(),
ie = Asm.end(); it != ie; ++it)
- Asm.WriteSectionData(it, Writer);
+ Asm.WriteSectionData(it, Layout, Writer);
// Write the extra padding.
WriteZeros(SectionDataPadding);
@@ -1066,11 +1078,11 @@ public:
// Write the symbol table entries.
for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
- WriteNlist(LocalSymbolData[i]);
+ WriteNlist(LocalSymbolData[i], Layout);
for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
- WriteNlist(ExternalSymbolData[i]);
+ WriteNlist(ExternalSymbolData[i], Layout);
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
- WriteNlist(UndefinedSymbolData[i]);
+ WriteNlist(UndefinedSymbolData[i], Layout);
// Write the string table.
OS << StringTable.str();
@@ -1097,13 +1109,15 @@ void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) {
}
void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
- const MCDataFragment &Fragment,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
const MCAsmFixup &Fixup, MCValue Target,
uint64_t &FixedValue) {
- ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Fragment, Fixup,
+ ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Layout, Fragment, Fixup,
Target, FixedValue);
}
-void MachObjectWriter::WriteObject(const MCAssembler &Asm) {
- ((MachObjectWriterImpl*) Impl)->WriteObject(Asm);
+void MachObjectWriter::WriteObject(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ ((MachObjectWriterImpl*) Impl)->WriteObject(Asm, Layout);
}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 8f860a6..485bf4d 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -65,7 +65,7 @@ namespace llvm {
pow(5, power) is
power * 815 / (351 * integerPartWidth) + 1
-
+
However, whilst the result may require only this many parts,
because we are multiplying two values to get it, the
multiplication may require an extra part with the excess part
@@ -100,15 +100,15 @@ hexDigitValue(unsigned int c)
unsigned int r;
r = c - '0';
- if(r <= 9)
+ if (r <= 9)
return r;
r = c - 'A';
- if(r <= 5)
+ if (r <= 5)
return r + 10;
r = c - 'a';
- if(r <= 5)
+ if (r <= 5)
return r + 10;
return -1U;
@@ -116,8 +116,8 @@ hexDigitValue(unsigned int c)
static inline void
assertArithmeticOK(const llvm::fltSemantics &semantics) {
- assert(semantics.arithmeticOK
- && "Compile-time arithmetic does not support these semantics");
+ assert(semantics.arithmeticOK &&
+ "Compile-time arithmetic does not support these semantics");
}
/* Return the value of a decimal exponent of the form
@@ -179,37 +179,37 @@ totalExponent(StringRef::iterator p, StringRef::iterator end,
assert(p != end && "Exponent has no digits");
negative = *p == '-';
- if(*p == '-' || *p == '+') {
+ if (*p == '-' || *p == '+') {
p++;
assert(p != end && "Exponent has no digits");
}
unsignedExponent = 0;
overflow = false;
- for(; p != end; ++p) {
+ for (; p != end; ++p) {
unsigned int value;
value = decDigitValue(*p);
assert(value < 10U && "Invalid character in exponent");
unsignedExponent = unsignedExponent * 10 + value;
- if(unsignedExponent > 65535)
+ if (unsignedExponent > 65535)
overflow = true;
}
- if(exponentAdjustment > 65535 || exponentAdjustment < -65536)
+ if (exponentAdjustment > 65535 || exponentAdjustment < -65536)
overflow = true;
- if(!overflow) {
+ if (!overflow) {
exponent = unsignedExponent;
- if(negative)
+ if (negative)
exponent = -exponent;
exponent += exponentAdjustment;
- if(exponent > 65535 || exponent < -65536)
+ if (exponent > 65535 || exponent < -65536)
overflow = true;
}
- if(overflow)
+ if (overflow)
exponent = negative ? -65536: 65535;
return exponent;
@@ -221,15 +221,15 @@ skipLeadingZeroesAndAnyDot(StringRef::iterator begin, StringRef::iterator end,
{
StringRef::iterator p = begin;
*dot = end;
- while(*p == '0' && p != end)
+ while (*p == '0' && p != end)
p++;
- if(*p == '.') {
+ if (*p == '.') {
*dot = p++;
assert(end - begin != 1 && "Significand has no digits");
- while(*p == '0' && p != end)
+ while (*p == '0' && p != end)
p++;
}
@@ -323,13 +323,13 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
/* If the first trailing digit isn't 0 or 8 we can work out the
fraction immediately. */
- if(digitValue > 8)
+ if (digitValue > 8)
return lfMoreThanHalf;
- else if(digitValue < 8 && digitValue > 0)
+ else if (digitValue < 8 && digitValue > 0)
return lfLessThanHalf;
/* Otherwise we need to find the first non-zero digit. */
- while(*p == '0')
+ while (*p == '0')
p++;
assert(p != end && "Invalid trailing hexadecimal fraction!");
@@ -338,7 +338,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end,
/* If we ran off the end it is exactly zero or one-half, otherwise
a little more. */
- if(hexDigit == -1U)
+ if (hexDigit == -1U)
return digitValue == 0 ? lfExactlyZero: lfExactlyHalf;
else
return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf;
@@ -356,12 +356,12 @@ lostFractionThroughTruncation(const integerPart *parts,
lsb = APInt::tcLSB(parts, partCount);
/* Note this is guaranteed true if bits == 0, or LSB == -1U. */
- if(bits <= lsb)
+ if (bits <= lsb)
return lfExactlyZero;
- if(bits == lsb + 1)
+ if (bits == lsb + 1)
return lfExactlyHalf;
- if(bits <= partCount * integerPartWidth
- && APInt::tcExtractBit(parts, bits - 1))
+ if (bits <= partCount * integerPartWidth &&
+ APInt::tcExtractBit(parts, bits - 1))
return lfMoreThanHalf;
return lfLessThanHalf;
@@ -385,10 +385,10 @@ static lostFraction
combineLostFractions(lostFraction moreSignificant,
lostFraction lessSignificant)
{
- if(lessSignificant != lfExactlyZero) {
- if(moreSignificant == lfExactlyZero)
+ if (lessSignificant != lfExactlyZero) {
+ if (moreSignificant == lfExactlyZero)
moreSignificant = lfLessThanHalf;
- else if(moreSignificant == lfExactlyHalf)
+ else if (moreSignificant == lfExactlyHalf)
moreSignificant = lfMoreThanHalf;
}
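
combineLostFractions implements a small lattice: a nonzero less-significant
loss can only nudge an exact more-significant result off of "exactly zero" or
"exactly half". The same logic as a standalone sketch:

    enum LostFraction { ExactlyZero, LessThanHalf, ExactlyHalf, MoreThanHalf };

    LostFraction combine(LostFraction More, LostFraction Less) {
      if (Less != ExactlyZero) {
        if (More == ExactlyZero)
          More = LessThanHalf;      // lost something, but less than half
        else if (More == ExactlyHalf)
          More = MoreThanHalf;      // the tie is broken upward
      }
      return More;
    }
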
@@ -468,7 +468,7 @@ powerOf5(integerPart *dst, unsigned int power)
15625, 78125 };
integerPart pow5s[maxPowerOfFiveParts * 2 + 5];
pow5s[0] = 78125 * 5;
-
+
unsigned int partsCount[16] = { 1 };
integerPart scratch[maxPowerOfFiveParts], *p1, *p2, *pow5;
unsigned int result;
@@ -588,14 +588,14 @@ APFloat::initialize(const fltSemantics *ourSemantics)
semantics = ourSemantics;
count = partCount();
- if(count > 1)
+ if (count > 1)
significand.parts = new integerPart[count];
}
void
APFloat::freeSignificand()
{
- if(partCount() > 1)
+ if (partCount() > 1)
delete [] significand.parts;
}
@@ -609,7 +609,7 @@ APFloat::assign(const APFloat &rhs)
exponent = rhs.exponent;
sign2 = rhs.sign2;
exponent2 = rhs.exponent2;
- if(category == fcNormal || category == fcNaN)
+ if (category == fcNormal || category == fcNaN)
copySignificand(rhs);
}
@@ -683,8 +683,8 @@ APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
APFloat &
APFloat::operator=(const APFloat &rhs)
{
- if(this != &rhs) {
- if(semantics != rhs.semantics) {
+ if (this != &rhs) {
+ if (semantics != rhs.semantics) {
freeSignificand();
initialize(rhs.semantics);
}
@@ -881,7 +881,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
precision = semantics->precision;
newPartsCount = partCountForBits(precision * 2);
- if(newPartsCount > 4)
+ if (newPartsCount > 4)
fullSignificand = new integerPart[newPartsCount];
else
fullSignificand = scratch;
@@ -896,7 +896,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1;
exponent += rhs.exponent;
- if(addend) {
+ if (addend) {
Significand savedSignificand = significand;
const fltSemantics *savedSemantics = semantics;
fltSemantics extendedSemantics;
@@ -905,18 +905,17 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
/* Normalize our MSB. */
extendedPrecision = precision + precision - 1;
- if(omsb != extendedPrecision)
- {
- APInt::tcShiftLeft(fullSignificand, newPartsCount,
- extendedPrecision - omsb);
- exponent -= extendedPrecision - omsb;
- }
+ if (omsb != extendedPrecision) {
+ APInt::tcShiftLeft(fullSignificand, newPartsCount,
+ extendedPrecision - omsb);
+ exponent -= extendedPrecision - omsb;
+ }
/* Create new semantics. */
extendedSemantics = *semantics;
extendedSemantics.precision = extendedPrecision;
- if(newPartsCount == 1)
+ if (newPartsCount == 1)
significand.part = fullSignificand[0];
else
significand.parts = fullSignificand;
@@ -928,7 +927,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
lost_fraction = addOrSubtractSignificand(extendedAddend, false);
/* Restore our state. */
- if(newPartsCount == 1)
+ if (newPartsCount == 1)
fullSignificand[0] = significand.part;
significand = savedSignificand;
semantics = savedSemantics;
@@ -938,7 +937,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
exponent -= (precision - 1);
- if(omsb > precision) {
+ if (omsb > precision) {
unsigned int bits, significantParts;
lostFraction lf;
@@ -951,7 +950,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
APInt::tcAssign(lhsSignificand, fullSignificand, partsCount);
- if(newPartsCount > 4)
+ if (newPartsCount > 4)
delete [] fullSignificand;
return lost_fraction;
@@ -973,7 +972,7 @@ APFloat::divideSignificand(const APFloat &rhs)
rhsSignificand = rhs.significandParts();
partsCount = partCount();
- if(partsCount > 2)
+ if (partsCount > 2)
dividend = new integerPart[partsCount * 2];
else
dividend = scratch;
@@ -981,7 +980,7 @@ APFloat::divideSignificand(const APFloat &rhs)
divisor = dividend + partsCount;
/* Copy the dividend and divisor as they will be modified in-place. */
- for(i = 0; i < partsCount; i++) {
+ for (i = 0; i < partsCount; i++) {
dividend[i] = lhsSignificand[i];
divisor[i] = rhsSignificand[i];
lhsSignificand[i] = 0;
@@ -993,14 +992,14 @@ APFloat::divideSignificand(const APFloat &rhs)
/* Normalize the divisor. */
bit = precision - APInt::tcMSB(divisor, partsCount) - 1;
- if(bit) {
+ if (bit) {
exponent += bit;
APInt::tcShiftLeft(divisor, partsCount, bit);
}
/* Normalize the dividend. */
bit = precision - APInt::tcMSB(dividend, partsCount) - 1;
- if(bit) {
+ if (bit) {
exponent -= bit;
APInt::tcShiftLeft(dividend, partsCount, bit);
}
@@ -1008,15 +1007,15 @@ APFloat::divideSignificand(const APFloat &rhs)
/* Ensure the dividend >= divisor initially for the loop below.
Incidentally, this means that the division loop below is
guaranteed to set the integer bit to one. */
- if(APInt::tcCompare(dividend, divisor, partsCount) < 0) {
+ if (APInt::tcCompare(dividend, divisor, partsCount) < 0) {
exponent--;
APInt::tcShiftLeft(dividend, partsCount, 1);
assert(APInt::tcCompare(dividend, divisor, partsCount) >= 0);
}
/* Long division. */
- for(bit = precision; bit; bit -= 1) {
- if(APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
+ for (bit = precision; bit; bit -= 1) {
+ if (APInt::tcCompare(dividend, divisor, partsCount) >= 0) {
APInt::tcSubtract(dividend, divisor, 0, partsCount);
APInt::tcSetBit(lhsSignificand, bit - 1);
}
@@ -1027,16 +1026,16 @@ APFloat::divideSignificand(const APFloat &rhs)
/* Figure out the lost fraction. */
int cmp = APInt::tcCompare(dividend, divisor, partsCount);
- if(cmp > 0)
+ if (cmp > 0)
lost_fraction = lfMoreThanHalf;
- else if(cmp == 0)
+ else if (cmp == 0)
lost_fraction = lfExactlyHalf;
- else if(APInt::tcIsZero(dividend, partsCount))
+ else if (APInt::tcIsZero(dividend, partsCount))
lost_fraction = lfExactlyZero;
else
lost_fraction = lfLessThanHalf;
- if(partsCount > 2)
+ if (partsCount > 2)
delete [] dividend;
return lost_fraction;
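
The loop just above is restoring long division, producing one quotient bit per round; since the remainder is shifted left after every iteration, the comparison that classifies the lost fraction weighs 2*r against the divisor, i.e. the true remainder r against half a unit in the last place. A toy single-word version of the same shape (hypothetical helper, not part of this patch):

    #include <cstdint>
    #include <cstdio>

    enum LostFraction { ExactlyZero, LessThanHalf, ExactlyHalf, MoreThanHalf };

    // Divide a by b producing `bits` quotient bits; the caller arranges
    // b <= a < 2b, so the first quotient bit is always set.  The remainder
    // is left pre-doubled by the final shift, so comparing it against b
    // compares the true remainder r against b/2.
    static LostFraction divideAndClassify(uint32_t a, uint32_t b,
                                          unsigned bits, uint32_t &q) {
      q = 0;
      for (unsigned bit = bits; bit; --bit) {
        if (a >= b) { a -= b; q |= uint32_t(1) << (bit - 1); }
        a <<= 1;                        // keep the remainder pre-doubled
      }
      if (a > b)  return MoreThanHalf;  // 2r > b  =>  r > b/2
      if (a == b) return ExactlyHalf;
      if (a == 0) return ExactlyZero;
      return LessThanHalf;
    }

    int main() {
      uint32_t q;
      LostFraction lf = divideAndClassify(5, 4, 3, q); // 1.25 = 1.01b exactly
      std::printf("q=%u lf=%d\n", q, lf);              // q=5 (101b), lf=0
    }
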
@@ -1072,7 +1071,7 @@ APFloat::shiftSignificandLeft(unsigned int bits)
{
assert(bits < semantics->precision);
- if(bits) {
+ if (bits) {
unsigned int partsCount = partCount();
APInt::tcShiftLeft(significandParts(), partsCount, bits);
@@ -1095,13 +1094,13 @@ APFloat::compareAbsoluteValue(const APFloat &rhs) const
/* If exponents are equal, do an unsigned bignum comparison of the
significands. */
- if(compare == 0)
+ if (compare == 0)
compare = APInt::tcCompare(significandParts(), rhs.significandParts(),
partCount());
- if(compare > 0)
+ if (compare > 0)
return cmpGreaterThan;
- else if(compare < 0)
+ else if (compare < 0)
return cmpLessThan;
else
return cmpEqual;
@@ -1113,14 +1112,13 @@ APFloat::opStatus
APFloat::handleOverflow(roundingMode rounding_mode)
{
/* Infinity? */
- if(rounding_mode == rmNearestTiesToEven
- || rounding_mode == rmNearestTiesToAway
- || (rounding_mode == rmTowardPositive && !sign)
- || (rounding_mode == rmTowardNegative && sign))
- {
- category = fcInfinity;
- return (opStatus) (opOverflow | opInexact);
- }
+ if (rounding_mode == rmNearestTiesToEven ||
+ rounding_mode == rmNearestTiesToAway ||
+ (rounding_mode == rmTowardPositive && !sign) ||
+ (rounding_mode == rmTowardNegative && sign)) {
+ category = fcInfinity;
+ return (opStatus) (opOverflow | opInexact);
+ }
/* Otherwise we become the largest finite number. */
category = fcNormal;
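
The branch above encodes the IEEE 754 overflow rule: overflow rounds to infinity only when the rounding direction points away from zero; otherwise the result saturates at the largest finite value. A minimal sketch of the same decision (hypothetical names, not from the patch):

    #include <cmath>
    #include <cstdio>

    enum RoundingMode { NearestTiesToEven, NearestTiesToAway,
                        TowardPositive, TowardNegative, TowardZero };

    // Overflow becomes infinity when rounding would move further from zero,
    // and the largest finite value otherwise.
    static double overflowResult(RoundingMode rm, bool negative,
                                 double largestFinite) {
      bool toInfinity = rm == NearestTiesToEven || rm == NearestTiesToAway ||
                        (rm == TowardPositive && !negative) ||
                        (rm == TowardNegative && negative);
      double magnitude = toInfinity ? INFINITY : largestFinite;
      return negative ? -magnitude : magnitude;
    }

    int main() {
      std::printf("%g\n", overflowResult(TowardZero, false, 1.7976931348623157e308));
      std::printf("%g\n", overflowResult(NearestTiesToEven, true, 1.7976931348623157e308));
    }
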
@@ -1155,11 +1153,11 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode,
return lost_fraction == lfExactlyHalf || lost_fraction == lfMoreThanHalf;
case rmNearestTiesToEven:
- if(lost_fraction == lfMoreThanHalf)
+ if (lost_fraction == lfMoreThanHalf)
return true;
/* Our zeroes don't have a significand to test. */
- if(lost_fraction == lfExactlyHalf && category != fcZero)
+ if (lost_fraction == lfExactlyHalf && category != fcZero)
return APInt::tcExtractBit(significandParts(), bit);
return false;
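
rmNearestTiesToEven rounds up when more than half was lost, and on an exact tie it consults the last kept bit so ties always land on the even neighbor. The same decision in miniature (hypothetical helper):

    #include <cstdio>

    // Given the kept integer part and the classification of the discarded
    // bits, decide whether to increment.  On an exact tie we round toward
    // the value whose last kept bit is zero ("even").
    static bool roundUpTiesToEven(unsigned long kept, bool moreThanHalf,
                                  bool exactlyHalf) {
      if (moreThanHalf) return true;
      if (exactlyHalf)  return (kept & 1) != 0; // tie: go to the even neighbor
      return false;
    }

    int main() {
      std::printf("%d\n", roundUpTiesToEven(2, false, true)); // 2.5 -> 2 (0)
      std::printf("%d\n", roundUpTiesToEven(3, false, true)); // 3.5 -> 4 (1)
    }
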
@@ -1182,13 +1180,13 @@ APFloat::normalize(roundingMode rounding_mode,
unsigned int omsb; /* One, not zero, based MSB. */
int exponentChange;
- if(category != fcNormal)
+ if (category != fcNormal)
return opOK;
/* Before rounding normalize the exponent of fcNormal numbers. */
omsb = significandMSB() + 1;
- if(omsb) {
+ if (omsb) {
/* OMSB is numbered from 1. We want to place it in the integer
       bit numbered PRECISION if possible, with a compensating change in
the exponent. */
@@ -1196,16 +1194,16 @@ APFloat::normalize(roundingMode rounding_mode,
/* If the resulting exponent is too high, overflow according to
the rounding mode. */
- if(exponent + exponentChange > semantics->maxExponent)
+ if (exponent + exponentChange > semantics->maxExponent)
return handleOverflow(rounding_mode);
/* Subnormal numbers have exponent minExponent, and their MSB
is forced based on that. */
- if(exponent + exponentChange < semantics->minExponent)
+ if (exponent + exponentChange < semantics->minExponent)
exponentChange = semantics->minExponent - exponent;
/* Shifting left is easy as we don't lose precision. */
- if(exponentChange < 0) {
+ if (exponentChange < 0) {
assert(lost_fraction == lfExactlyZero);
shiftSignificandLeft(-exponentChange);
@@ -1213,7 +1211,7 @@ APFloat::normalize(roundingMode rounding_mode,
return opOK;
}
- if(exponentChange > 0) {
+ if (exponentChange > 0) {
lostFraction lf;
/* Shift right and capture any new lost fraction. */
@@ -1222,7 +1220,7 @@ APFloat::normalize(roundingMode rounding_mode,
lost_fraction = combineLostFractions(lf, lost_fraction);
/* Keep OMSB up-to-date. */
- if(omsb > (unsigned) exponentChange)
+ if (omsb > (unsigned) exponentChange)
omsb -= exponentChange;
else
omsb = 0;
@@ -1234,28 +1232,28 @@ APFloat::normalize(roundingMode rounding_mode,
/* As specified in IEEE 754, since we do not trap we do not report
underflow for exact results. */
- if(lost_fraction == lfExactlyZero) {
+ if (lost_fraction == lfExactlyZero) {
/* Canonicalize zeroes. */
- if(omsb == 0)
+ if (omsb == 0)
category = fcZero;
return opOK;
}
/* Increment the significand if we're rounding away from zero. */
- if(roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
- if(omsb == 0)
+ if (roundAwayFromZero(rounding_mode, lost_fraction, 0)) {
+ if (omsb == 0)
exponent = semantics->minExponent;
incrementSignificand();
omsb = significandMSB() + 1;
/* Did the significand increment overflow? */
- if(omsb == (unsigned) semantics->precision + 1) {
+ if (omsb == (unsigned) semantics->precision + 1) {
/* Renormalize by incrementing the exponent and shifting our
significand right one. However if we already have the
maximum exponent we overflow to infinity. */
- if(exponent == semantics->maxExponent) {
+ if (exponent == semantics->maxExponent) {
category = fcInfinity;
return (opStatus) (opOverflow | opInexact);
@@ -1269,14 +1267,14 @@ APFloat::normalize(roundingMode rounding_mode,
/* The normal case - we were and are not denormal, and any
significand increment above didn't overflow. */
- if(omsb == semantics->precision)
+ if (omsb == semantics->precision)
return opInexact;
/* We have a non-zero denormal. */
assert(omsb < semantics->precision);
/* Canonicalize zeroes. */
- if(omsb == 0)
+ if (omsb == 0)
category = fcZero;
/* The fcZero case is a denormal that underflowed to zero. */
@@ -1324,7 +1322,7 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
case convolve(fcInfinity, fcInfinity):
/* Differently signed infinities can only be validly
subtracted. */
- if(((sign ^ rhs.sign)!=0) != subtract) {
+ if (((sign ^ rhs.sign)!=0) != subtract) {
makeNaN();
return opInvalidOp;
}
@@ -1352,7 +1350,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
bits = exponent - rhs.exponent;
/* Subtraction is more subtle than one might naively expect. */
- if(subtract) {
+ if (subtract) {
APFloat temp_rhs(rhs);
bool reverse;
@@ -1381,16 +1379,16 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
/* Invert the lost fraction - it was on the RHS and
subtracted. */
- if(lost_fraction == lfLessThanHalf)
+ if (lost_fraction == lfLessThanHalf)
lost_fraction = lfMoreThanHalf;
- else if(lost_fraction == lfMoreThanHalf)
+ else if (lost_fraction == lfMoreThanHalf)
lost_fraction = lfLessThanHalf;
/* The code above is intended to ensure that no borrow is
necessary. */
assert(!carry);
} else {
- if(bits > 0) {
+ if (bits > 0) {
APFloat temp_rhs(rhs);
lost_fraction = temp_rhs.shiftSignificandRight(bits);
@@ -1561,7 +1559,7 @@ APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
fs = addOrSubtractSpecials(rhs, subtract);
/* This return code means it was not a simple case. */
- if(fs == opDivByZero) {
+ if (fs == opDivByZero) {
lostFraction lost_fraction;
lost_fraction = addOrSubtractSignificand(rhs, subtract);
@@ -1574,8 +1572,8 @@ APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
positive zero unless rounding to minus infinity, except that
adding two like-signed zeroes gives that zero. */
- if(category == fcZero) {
- if(rhs.category != fcZero || (sign == rhs.sign) == subtract)
+ if (category == fcZero) {
+ if (rhs.category != fcZero || (sign == rhs.sign) == subtract)
sign = (rounding_mode == rmTowardNegative);
}
@@ -1606,10 +1604,10 @@ APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
sign ^= rhs.sign;
fs = multiplySpecials(rhs);
- if(category == fcNormal) {
+ if (category == fcNormal) {
lostFraction lost_fraction = multiplySignificand(rhs, 0);
fs = normalize(rounding_mode, lost_fraction);
- if(lost_fraction != lfExactlyZero)
+ if (lost_fraction != lfExactlyZero)
fs = (opStatus) (fs | opInexact);
}
@@ -1626,10 +1624,10 @@ APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
sign ^= rhs.sign;
fs = divideSpecials(rhs);
- if(category == fcNormal) {
+ if (category == fcNormal) {
lostFraction lost_fraction = divideSignificand(rhs);
fs = normalize(rounding_mode, lost_fraction);
- if(lost_fraction != lfExactlyZero)
+ if (lost_fraction != lfExactlyZero)
fs = (opStatus) (fs | opInexact);
}
@@ -1673,7 +1671,7 @@ APFloat::remainder(const APFloat &rhs)
return fs;
}
-/* Normalized llvm frem (C fmod).
+/* Normalized llvm frem (C fmod).
This is not currently correct in all cases. */
APFloat::opStatus
APFloat::mod(const APFloat &rhs, roundingMode rounding_mode)
@@ -1730,20 +1728,20 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
/* If and only if all arguments are normal do we need to do an
extended-precision calculation. */
- if(category == fcNormal
- && multiplicand.category == fcNormal
- && addend.category == fcNormal) {
+ if (category == fcNormal &&
+ multiplicand.category == fcNormal &&
+ addend.category == fcNormal) {
lostFraction lost_fraction;
lost_fraction = multiplySignificand(multiplicand, &addend);
fs = normalize(rounding_mode, lost_fraction);
- if(lost_fraction != lfExactlyZero)
+ if (lost_fraction != lfExactlyZero)
fs = (opStatus) (fs | opInexact);
/* If two numbers add (exactly) to zero, IEEE 754 decrees it is a
positive zero unless rounding to minus infinity, except that
adding two like-signed zeroes gives that zero. */
- if(category == fcZero && sign != addend.sign)
+ if (category == fcZero && sign != addend.sign)
sign = (rounding_mode == rmTowardNegative);
} else {
fs = multiplySpecials(multiplicand);
@@ -1755,7 +1753,7 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
If we need to do the addition we can do so with normal
precision. */
- if(fs == opOK)
+ if (fs == opOK)
fs = addOrSubtract(addend, rounding_mode, false);
}
@@ -1787,7 +1785,7 @@ APFloat::compare(const APFloat &rhs) const
case convolve(fcInfinity, fcNormal):
case convolve(fcInfinity, fcZero):
case convolve(fcNormal, fcZero):
- if(sign)
+ if (sign)
return cmpLessThan;
else
return cmpGreaterThan;
@@ -1795,15 +1793,15 @@ APFloat::compare(const APFloat &rhs) const
case convolve(fcNormal, fcInfinity):
case convolve(fcZero, fcInfinity):
case convolve(fcZero, fcNormal):
- if(rhs.sign)
+ if (rhs.sign)
return cmpGreaterThan;
else
return cmpLessThan;
case convolve(fcInfinity, fcInfinity):
- if(sign == rhs.sign)
+ if (sign == rhs.sign)
return cmpEqual;
- else if(sign)
+ else if (sign)
return cmpLessThan;
else
return cmpGreaterThan;
@@ -1816,8 +1814,8 @@ APFloat::compare(const APFloat &rhs) const
}
/* Two normal numbers. Do they have the same sign? */
- if(sign != rhs.sign) {
- if(sign)
+ if (sign != rhs.sign) {
+ if (sign)
result = cmpLessThan;
else
result = cmpGreaterThan;
@@ -1825,10 +1823,10 @@ APFloat::compare(const APFloat &rhs) const
/* Compare absolute values; invert result if negative. */
result = compareAbsoluteValue(rhs);
- if(sign) {
- if(result == cmpLessThan)
+ if (sign) {
+ if (result == cmpLessThan)
result = cmpGreaterThan;
- else if(result == cmpGreaterThan)
+ else if (result == cmpGreaterThan)
result = cmpLessThan;
}
}
@@ -1886,7 +1884,7 @@ APFloat::convert(const fltSemantics &toSemantics,
}
}
- if(category == fcNormal) {
+ if (category == fcNormal) {
/* Re-interpret our bit-pattern. */
exponent += toSemantics.precision - semantics->precision;
semantics = &toSemantics;
@@ -1911,7 +1909,7 @@ APFloat::convert(const fltSemantics &toSemantics,
// x87 long double).
if (APInt::tcLSB(significandParts(), newPartCount) < ushift)
*losesInfo = true;
- if (oldSemantics == &APFloat::x87DoubleExtended &&
+ if (oldSemantics == &APFloat::x87DoubleExtended &&
(!(*significandParts() & 0x8000000000000000ULL) ||
!(*significandParts() & 0x4000000000000000ULL)))
*losesInfo = true;
@@ -1956,12 +1954,12 @@ APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
*isExact = false;
/* Handle the three special cases first. */
- if(category == fcInfinity || category == fcNaN)
+ if (category == fcInfinity || category == fcNaN)
return opInvalidOp;
dstPartsCount = partCountForBits(width);
- if(category == fcZero) {
+ if (category == fcZero) {
APInt::tcSet(parts, 0, dstPartsCount);
// Negative zero can't be represented as an int.
*isExact = !sign;
@@ -2004,8 +2002,8 @@ APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
if (truncatedBits) {
lost_fraction = lostFractionThroughTruncation(src, partCount(),
truncatedBits);
- if (lost_fraction != lfExactlyZero
- && roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
+ if (lost_fraction != lfExactlyZero &&
+ roundAwayFromZero(rounding_mode, lost_fraction, truncatedBits)) {
if (APInt::tcIncrement(parts, dstPartsCount))
return opInvalidOp; /* Overflow. */
}
@@ -2062,7 +2060,7 @@ APFloat::convertToInteger(integerPart *parts, unsigned int width,
{
opStatus fs;
- fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
+ fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
isExact);
if (fs == opInvalidOp) {
@@ -2149,8 +2147,8 @@ APFloat::convertFromSignExtendedInteger(const integerPart *src,
opStatus status;
assertArithmeticOK(*semantics);
- if (isSigned
- && APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
+ if (isSigned &&
+ APInt::tcExtractBit(src, srcCount * integerPartWidth - 1)) {
integerPart *copy;
    /* If we're signed and negative, negate a copy.  */
@@ -2178,7 +2176,7 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
APInt api = APInt(width, partCount, parts);
sign = false;
- if(isSigned && APInt::tcExtractBit(parts, width - 1)) {
+ if (isSigned && APInt::tcExtractBit(parts, width - 1)) {
sign = true;
api = -api;
}
@@ -2209,10 +2207,10 @@ APFloat::convertFromHexadecimalString(const StringRef &s,
StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot);
firstSignificantDigit = p;
- for(; p != end;) {
+ for (; p != end;) {
integerPart hex_value;
- if(*p == '.') {
+ if (*p == '.') {
assert(dot == end && "String contains multiple dots");
dot = p++;
if (p == end) {
@@ -2221,7 +2219,7 @@ APFloat::convertFromHexadecimalString(const StringRef &s,
}
hex_value = hexDigitValue(*p);
- if(hex_value == -1U) {
+ if (hex_value == -1U) {
break;
}
@@ -2231,13 +2229,13 @@ APFloat::convertFromHexadecimalString(const StringRef &s,
break;
} else {
/* Store the number whilst 4-bit nibbles remain. */
- if(bitPos) {
+ if (bitPos) {
bitPos -= 4;
hex_value <<= bitPos % integerPartWidth;
significand[bitPos / integerPartWidth] |= hex_value;
} else {
lost_fraction = trailingHexadecimalFraction(p, end, hex_value);
- while(p != end && hexDigitValue(*p) != -1U)
+ while (p != end && hexDigitValue(*p) != -1U)
p++;
break;
}
@@ -2251,7 +2249,7 @@ APFloat::convertFromHexadecimalString(const StringRef &s,
assert((dot == end || p - begin != 1) && "Significand has no digits");
/* Ignore the exponent if we are zero. */
- if(p != firstSignificantDigit) {
+ if (p != firstSignificantDigit) {
int expAdjustment;
/* Implicit hexadecimal point? */
@@ -2261,7 +2259,7 @@ APFloat::convertFromHexadecimalString(const StringRef &s,
/* Calculate the exponent adjustment implicit in the number of
significant digits. */
expAdjustment = static_cast<int>(dot - firstSignificantDigit);
- if(expAdjustment < 0)
+ if (expAdjustment < 0)
expAdjustment++;
expAdjustment = expAdjustment * 4 - 1;
@@ -2287,8 +2285,8 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
integerPart pow5Parts[maxPowerOfFiveParts];
bool isNearest;
- isNearest = (rounding_mode == rmNearestTiesToEven
- || rounding_mode == rmNearestTiesToAway);
+ isNearest = (rounding_mode == rmNearestTiesToEven ||
+ rounding_mode == rmNearestTiesToAway);
parts = partCountForBits(semantics->precision + 11);
@@ -2482,13 +2480,13 @@ APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode)
StringRef::iterator p = str.begin();
size_t slen = str.size();
sign = *p == '-' ? 1 : 0;
- if(*p == '-' || *p == '+') {
+ if (*p == '-' || *p == '+') {
p++;
slen--;
assert(slen && "String has no digits");
}
- if(slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
+ if (slen >= 2 && p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) {
assert(slen - 2 && "Invalid string");
return convertFromHexadecimalString(StringRef(p + 2, slen - 2),
rounding_mode);
@@ -3013,7 +3011,7 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
// exponent2 and significand2 are required to be 0; we don't check
category = fcInfinity;
} else if (myexponent==0x7ff && mysignificand!=0) {
- // exponent meaningless. So is the whole second word, but keep it
+ // exponent meaningless. So is the whole second word, but keep it
// for determinism.
category = fcNaN;
exponent2 = myexponent2;
@@ -3031,7 +3029,7 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
exponent = -1022;
else
significandParts()[0] |= 0x10000000000000LL; // integer bit
- if (myexponent2==0)
+    if (myexponent2==0)
+ if (myexponent2==0)
exponent2 = -1022;
else
significandParts()[1] |= 0x10000000000000LL; // integer bit
@@ -3217,8 +3215,8 @@ APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
significand[i] = ~((integerPart) 0);
// ...and then clear the top bits for internal consistency.
- significand[N-1]
- &= (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)) - 1;
+ significand[N-1] &=
+ (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1)) - 1;
return Val;
}
@@ -3247,8 +3245,8 @@ APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
Val.exponent = Sem.minExponent;
Val.zeroSignificand();
- Val.significandParts()[partCountForBits(Sem.precision)-1]
- |= (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1));
+ Val.significandParts()[partCountForBits(Sem.precision)-1] |=
+ (((integerPart) 1) << ((Sem.precision % integerPartWidth) - 1));
return Val;
}
@@ -3433,7 +3431,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
// log2(N * 5^e) == log2(N) + e * log2(5)
// <= semantics->precision + e * 137 / 59
// (log_2(5) ~ 2.321928 < 2.322034 ~ 137/59)
-
+
unsigned precision = semantics->precision + 137 * texp / 59;
// Multiply significand by 5^e.
@@ -3442,7 +3440,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
APInt five_to_the_i(precision, 5);
while (true) {
if (texp & 1) significand *= five_to_the_i;
-
+
texp >>= 1;
if (!texp) break;
five_to_the_i *= five_to_the_i;
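
The while loop is exponentiation by squaring over the binary digits of texp, and the precision headroom above it relies on log2(5) ~= 2.321928 being bounded by 137/59 ~= 2.322034. The same loop shape on a plain machine word (toy version; the real code squares an arbitrary-precision APInt):

    #include <cstdint>
    #include <cstdio>

    // Compute 5^e by squaring; overflows uint64_t past e = 27, which is
    // fine for a toy illustration of the loop structure.
    static uint64_t powOfFive(unsigned e) {
      uint64_t result = 1, base = 5;
      while (true) {
        if (e & 1) result *= base;   // multiply in the current bit's factor
        e >>= 1;
        if (!e) break;
        base *= base;                // square for the next binary digit
      }
      return result;
    }

    int main() {
      std::printf("%llu\n", (unsigned long long)powOfFive(10)); // 9765625
    }
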
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 6a6384a..50025d2 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -702,15 +702,14 @@ static inline uint32_t hashword(const uint64_t *k64, size_t length)
a = b = c = 0xdeadbeef + (((uint32_t)length)<<2);
/*------------------------------------------------- handle most of the key */
- while (length > 3)
- {
- a += k[0];
- b += k[1];
- c += k[2];
- mix(a,b,c);
- length -= 3;
- k += 3;
- }
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ mix(a,b,c);
+ length -= 3;
+ k += 3;
+ }
/*------------------------------------------- handle the last 3 uint32_t's */
switch (length) { /* all the case statements fall through */
@@ -1383,8 +1382,8 @@ APInt APInt::sqrt() const {
// libc sqrt function which will probably use a hardware sqrt computation.
// This should be faster than the algorithm below.
if (magnitude < 52) {
-#ifdef _MSC_VER
- // Amazingly, VC++ doesn't have round().
+#if defined(_MSC_VER) || defined(_MINIX)
+ // Amazingly, VC++ and Minix don't have round().
return APInt(BitWidth,
uint64_t(::sqrt(double(isSingleWord()?VAL:pVal[0]))) + 0.5);
#else
@@ -2065,8 +2064,8 @@ void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) {
assert((slen <= numbits || radix != 2) && "Insufficient bit width");
assert(((slen-1)*3 <= numbits || radix != 8) && "Insufficient bit width");
assert(((slen-1)*4 <= numbits || radix != 16) && "Insufficient bit width");
- assert((((slen-1)*64)/22 <= numbits || radix != 10)
- && "Insufficient bit width");
+ assert((((slen-1)*64)/22 <= numbits || radix != 10) &&
+ "Insufficient bit width");
// Allocate memory
if (!isSingleWord())
@@ -2229,7 +2228,7 @@ namespace {
static inline integerPart
lowBitMask(unsigned int bits)
{
- assert (bits != 0 && bits <= integerPartWidth);
+ assert(bits != 0 && bits <= integerPartWidth);
return ~(integerPart) 0 >> (integerPartWidth - bits);
}
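
lowBitMask builds an all-ones word and shifts the unwanted high bits off the top; the assert matters because a shift by the full word width is undefined behavior in C and C++. A self-contained restatement with a quick check (hypothetical copy of the helper):

    #include <cassert>
    #include <cstdint>

    typedef uint64_t integerPart;
    static const unsigned integerPartWidth = 64;

    // Mask of the low `bits` bits.  Valid for 1 <= bits <= integerPartWidth;
    // bits == 0 would shift by the full width, which is undefined.
    static integerPart lowBits(unsigned bits) {
      assert(bits != 0 && bits <= integerPartWidth);
      return ~(integerPart)0 >> (integerPartWidth - bits);
    }

    int main() {
      assert(lowBits(1) == 0x1);
      assert(lowBits(4) == 0xF);
      assert(lowBits(64) == ~(integerPart)0);
    }
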
@@ -2306,10 +2305,10 @@ APInt::tcSet(integerPart *dst, integerPart part, unsigned int parts)
{
unsigned int i;
- assert (parts > 0);
+ assert(parts > 0);
dst[0] = part;
- for(i = 1; i < parts; i++)
+ for (i = 1; i < parts; i++)
dst[i] = 0;
}
@@ -2319,7 +2318,7 @@ APInt::tcAssign(integerPart *dst, const integerPart *src, unsigned int parts)
{
unsigned int i;
- for(i = 0; i < parts; i++)
+ for (i = 0; i < parts; i++)
dst[i] = src[i];
}
@@ -2329,7 +2328,7 @@ APInt::tcIsZero(const integerPart *src, unsigned int parts)
{
unsigned int i;
- for(i = 0; i < parts; i++)
+ for (i = 0; i < parts; i++)
if (src[i])
return false;
@@ -2340,8 +2339,8 @@ APInt::tcIsZero(const integerPart *src, unsigned int parts)
int
APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
{
- return(parts[bit / integerPartWidth]
- & ((integerPart) 1 << bit % integerPartWidth)) != 0;
+ return (parts[bit / integerPartWidth] &
+ ((integerPart) 1 << bit % integerPartWidth)) != 0;
}
/* Set the given bit of a bignum. */
@@ -2366,7 +2365,7 @@ APInt::tcLSB(const integerPart *parts, unsigned int n)
{
unsigned int i, lsb;
- for(i = 0; i < n; i++) {
+ for (i = 0; i < n; i++) {
if (parts[i] != 0) {
lsb = partLSB(parts[i]);
@@ -2385,13 +2384,13 @@ APInt::tcMSB(const integerPart *parts, unsigned int n)
unsigned int msb;
do {
-      --n;
-      if (parts[n] != 0) {
-        msb = partMSB(parts[n]);
-        return msb + n * integerPartWidth;
-      }
+    --n;
+    if (parts[n] != 0) {
+      msb = partMSB(parts[n]);
+      return msb + n * integerPartWidth;
+    }
} while (n);
return -1U;
@@ -2408,7 +2407,7 @@ APInt::tcExtract(integerPart *dst, unsigned int dstCount,const integerPart *src,
unsigned int firstSrcPart, dstParts, shift, n;
dstParts = (srcBits + integerPartWidth - 1) / integerPartWidth;
- assert (dstParts <= dstCount);
+ assert(dstParts <= dstCount);
firstSrcPart = srcLSB / integerPartWidth;
tcAssign (dst, src + firstSrcPart, dstParts);
@@ -2443,7 +2442,7 @@ APInt::tcAdd(integerPart *dst, const integerPart *rhs,
assert(c <= 1);
- for(i = 0; i < parts; i++) {
+ for (i = 0; i < parts; i++) {
integerPart l;
l = dst[i];
@@ -2468,7 +2467,7 @@ APInt::tcSubtract(integerPart *dst, const integerPart *rhs,
assert(c <= 1);
- for(i = 0; i < parts; i++) {
+ for (i = 0; i < parts; i++) {
integerPart l;
l = dst[i];
@@ -2518,7 +2517,7 @@ APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
/* N loops; minimum of dstParts and srcParts. */
n = dstParts < srcParts ? dstParts: srcParts;
- for(i = 0; i < n; i++) {
+ for (i = 0; i < n; i++) {
integerPart low, mid, high, srcPart;
/* [ LOW, HIGH ] = MULTIPLIER * SRC[i] + DST[i] + CARRY.
@@ -2583,7 +2582,7 @@ APInt::tcMultiplyPart(integerPart *dst, const integerPart *src,
non-zero. This is true if any remaining src parts are non-zero
and the multiplier is non-zero. */
if (multiplier)
- for(; i < srcParts; i++)
+ for (; i < srcParts; i++)
if (src[i])
return 1;
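
tcMultiplyPart is schoolbook multiplication: each round forms the double-width product [LOW, HIGH] = MULTIPLIER * SRC[i] + DST[i] + CARRY, stores the low word in place, and carries the high word into the next part. A sketch of that recurrence, assuming the GCC/Clang unsigned __int128 extension for the wide intermediate (the real code splits each part into halves instead):

    #include <cassert>
    #include <cstdint>

    // dst[0..parts] += multiplier * src[0..parts-1]; the caller reserves
    // one extra part in dst for the final carry.
    static void mulPart(uint64_t *dst, const uint64_t *src,
                        uint64_t multiplier, unsigned parts) {
      uint64_t carry = 0;
      for (unsigned i = 0; i != parts; ++i) {
        // [low, high] = multiplier * src[i] + dst[i] + carry
        unsigned __int128 t = (unsigned __int128)multiplier * src[i]
                            + dst[i] + carry;
        dst[i] = (uint64_t)t;          // low word stays in place
        carry  = (uint64_t)(t >> 64);  // high word carries onward
      }
      dst[parts] = carry;
    }

    int main() {
      uint64_t src[2] = { ~0ULL, ~0ULL };    // 2^128 - 1
      uint64_t dst[3] = { 0, 0, 0 };
      mulPart(dst, src, 5, 2);               // dst = 5 * (2^128 - 1)
      assert(dst[0] == ~0ULL - 4 && dst[1] == ~0ULL && dst[2] == 4);
    }
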
@@ -2608,7 +2607,7 @@ APInt::tcMultiply(integerPart *dst, const integerPart *lhs,
overflow = 0;
tcSet(dst, 0, parts);
- for(i = 0; i < parts; i++)
+ for (i = 0; i < parts; i++)
overflow |= tcMultiplyPart(&dst[i], lhs, rhs[i], 0, parts,
parts - i, true);
@@ -2634,7 +2633,7 @@ APInt::tcFullMultiply(integerPart *dst, const integerPart *lhs,
tcSet(dst, 0, rhsParts);
- for(n = 0; n < lhsParts; n++)
+ for (n = 0; n < lhsParts; n++)
tcMultiplyPart(&dst[n], rhs, lhs[n], 0, rhsParts, rhsParts + 1, true);
n = lhsParts + rhsParts;
@@ -2678,7 +2677,7 @@ APInt::tcDivide(integerPart *lhs, const integerPart *rhs,
/* Loop, subtracting SRHS if REMAINDER is greater and adding that to
the total. */
- for(;;) {
+ for (;;) {
int compare;
compare = tcCompare(remainder, srhs, parts);
@@ -2746,7 +2745,7 @@ APInt::tcShiftRight(integerPart *dst, unsigned int parts, unsigned int count)
/* Perform the shift. This leaves the most significant COUNT bits
of the result at zero. */
- for(i = 0; i < parts; i++) {
+ for (i = 0; i < parts; i++) {
integerPart part;
if (i + jump >= parts) {
@@ -2771,7 +2770,7 @@ APInt::tcAnd(integerPart *dst, const integerPart *rhs, unsigned int parts)
{
unsigned int i;
- for(i = 0; i < parts; i++)
+ for (i = 0; i < parts; i++)
dst[i] &= rhs[i];
}
@@ -2781,7 +2780,7 @@ APInt::tcOr(integerPart *dst, const integerPart *rhs, unsigned int parts)
{
unsigned int i;
- for(i = 0; i < parts; i++)
+ for (i = 0; i < parts; i++)
dst[i] |= rhs[i];
}
@@ -2791,7 +2790,7 @@ APInt::tcXor(integerPart *dst, const integerPart *rhs, unsigned int parts)
{
unsigned int i;
- for(i = 0; i < parts; i++)
+ for (i = 0; i < parts; i++)
dst[i] ^= rhs[i];
}
@@ -2801,7 +2800,7 @@ APInt::tcComplement(integerPart *dst, unsigned int parts)
{
unsigned int i;
- for(i = 0; i < parts; i++)
+ for (i = 0; i < parts; i++)
dst[i] = ~dst[i];
}
@@ -2830,7 +2829,7 @@ APInt::tcIncrement(integerPart *dst, unsigned int parts)
{
unsigned int i;
- for(i = 0; i < parts; i++)
+ for (i = 0; i < parts; i++)
if (++dst[i] != 0)
break;
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 2ab4103..d31f34e 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -676,8 +676,8 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
<< " positional arguments: See: " << argv[0] << " -help\n";
ErrorParsing = true;
- } else if (!HasUnlimitedPositionals
- && PositionalVals.size() > PositionalOpts.size()) {
+ } else if (!HasUnlimitedPositionals &&
+ PositionalVals.size() > PositionalOpts.size()) {
errs() << ProgramName
<< ": Too many positional arguments specified!\n"
<< "Can specify at most " << PositionalOpts.size()
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index 82b4b8c..eccfa0b 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -64,8 +64,7 @@ DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
cl::location(DebugOnlyOptLoc), cl::ValueRequired);
// Signal handlers - dump debug output on termination.
-static void debug_user_sig_handler(void *Cookie)
-{
+static void debug_user_sig_handler(void *Cookie) {
// This is a bit sneaky. Since this is under #ifndef NDEBUG, we
// know that debug mode is enabled and dbgs() really is a
// circular_raw_ostream. If NDEBUG is defined, then dbgs() ==
diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp
index 8bb1566..4412cb2 100644
--- a/lib/Support/ErrorHandling.cpp
+++ b/lib/Support/ErrorHandling.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines an API for error handling, it supersedes cerr+abort(), and
+// This file defines an API for error handling; it supersedes cerr+abort() and
// cerr+exit() style error handling.
// Callbacks can be registered for these errors through this API.
//===----------------------------------------------------------------------===//
@@ -57,7 +57,7 @@ void llvm_report_error(const Twine &reason) {
exit(1);
}
-void llvm_unreachable_internal(const char *msg, const char *file,
+void llvm_unreachable_internal(const char *msg, const char *file,
unsigned line) {
// This code intentionally doesn't call the ErrorHandler callback, because
// llvm_unreachable is intended to be used to indicate "impossible"
@@ -71,4 +71,3 @@ void llvm_unreachable_internal(const char *msg, const char *file,
abort();
}
}
-
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 345a78c..4f135ea 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -14,6 +14,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/System/Errno.h"
#include "llvm/System/Path.h"
#include "llvm/System/Process.h"
#include "llvm/System/Program.h"
@@ -167,6 +168,14 @@ public:
sys::Path::UnMapFilePages(getBufferStart(), getBufferSize());
}
};
+
+/// FileCloser - RAII object to make sure an FD gets closed properly.
+class FileCloser {
+ int FD;
+public:
+ FileCloser(int FD) : FD(FD) {}
+ ~FileCloser() { ::close(FD); }
+};
}
MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
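
FileCloser ties the descriptor's lifetime to a stack object so every return path closes it, which is what lets the patch delete the scattered ::close(FD) calls from the error paths below. The pattern in isolation (hypothetical usage, not from the patch):

    #include <fcntl.h>
    #include <unistd.h>

    // RAII: the destructor runs on every exit from the enclosing scope.
    class FileCloser {
      int FD;
    public:
      explicit FileCloser(int FD) : FD(FD) {}
      ~FileCloser() { ::close(FD); }
    };

    bool firstByte(const char *Path, char &Out) {
      int FD = ::open(Path, O_RDONLY);
      if (FD == -1) return false;
      FileCloser FC(FD);               // closes FD on *every* path below
      if (::read(FD, &Out, 1) != 1)
        return false;                  // no explicit close needed here...
      return true;                     // ...nor here
    }
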
@@ -178,9 +187,10 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
SmallString<256> PathBuf(Filename.begin(), Filename.end());
int FD = ::open(PathBuf.c_str(), O_RDONLY|OpenFlags);
if (FD == -1) {
- if (ErrStr) *ErrStr = strerror(errno);
+ if (ErrStr) *ErrStr = sys::StrError();
return 0;
}
+ FileCloser FC(FD); // Close FD on return.
// If we don't know the file size, use fstat to find out. fstat on an open
// file descriptor is cheaper than stat on a random path.
@@ -190,8 +200,7 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
// TODO: This should use fstat64 when available.
if (fstat(FD, FileInfoPtr) == -1) {
- if (ErrStr) *ErrStr = strerror(errno);
- ::close(FD);
+ if (ErrStr) *ErrStr = sys::StrError();
return 0;
}
FileSize = FileInfoPtr->st_size;
@@ -208,7 +217,6 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
(FileSize & (sys::Process::GetPageSize()-1)) != 0) {
if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) {
// Close the file descriptor, now that the whole file is in memory.
- ::close(FD);
return new MemoryBufferMMapFile(Filename, Pages, FileSize);
}
}
@@ -217,30 +225,31 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
if (!Buf) {
// Failed to create a buffer.
if (ErrStr) *ErrStr = "could not allocate buffer";
- ::close(FD);
return 0;
}
OwningPtr<MemoryBuffer> SB(Buf);
char *BufPtr = const_cast<char*>(SB->getBufferStart());
-
+
size_t BytesLeft = FileSize;
while (BytesLeft) {
ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
- if (NumRead > 0) {
- BytesLeft -= NumRead;
- BufPtr += NumRead;
- } else if (NumRead == -1 && errno == EINTR) {
- // try again
- } else {
- // error reading.
- if (ErrStr) *ErrStr = strerror(errno);
- close(FD);
+ if (NumRead == -1) {
+ if (errno == EINTR)
+ continue;
+ // Error while reading.
+ if (ErrStr) *ErrStr = sys::StrError();
return 0;
+ } else if (NumRead == 0) {
+ // We hit EOF early, truncate and terminate buffer.
+ Buf->BufferEnd = BufPtr;
+ *BufPtr = 0;
+ return SB.take();
}
+ BytesLeft -= NumRead;
+ BufPtr += NumRead;
}
- close(FD);
-
+
return SB.take();
}
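
The rewritten loop distinguishes four read outcomes: EINTR retries, any other error bails out, an early EOF truncates and terminates the buffer, and a positive count advances the cursor. The same contract as a standalone helper (hypothetical, for illustration only):

    #include <cerrno>
    #include <unistd.h>

    // Keep reading until `size` bytes arrive, retrying interrupted calls
    // and reporting early EOF to the caller instead of looping forever.
    ssize_t readFull(int fd, char *buf, size_t size) {
      size_t total = 0;
      while (total < size) {
        ssize_t n = ::read(fd, buf + total, size - total);
        if (n == -1) {
          if (errno == EINTR) continue;  // interrupted: just retry
          return -1;                     // real I/O error
        }
        if (n == 0) break;               // EOF before `size` bytes: stop short
        total += n;
      }
      return (ssize_t)total;             // may be < size on early EOF
    }
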
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index e787670..7d5f65a 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -32,8 +32,8 @@
#include <cstring>
using namespace llvm;
-// GetLibSupportInfoOutputFile - Return a file stream to print our output on.
-namespace llvm { extern raw_ostream *GetLibSupportInfoOutputFile(); }
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
/// -stats - Command line option to cause transformations to emit stats about
/// what they did.
@@ -48,6 +48,8 @@ namespace {
/// llvm_shutdown is called. We print statistics from the destructor.
class StatisticInfo {
std::vector<const Statistic*> Stats;
+ friend void llvm::PrintStatistics();
+ friend void llvm::PrintStatistics(raw_ostream &OS);
public:
~StatisticInfo();
@@ -92,42 +94,55 @@ struct NameCompare {
// Print information when destroyed, iff command line option is specified.
StatisticInfo::~StatisticInfo() {
- // Statistics not enabled?
- if (Stats.empty()) return;
+ llvm::PrintStatistics();
+}
- // Get the stream to write to.
- raw_ostream &OutStream = *GetLibSupportInfoOutputFile();
+void llvm::EnableStatistics() {
+ Enabled.setValue(true);
+}
+
+void llvm::PrintStatistics(raw_ostream &OS) {
+ StatisticInfo &Stats = *StatInfo;
// Figure out how long the biggest Value and Name fields are.
unsigned MaxNameLen = 0, MaxValLen = 0;
- for (size_t i = 0, e = Stats.size(); i != e; ++i) {
+ for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
MaxValLen = std::max(MaxValLen,
- (unsigned)utostr(Stats[i]->getValue()).size());
+ (unsigned)utostr(Stats.Stats[i]->getValue()).size());
MaxNameLen = std::max(MaxNameLen,
- (unsigned)std::strlen(Stats[i]->getName()));
+ (unsigned)std::strlen(Stats.Stats[i]->getName()));
}
// Sort the fields by name.
- std::stable_sort(Stats.begin(), Stats.end(), NameCompare());
+ std::stable_sort(Stats.Stats.begin(), Stats.Stats.end(), NameCompare());
// Print out the statistics header...
- OutStream << "===" << std::string(73, '-') << "===\n"
- << " ... Statistics Collected ...\n"
- << "===" << std::string(73, '-') << "===\n\n";
+ OS << "===" << std::string(73, '-') << "===\n"
+ << " ... Statistics Collected ...\n"
+ << "===" << std::string(73, '-') << "===\n\n";
// Print all of the statistics.
- for (size_t i = 0, e = Stats.size(); i != e; ++i) {
- std::string CountStr = utostr(Stats[i]->getValue());
- OutStream << std::string(MaxValLen-CountStr.size(), ' ')
- << CountStr << " " << Stats[i]->getName()
- << std::string(MaxNameLen-std::strlen(Stats[i]->getName()), ' ')
- << " - " << Stats[i]->getDesc() << "\n";
-
+ for (size_t i = 0, e = Stats.Stats.size(); i != e; ++i) {
+ std::string CountStr = utostr(Stats.Stats[i]->getValue());
+ OS << std::string(MaxValLen-CountStr.size(), ' ')
+ << CountStr << " " << Stats.Stats[i]->getName()
+ << std::string(MaxNameLen-std::strlen(Stats.Stats[i]->getName()), ' ')
+ << " - " << Stats.Stats[i]->getDesc() << "\n";
}
- OutStream << '\n'; // Flush the output stream...
- OutStream.flush();
-
- if (&OutStream != &outs() && &OutStream != &errs() && &OutStream != &dbgs())
- delete &OutStream; // Close the file.
+ OS << '\n'; // Flush the output stream.
+ OS.flush();
+
+}
+
+void llvm::PrintStatistics() {
+ StatisticInfo &Stats = *StatInfo;
+
+ // Statistics not enabled?
+ if (Stats.Stats.empty()) return;
+
+ // Get the stream to write to.
+ raw_ostream &OutStream = *CreateInfoOutputFile();
+ PrintStatistics(OutStream);
+ delete &OutStream; // Close the file.
}
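
With the printing logic split out of the destructor, statistics can now be enabled and dumped programmatically rather than only at llvm_shutdown. A sketch of how the new entry points compose, assuming the era's STATISTIC macro from llvm/ADT/Statistic.h (note the StatisticInfo destructor will still print once more at exit):

    #define DEBUG_TYPE "demo"
    #include "llvm/ADT/Statistic.h"
    #include "llvm/Support/raw_ostream.h"

    STATISTIC(NumWidgets, "Number of widgets processed");

    int main() {
      llvm::EnableStatistics();             // equivalent to passing -stats
      for (int i = 0; i != 3; ++i)
        ++NumWidgets;
      llvm::PrintStatistics(llvm::errs());  // dump to a stream of our choosing
      return 0;
    }
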
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 4bdfac2..4fac073 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -11,20 +11,20 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Format.h"
+#include "llvm/System/Mutex.h"
#include "llvm/System/Process.h"
-#include <algorithm>
-#include <functional>
-#include <map>
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringMap.h"
using namespace llvm;
-// GetLibSupportInfoOutputFile - Return a file stream to print our output on.
-namespace llvm { extern raw_ostream *GetLibSupportInfoOutputFile(); }
+// CreateInfoOutputFile - Return a file stream to print our output on.
+namespace llvm { extern raw_ostream *CreateInfoOutputFile(); }
// getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy
// of constructor/destructor ordering being unspecified by C++. Basically the
@@ -53,117 +53,103 @@ namespace {
cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
}
+// CreateInfoOutputFile - Return a file stream to print our output on.
+raw_ostream *llvm::CreateInfoOutputFile() {
+ const std::string &OutputFilename = getLibSupportInfoOutputFilename();
+ if (OutputFilename.empty())
+ return new raw_fd_ostream(2, false); // stderr.
+ if (OutputFilename == "-")
+ return new raw_fd_ostream(1, false); // stdout.
+
+ std::string Error;
+ raw_ostream *Result = new raw_fd_ostream(OutputFilename.c_str(),
+ Error, raw_fd_ostream::F_Append);
+ if (Error.empty())
+ return Result;
+
+  errs() << "Error opening info-output-file '"
+         << OutputFilename << "' for appending!\n";
+ delete Result;
+ return new raw_fd_ostream(2, false); // stderr.
+}
+
+
static TimerGroup *DefaultTimerGroup = 0;
static TimerGroup *getDefaultTimerGroup() {
- TimerGroup* tmp = DefaultTimerGroup;
+ TimerGroup *tmp = DefaultTimerGroup;
sys::MemoryFence();
+ if (tmp) return tmp;
+
+ llvm_acquire_global_lock();
+ tmp = DefaultTimerGroup;
if (!tmp) {
- llvm_acquire_global_lock();
- tmp = DefaultTimerGroup;
- if (!tmp) {
- tmp = new TimerGroup("Miscellaneous Ungrouped Timers");
- sys::MemoryFence();
- DefaultTimerGroup = tmp;
- }
- llvm_release_global_lock();
+ tmp = new TimerGroup("Miscellaneous Ungrouped Timers");
+ sys::MemoryFence();
+ DefaultTimerGroup = tmp;
}
+ llvm_release_global_lock();
return tmp;
}
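
The rewrite flattens getDefaultTimerGroup but keeps the double-checked locking protocol: a fenced read on the fast path, then the lock, a re-check, and a fenced publish. Restated with C++11 atomics, which this tree predates, purely to make the two required orderings visible in the types (a sketch, not the tree's code):

    #include <atomic>
    #include <mutex>

    struct Group { /* ... */ };

    static std::atomic<Group *> DefaultGroup(0);
    static std::mutex InitLock;

    // Acquire load to consume a published pointer; release store so the
    // fully constructed object is visible before the pointer is.
    static Group *getDefault() {
      Group *tmp = DefaultGroup.load(std::memory_order_acquire);
      if (tmp) return tmp;                 // fast path: already published
      std::lock_guard<std::mutex> L(InitLock);
      tmp = DefaultGroup.load(std::memory_order_relaxed); // re-check held
      if (!tmp) {
        tmp = new Group();
        DefaultGroup.store(tmp, std::memory_order_release); // publish
      }
      return tmp;
    }
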
-Timer::Timer(const std::string &N)
- : Elapsed(0), UserTime(0), SystemTime(0), MemUsed(0), PeakMem(0), Name(N),
- Started(false), TG(getDefaultTimerGroup()) {
- TG->addTimer();
-}
-
-Timer::Timer(const std::string &N, TimerGroup &tg)
- : Elapsed(0), UserTime(0), SystemTime(0), MemUsed(0), PeakMem(0), Name(N),
- Started(false), TG(&tg) {
- TG->addTimer();
-}
+//===----------------------------------------------------------------------===//
+// Timer Implementation
+//===----------------------------------------------------------------------===//
-Timer::Timer(const Timer &T) {
- TG = T.TG;
- if (TG) TG->addTimer();
- operator=(T);
+void Timer::init(StringRef N) {
+ assert(TG == 0 && "Timer already initialized");
+ Name.assign(N.begin(), N.end());
+ Started = false;
+ TG = getDefaultTimerGroup();
+ TG->addTimer(*this);
}
-
-// Copy ctor, initialize with no TG member.
-Timer::Timer(bool, const Timer &T) {
- TG = T.TG; // Avoid assertion in operator=
- operator=(T); // Copy contents
- TG = 0;
+void Timer::init(StringRef N, TimerGroup &tg) {
+ assert(TG == 0 && "Timer already initialized");
+ Name.assign(N.begin(), N.end());
+ Started = false;
+ TG = &tg;
+ TG->addTimer(*this);
}
-
Timer::~Timer() {
- if (TG) {
- if (Started) {
- Started = false;
- TG->addTimerToPrint(*this);
- }
- TG->removeTimer();
- }
+ if (!TG) return; // Never initialized, or already cleared.
+ TG->removeTimer(*this);
}
static inline size_t getMemUsage() {
- if (TrackSpace)
- return sys::Process::GetMallocUsage();
- return 0;
+ if (!TrackSpace) return 0;
+ return sys::Process::GetMallocUsage();
}
-struct TimeRecord {
- double Elapsed, UserTime, SystemTime;
- ssize_t MemUsed;
-};
-
-static TimeRecord getTimeRecord(bool Start) {
+TimeRecord TimeRecord::getCurrentTime(bool Start) {
TimeRecord Result;
-
- sys::TimeValue now(0,0);
- sys::TimeValue user(0,0);
- sys::TimeValue sys(0,0);
-
- ssize_t MemUsed = 0;
+ sys::TimeValue now(0,0), user(0,0), sys(0,0);
+
if (Start) {
- MemUsed = getMemUsage();
- sys::Process::GetTimeUsage(now,user,sys);
+ Result.MemUsed = getMemUsage();
+ sys::Process::GetTimeUsage(now, user, sys);
} else {
- sys::Process::GetTimeUsage(now,user,sys);
- MemUsed = getMemUsage();
+ sys::Process::GetTimeUsage(now, user, sys);
+ Result.MemUsed = getMemUsage();
}
- Result.Elapsed = now.seconds() + now.microseconds() / 1000000.0;
- Result.UserTime = user.seconds() + user.microseconds() / 1000000.0;
- Result.SystemTime = sys.seconds() + sys.microseconds() / 1000000.0;
- Result.MemUsed = MemUsed;
-
+ Result.WallTime = now.seconds() + now.microseconds() / 1000000.0;
+ Result.UserTime = user.seconds() + user.microseconds() / 1000000.0;
+ Result.SystemTime = sys.seconds() + sys.microseconds() / 1000000.0;
return Result;
}
static ManagedStatic<std::vector<Timer*> > ActiveTimers;
void Timer::startTimer() {
- sys::SmartScopedLock<true> L(*TimerLock);
Started = true;
ActiveTimers->push_back(this);
- TimeRecord TR = getTimeRecord(true);
- Elapsed -= TR.Elapsed;
- UserTime -= TR.UserTime;
- SystemTime -= TR.SystemTime;
- MemUsed -= TR.MemUsed;
- PeakMemBase = TR.MemUsed;
+ Time -= TimeRecord::getCurrentTime(true);
}
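
startTimer subtracts the current TimeRecord and stopTimer adds it back, so Time accumulates stop-minus-start deltas across any number of cycles without storing a separate start sample. The trick in miniature (hypothetical Stopwatch):

    #include <chrono>
    #include <cstdio>

    // Total is only meaningful while stopped: between start() and stop()
    // it transiently holds accumulated-minus-start.
    struct Stopwatch {
      double Total = 0;                // seconds
      static double now() {
        using namespace std::chrono;
        return duration<double>(steady_clock::now().time_since_epoch()).count();
      }
      void start() { Total -= now(); } // Total = accumulated - start
      void stop()  { Total += now(); } // Total = accumulated + (stop - start)
    };

    int main() {
      Stopwatch W;
      W.start();
      volatile long s = 0; for (long i = 0; i != 1000000; ++i) s += i;
      W.stop();
      std::printf("%f s\n", W.Total);
    }
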
void Timer::stopTimer() {
- sys::SmartScopedLock<true> L(*TimerLock);
- TimeRecord TR = getTimeRecord(false);
- Elapsed += TR.Elapsed;
- UserTime += TR.UserTime;
- SystemTime += TR.SystemTime;
- MemUsed += TR.MemUsed;
+ Time += TimeRecord::getCurrentTime(false);
if (ActiveTimers->back() == this) {
ActiveTimers->pop_back();
@@ -175,217 +161,223 @@ void Timer::stopTimer() {
}
}
-void Timer::sum(const Timer &T) {
- Elapsed += T.Elapsed;
- UserTime += T.UserTime;
- SystemTime += T.SystemTime;
- MemUsed += T.MemUsed;
- PeakMem += T.PeakMem;
+static void printVal(double Val, double Total, raw_ostream &OS) {
+ if (Total < 1e-7) // Avoid dividing by zero.
+ OS << " ----- ";
+ else {
+ OS << " " << format("%7.4f", Val) << " (";
+ OS << format("%5.1f", Val*100/Total) << "%)";
+ }
}
-/// addPeakMemoryMeasurement - This method should be called whenever memory
-/// usage needs to be checked. It adds a peak memory measurement to the
-/// currently active timers, which will be printed when the timer group prints
-///
-void Timer::addPeakMemoryMeasurement() {
- sys::SmartScopedLock<true> L(*TimerLock);
- size_t MemUsed = getMemUsage();
-
- for (std::vector<Timer*>::iterator I = ActiveTimers->begin(),
- E = ActiveTimers->end(); I != E; ++I)
- (*I)->PeakMem = std::max((*I)->PeakMem, MemUsed-(*I)->PeakMemBase);
+void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const {
+ if (Total.getUserTime())
+ printVal(getUserTime(), Total.getUserTime(), OS);
+ if (Total.getSystemTime())
+ printVal(getSystemTime(), Total.getSystemTime(), OS);
+ if (Total.getProcessTime())
+ printVal(getProcessTime(), Total.getProcessTime(), OS);
+ printVal(getWallTime(), Total.getWallTime(), OS);
+
+ OS << " ";
+
+ if (Total.getMemUsed())
+ OS << format("%9lld", (long long)getMemUsed()) << " ";
}
+
//===----------------------------------------------------------------------===//
// NamedRegionTimer Implementation
//===----------------------------------------------------------------------===//
-namespace {
-
-typedef std::map<std::string, Timer> Name2Timer;
-typedef std::map<std::string, std::pair<TimerGroup, Name2Timer> > Name2Pair;
-
-}
-
-static ManagedStatic<Name2Timer> NamedTimers;
-
-static ManagedStatic<Name2Pair> NamedGroupedTimers;
+typedef StringMap<Timer> Name2TimerMap;
-static Timer &getNamedRegionTimer(const std::string &Name) {
- sys::SmartScopedLock<true> L(*TimerLock);
- Name2Timer::iterator I = NamedTimers->find(Name);
- if (I != NamedTimers->end())
- return I->second;
-
- return NamedTimers->insert(I, std::make_pair(Name, Timer(Name)))->second;
-}
-
-static Timer &getNamedRegionTimer(const std::string &Name,
- const std::string &GroupName) {
- sys::SmartScopedLock<true> L(*TimerLock);
-
- Name2Pair::iterator I = NamedGroupedTimers->find(GroupName);
- if (I == NamedGroupedTimers->end()) {
- TimerGroup TG(GroupName);
- std::pair<TimerGroup, Name2Timer> Pair(TG, Name2Timer());
- I = NamedGroupedTimers->insert(I, std::make_pair(GroupName, Pair));
+class Name2PairMap {
+ StringMap<std::pair<TimerGroup*, Name2TimerMap> > Map;
+public:
+ ~Name2PairMap() {
+ for (StringMap<std::pair<TimerGroup*, Name2TimerMap> >::iterator
+ I = Map.begin(), E = Map.end(); I != E; ++I)
+ delete I->second.first;
}
+
+ Timer &get(StringRef Name, StringRef GroupName) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ std::pair<TimerGroup*, Name2TimerMap> &GroupEntry = Map[GroupName];
+
+ if (!GroupEntry.first)
+ GroupEntry.first = new TimerGroup(GroupName);
+
+ Timer &T = GroupEntry.second[Name];
+ if (!T.isInitialized())
+ T.init(Name, *GroupEntry.first);
+ return T;
+ }
+};
- Name2Timer::iterator J = I->second.second.find(Name);
- if (J == I->second.second.end())
- J = I->second.second.insert(J,
- std::make_pair(Name,
- Timer(Name,
- I->second.first)));
+static ManagedStatic<Name2TimerMap> NamedTimers;
+static ManagedStatic<Name2PairMap> NamedGroupedTimers;
- return J->second;
+static Timer &getNamedRegionTimer(StringRef Name) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ Timer &T = (*NamedTimers)[Name];
+ if (!T.isInitialized())
+ T.init(Name);
+ return T;
}
-NamedRegionTimer::NamedRegionTimer(const std::string &Name)
+NamedRegionTimer::NamedRegionTimer(StringRef Name)
: TimeRegion(getNamedRegionTimer(Name)) {}
-NamedRegionTimer::NamedRegionTimer(const std::string &Name,
- const std::string &GroupName)
- : TimeRegion(getNamedRegionTimer(Name, GroupName)) {}
+NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName)
+ : TimeRegion(NamedGroupedTimers->get(Name, GroupName)) {}
//===----------------------------------------------------------------------===//
// TimerGroup Implementation
//===----------------------------------------------------------------------===//
+/// TimerGroupList - This is the global list of TimerGroups, maintained by the
+/// TimerGroup ctor/dtor and protected by the TimerLock lock.
+static TimerGroup *TimerGroupList = 0;
-static void printVal(double Val, double Total, raw_ostream &OS) {
- if (Total < 1e-7) // Avoid dividing by zero...
- OS << " ----- ";
- else {
- OS << " " << format("%7.4f", Val) << " (";
- OS << format("%5.1f", Val*100/Total) << "%)";
- }
+TimerGroup::TimerGroup(StringRef name)
+ : Name(name.begin(), name.end()), FirstTimer(0) {
+
+ // Add the group to TimerGroupList.
+ sys::SmartScopedLock<true> L(*TimerLock);
+ if (TimerGroupList)
+ TimerGroupList->Prev = &Next;
+ Next = TimerGroupList;
+ Prev = &TimerGroupList;
+ TimerGroupList = this;
}
-void Timer::print(const Timer &Total, raw_ostream &OS) {
+TimerGroup::~TimerGroup() {
+ // If the timer group is destroyed before the timers it owns, accumulate and
+ // print the timing data.
+ while (FirstTimer != 0)
+ removeTimer(*FirstTimer);
+
+ // Remove the group from the TimerGroupList.
sys::SmartScopedLock<true> L(*TimerLock);
- if (Total.UserTime)
- printVal(UserTime, Total.UserTime, OS);
- if (Total.SystemTime)
- printVal(SystemTime, Total.SystemTime, OS);
- if (Total.getProcessTime())
- printVal(getProcessTime(), Total.getProcessTime(), OS);
- printVal(Elapsed, Total.Elapsed, OS);
-
- OS << " ";
-
- if (Total.MemUsed) {
- OS << format("%9lld", (long long)MemUsed) << " ";
- }
- if (Total.PeakMem) {
- if (PeakMem) {
- OS << format("%9lld", (long long)PeakMem) << " ";
- } else
- OS << " ";
- }
- OS << Name << "\n";
-
- Started = false; // Once printed, don't print again
+ *Prev = Next;
+ if (Next)
+ Next->Prev = Prev;
}
-// GetLibSupportInfoOutputFile - Return a file stream to print our output on...
-raw_ostream *
-llvm::GetLibSupportInfoOutputFile() {
- std::string &LibSupportInfoOutputFilename = getLibSupportInfoOutputFilename();
- if (LibSupportInfoOutputFilename.empty())
- return &errs();
- if (LibSupportInfoOutputFilename == "-")
- return &outs();
-
- std::string Error;
- raw_ostream *Result = new raw_fd_ostream(LibSupportInfoOutputFilename.c_str(),
- Error, raw_fd_ostream::F_Append);
- if (Error.empty())
- return Result;
-
- errs() << "Error opening info-output-file '"
- << LibSupportInfoOutputFilename << " for appending!\n";
- delete Result;
- return &errs();
+void TimerGroup::removeTimer(Timer &T) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ // If the timer was started, move its data to TimersToPrint.
+ if (T.Started)
+ TimersToPrint.push_back(std::make_pair(T.Time, T.Name));
+
+ T.TG = 0;
+
+ // Unlink the timer from our list.
+ *T.Prev = T.Next;
+ if (T.Next)
+ T.Next->Prev = T.Prev;
+
+  // Print the report when all timers in this group have been destroyed, if
+  // any of them were started.
+ if (FirstTimer != 0 || TimersToPrint.empty())
+ return;
+
+ raw_ostream *OutStream = CreateInfoOutputFile();
+ PrintQueuedTimers(*OutStream);
+ delete OutStream; // Close the file.
}
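
Timer::Prev here (and TimerGroup::Prev above) is a pointer-to-pointer: it stores the address of whichever link points at the node, so unlinking the head of the list needs no special case. A self-contained sketch of the same list discipline (hypothetical Node type):

    #include <cassert>

    struct Node {
      Node *Next;
      Node **Prev;   // address of whichever pointer points at us
    };

    static void push(Node *&Head, Node &N) {
      if (Head) Head->Prev = &N.Next;
      N.Next = Head;
      N.Prev = &Head;
      Head = &N;
    }

    static void unlink(Node &N) {
      *N.Prev = N.Next;              // works for head and interior alike
      if (N.Next) N.Next->Prev = N.Prev;
    }

    int main() {
      Node *Head = 0;
      Node A, B;
      push(Head, A); push(Head, B);  // list: B -> A
      unlink(B);                     // removing the head needs no branch
      assert(Head == &A && A.Next == 0);
      unlink(A);
      assert(Head == 0);
    }
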
-
-void TimerGroup::removeTimer() {
+void TimerGroup::addTimer(Timer &T) {
sys::SmartScopedLock<true> L(*TimerLock);
- if (--NumTimers == 0 && !TimersToPrint.empty()) { // Print timing report...
- // Sort the timers in descending order by amount of time taken...
- std::sort(TimersToPrint.begin(), TimersToPrint.end(),
- std::greater<Timer>());
-
- // Figure out how many spaces to indent TimerGroup name...
- unsigned Padding = (80-Name.length())/2;
- if (Padding > 80) Padding = 0; // Don't allow "negative" numbers
-
- raw_ostream *OutStream = GetLibSupportInfoOutputFile();
-
- ++NumTimers;
- { // Scope to contain Total timer... don't allow total timer to drop us to
- // zero timers...
- Timer Total("TOTAL");
-
- for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
- Total.sum(TimersToPrint[i]);
-
- // Print out timing header...
- *OutStream << "===" << std::string(73, '-') << "===\n"
- << std::string(Padding, ' ') << Name << "\n"
- << "===" << std::string(73, '-')
- << "===\n";
-
- // If this is not an collection of ungrouped times, print the total time.
- // Ungrouped timers don't really make sense to add up. We still print the
- // TOTAL line to make the percentages make sense.
- if (this != DefaultTimerGroup) {
- *OutStream << " Total Execution Time: ";
-
- *OutStream << format("%5.4f", Total.getProcessTime()) << " seconds (";
- *OutStream << format("%5.4f", Total.getWallTime()) << " wall clock)\n";
- }
- *OutStream << "\n";
-
- if (Total.UserTime)
- *OutStream << " ---User Time---";
- if (Total.SystemTime)
- *OutStream << " --System Time--";
- if (Total.getProcessTime())
- *OutStream << " --User+System--";
- *OutStream << " ---Wall Time---";
- if (Total.getMemUsed())
- *OutStream << " ---Mem---";
- if (Total.getPeakMem())
- *OutStream << " -PeakMem-";
- *OutStream << " --- Name ---\n";
-
- // Loop through all of the timing data, printing it out...
- for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
- TimersToPrint[i].print(Total, *OutStream);
-
- Total.print(Total, *OutStream);
- *OutStream << '\n';
- OutStream->flush();
- }
- --NumTimers;
-
- TimersToPrint.clear();
-
- if (OutStream != &errs() && OutStream != &outs() && OutStream != &dbgs())
- delete OutStream; // Close the file...
+
+ // Add the timer to our list.
+ if (FirstTimer)
+ FirstTimer->Prev = &T.Next;
+ T.Next = FirstTimer;
+ T.Prev = &FirstTimer;
+ FirstTimer = &T;
+}
+
+void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
+ // Sort the timers in descending order by amount of time taken.
+ std::sort(TimersToPrint.begin(), TimersToPrint.end());
+
+ TimeRecord Total;
+ for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i)
+ Total += TimersToPrint[i].first;
+
+ // Print out timing header.
+ OS << "===" << std::string(73, '-') << "===\n";
+ // Figure out how many spaces to indent TimerGroup name.
+ unsigned Padding = (80-Name.length())/2;
+ if (Padding > 80) Padding = 0; // Don't allow "negative" numbers
+ OS.indent(Padding) << Name << '\n';
+ OS << "===" << std::string(73, '-') << "===\n";
+
+  // If this is not a collection of ungrouped timers, print the total time.
+ // Ungrouped timers don't really make sense to add up. We still print the
+ // TOTAL line to make the percentages make sense.
+ if (this != DefaultTimerGroup) {
+ OS << " Total Execution Time: ";
+ OS << format("%5.4f", Total.getProcessTime()) << " seconds (";
+ OS << format("%5.4f", Total.getWallTime()) << " wall clock)\n";
+ }
+ OS << '\n';
+
+ if (Total.getUserTime())
+ OS << " ---User Time---";
+ if (Total.getSystemTime())
+ OS << " --System Time--";
+ if (Total.getProcessTime())
+ OS << " --User+System--";
+ OS << " ---Wall Time---";
+ if (Total.getMemUsed())
+ OS << " ---Mem---";
+ OS << " --- Name ---\n";
+
+ // Loop through all of the timing data, printing it out.
+ for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i) {
+ const std::pair<TimeRecord, std::string> &Entry = TimersToPrint[e-i-1];
+ Entry.first.print(Total, OS);
+ OS << Entry.second << '\n';
}
+
+ Total.print(Total, OS);
+ OS << "Total\n\n";
+ OS.flush();
+
+ TimersToPrint.clear();
}
-void TimerGroup::addTimer() {
+/// print - Print any started timers in this group and zero them.
+void TimerGroup::print(raw_ostream &OS) {
sys::SmartScopedLock<true> L(*TimerLock);
- ++NumTimers;
+
+ // See if any of our timers were started, if so add them to TimersToPrint and
+ // reset them.
+ for (Timer *T = FirstTimer; T; T = T->Next) {
+ if (!T->Started) continue;
+ TimersToPrint.push_back(std::make_pair(T->Time, T->Name));
+
+ // Clear out the time.
+ T->Started = 0;
+ T->Time = TimeRecord();
+ }
+
+ // If any timers were started, print the group.
+ if (!TimersToPrint.empty())
+ PrintQueuedTimers(OS);
}
-void TimerGroup::addTimerToPrint(const Timer &T) {
+/// printAll - This static method prints all timers and clears them all out.
+void TimerGroup::printAll(raw_ostream &OS) {
sys::SmartScopedLock<true> L(*TimerLock);
- TimersToPrint.push_back(Timer(true, T));
-}
+ for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next)
+ TG->print(OS);
+}
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 61bf0a7..9796ca5 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -189,7 +189,7 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
return Triple::UnknownArch;
}
-// Returns architecture name that is unsderstood by the target assembler.
+// Returns architecture name that is understood by the target assembler.
const char *Triple::getArchNameForAssembler() {
if (getOS() != Triple::Darwin && getVendor() != Triple::Apple)
return NULL;
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 071c924..f59bd0d 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -81,9 +81,9 @@ void raw_ostream::SetBuffered() {
SetUnbuffered();
}
-void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
+void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
BufferKind Mode) {
- assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) ||
+ assert(((Mode == Unbuffered && BufferStart == 0 && Size == 0) ||
(Mode != Unbuffered && BufferStart && Size)) &&
"stream must be unbuffered or have at least one byte");
// Make sure the current buffer is free of content (we can't flush here; the
@@ -104,11 +104,11 @@ raw_ostream &raw_ostream::operator<<(unsigned long N) {
// Zero is a special case.
if (N == 0)
return *this << '0';
-
+
char NumberBuffer[20];
char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
char *CurPtr = EndPtr;
-
+
while (N) {
*--CurPtr = '0' + char(N % 10);
N /= 10;
@@ -121,7 +121,7 @@ raw_ostream &raw_ostream::operator<<(long N) {
*this << '-';
N = -N;
}
-
+
return this->operator<<(static_cast<unsigned long>(N));
}
@@ -133,7 +133,7 @@ raw_ostream &raw_ostream::operator<<(unsigned long long N) {
char NumberBuffer[20];
char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
char *CurPtr = EndPtr;
-
+
while (N) {
*--CurPtr = '0' + char(N % 10);
N /= 10;
@@ -146,7 +146,7 @@ raw_ostream &raw_ostream::operator<<(long long N) {
*this << '-';
N = -N;
}
-
+
return this->operator<<(static_cast<unsigned long long>(N));
}
@@ -297,33 +297,33 @@ raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
size_t BufferBytesLeft = OutBufEnd - OutBufCur;
if (BufferBytesLeft > 3) {
size_t BytesUsed = Fmt.print(OutBufCur, BufferBytesLeft);
-
+
// Common case is that we have plenty of space.
if (BytesUsed <= BufferBytesLeft) {
OutBufCur += BytesUsed;
return *this;
}
-
+
// Otherwise, we overflowed and the return value tells us the size to try
// again with.
NextBufferSize = BytesUsed;
}
-
+
// If we got here, we didn't have enough space in the output buffer for the
// string. Try printing into a SmallVector that is resized to have enough
// space. Iterate until we win.
SmallVector<char, 128> V;
-
+
while (1) {
V.resize(NextBufferSize);
-
+
// Try formatting into the SmallVector.
size_t BytesUsed = Fmt.print(V.data(), NextBufferSize);
-
+
// If BytesUsed fit into the vector, we win.
if (BytesUsed <= NextBufferSize)
return write(V.data(), BytesUsed);
-
+
// Otherwise, try again with a new size.
assert(BytesUsed > NextBufferSize && "Didn't grow buffer!?");
NextBufferSize = BytesUsed;
@@ -339,7 +339,7 @@ raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
// Usually the indentation is small, handle it with a fastpath.
if (NumSpaces < array_lengthof(Spaces))
return write(Spaces, NumSpaces);
-
+
while (NumSpaces) {
unsigned NumToWrite = std::min(NumSpaces,
(unsigned)array_lengthof(Spaces)-1);
@@ -372,7 +372,7 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
// Verify that we don't have both "append" and "excl".
assert((!(Flags & F_Excl) || !(Flags & F_Append)) &&
"Cannot specify both 'excl' and 'append' file creation flags!");
-
+
ErrorInfo.clear();
// Handle "-" as stdout.
@@ -385,20 +385,20 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo,
ShouldClose = false;
return;
}
-
+
int OpenFlags = O_WRONLY|O_CREAT;
#ifdef O_BINARY
if (Flags & F_Binary)
OpenFlags |= O_BINARY;
#endif
-
+
if (Flags & F_Append)
OpenFlags |= O_APPEND;
else
OpenFlags |= O_TRUNC;
if (Flags & F_Excl)
OpenFlags |= O_EXCL;
-
+
FD = open(Filename, OpenFlags, 0664);
if (FD < 0) {
ErrorInfo = "Error opening output file '" + std::string(Filename) + "'";
@@ -418,14 +418,14 @@ raw_fd_ostream::~raw_fd_ostream() {
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
- assert (FD >= 0 && "File already closed.");
+ assert(FD >= 0 && "File already closed.");
pos += Size;
if (::write(FD, Ptr, Size) != (ssize_t) Size)
error_detected();
}
void raw_fd_ostream::close() {
- assert (ShouldClose);
+ assert(ShouldClose);
ShouldClose = false;
flush();
if (::close(FD) != 0)
@@ -438,7 +438,7 @@ uint64_t raw_fd_ostream::seek(uint64_t off) {
pos = ::lseek(FD, off, SEEK_SET);
if (pos != off)
error_detected();
- return pos;
+ return pos;
}
size_t raw_fd_ostream::preferred_buffer_size() const {
@@ -447,7 +447,7 @@ size_t raw_fd_ostream::preferred_buffer_size() const {
struct stat statbuf;
if (fstat(FD, &statbuf) != 0)
return 0;
-
+
// If this is a terminal, don't use buffering. Line buffering
// would be a more traditional thing to do, but it's not worth
// the complexity.
diff --git a/lib/System/Unix/Mutex.inc b/lib/System/Unix/Mutex.inc
index 10e7ecb..4a5e28d 100644
--- a/lib/System/Unix/Mutex.inc
+++ b/lib/System/Unix/Mutex.inc
@@ -29,12 +29,6 @@ MutexImpl::~MutexImpl()
}
bool
-MutexImpl::MutexImpl()
-{
- return true;
-}
-
-bool
MutexImpl::release()
{
return true;
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index a99720c..52253b3 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -454,7 +454,7 @@ Path::canWrite() const {
bool
Path::isRegularFile() const {
- // Get the status so we can determine if its a file or directory
+ // Get the status so we can determine if it's a file or directory
struct stat buf;
if (0 != stat(path.c_str(), &buf))
@@ -736,7 +736,7 @@ Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) {
bool
Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const {
- // Get the status so we can determine if its a file or directory
+ // Get the status so we can determine if it's a file or directory.
struct stat buf;
if (0 != stat(path.c_str(), &buf)) {
MakeErrMsg(ErrStr, path + ": can't get status of file");
diff --git a/lib/System/Win32/Program.inc b/lib/System/Win32/Program.inc
index a3b40d0..16bb28e 100644
--- a/lib/System/Win32/Program.inc
+++ b/lib/System/Win32/Program.inc
@@ -138,6 +138,24 @@ static bool ArgNeedsQuotes(const char *Str) {
return Str[0] == '\0' || strchr(Str, ' ') != 0;
}
+
+/// ArgLenWithQuotes - Check whether the argument needs quoting when calling
+/// CreateProcess, and return the length of the quoted arg with escaped quotes.
+static unsigned int ArgLenWithQuotes(const char *Str) {
+ unsigned int len = ArgNeedsQuotes(Str) ? 2 : 0;
+
+ while (*Str != '\0') {
+ if (*Str == '\"')
+ ++len;
+
+ ++len;
+ ++Str;
+ }
+
+ return len;
+}
+
+
bool
Program::Execute(const Path& path,
const char** args,
@@ -165,9 +183,7 @@ Program::Execute(const Path& path,
// First, determine the length of the command line.
unsigned len = 0;
for (unsigned i = 0; args[i]; i++) {
- len += strlen(args[i]) + 1;
- if (ArgNeedsQuotes(args[i]))
- len += 2;
+ len += ArgLenWithQuotes(args[i]) + 1;
}
// Now build the command line.
@@ -176,12 +192,18 @@ Program::Execute(const Path& path,
for (unsigned i = 0; args[i]; i++) {
const char *arg = args[i];
- size_t len = strlen(arg);
+
bool needsQuoting = ArgNeedsQuotes(arg);
if (needsQuoting)
*p++ = '"';
- memcpy(p, arg, len);
- p += len;
+
+ while (*arg != '\0') {
+ if (*arg == '\"')
+ *p++ = '\\';
+
+ *p++ = *arg++;
+ }
+
if (needsQuoting)
*p++ = '"';
*p++ = ' ';
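
A standalone sketch of the quoting scheme implemented above (self-contained C++; the helper names are ours): a quoted argument gains two bytes for the surrounding quotes, and every embedded '"' gains one byte for its backslash escape.

  #include <cstdio>
  #include <cstring>

  static bool needsQuotes(const char *S) {
    return S[0] == '\0' || strchr(S, ' ') != 0;
  }

  static unsigned quotedLen(const char *S) {
    unsigned Len = needsQuotes(S) ? 2 : 0;  // surrounding quotes
    for (; *S; ++S)
      Len += (*S == '"') ? 2 : 1;           // +1 per escaped quote
    return Len;
  }

  int main() {
    // say "hi"  ->  "say \"hi\"" : 8 chars + 2 escapes + 2 quotes = 12
    printf("%u\n", quotedLen("say \"hi\""));
    return 0;
  }
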
diff --git a/lib/System/Win32/Signals.inc b/lib/System/Win32/Signals.inc
index dba2218..f2b72ca 100644
--- a/lib/System/Win32/Signals.inc
+++ b/lib/System/Win32/Signals.inc
@@ -163,6 +163,7 @@ void sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >();
CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie));
RegisterHandler();
+ LeaveCriticalSection(&CriticalSection);
}
}
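
The added line releases the critical section on a path that previously returned while still holding it (the matching EnterCriticalSection sits earlier in AddSignalHandler, outside this hunk). A sketch of the RAII guard that would rule out such leaks by construction (hypothetical helper, not part of the patch):

  #include <windows.h>

  struct CritSecGuard {
    CRITICAL_SECTION &CS;
    explicit CritSecGuard(CRITICAL_SECTION &cs) : CS(cs) {
      EnterCriticalSection(&CS);
    }
    ~CritSecGuard() { LeaveCriticalSection(&CS); }  // runs on every return
  };
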
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index bbb1dbd..6486a60 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -43,6 +43,21 @@ def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true",
"Enable half-precision floating point">;
+// Some processors have multiply-accumulate instructions that don't
+// play nicely with other VFP instructions, and it's generally better
+// to just not use them.
+// FIXME: Currently, this is only flagged for Cortex-A8. It may be true for
+// others as well. We should do more benchmarking and confirm one way or
+// the other.
+def FeatureHasSlowVMLx : SubtargetFeature<"vmlx", "SlowVMLx", "true",
+ "Disable VFP MAC instructions">;
+// Some processors benefit from using NEON instructions for scalar
+// single-precision FP operations.
+def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
+ "true",
+ "Use NEON for single precision FP">;
+
+
//===----------------------------------------------------------------------===//
// ARM Processors supported.
//
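
Each SubtargetFeature above surfaces both as a -mattr flag (e.g. -mattr=+vmlx) and as a boolean on ARMSubtarget. A sketch of how back-end code consults it (illustrative only; useVMLx() is our assumed accessor shape, mirroring the UseVMLx predicate added in ARMInstrInfo.td below):

  // In ARM lowering/selection code (sketch):
  if (Subtarget->useVMLx()) {
    // VFP VMLA/VMLS are profitable; let the patterns match them.
  } else {
    // SlowVMLx core (e.g. Cortex-A8): keep the multiply and the
    // accumulate as separate VMUL + VADD instructions instead.
  }
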
@@ -92,7 +107,8 @@ def : ProcNoItin<"iwmmxt", [ArchV5TE]>;
// V6 Processors.
def : Processor<"arm1136j-s", ARMV6Itineraries, [ArchV6]>;
-def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+ FeatureHasSlowVMLx]>;
def : Processor<"arm1176jz-s", ARMV6Itineraries, [ArchV6]>;
def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2]>;
def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>;
@@ -106,7 +122,8 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries,
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
- [ArchV7A, FeatureThumb2, FeatureNEON]>;
+ [ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
+ FeatureNEONForFP]>;
def : ProcNoItin<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index e6ea03a..0a0b0ea 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -204,7 +204,15 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
@@ -275,6 +283,11 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (!isUncondBranchOpcode(I->getOpcode()) &&
!isCondBranchOpcode(I->getOpcode()))
return 0;
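
Both hunks above apply the same idiom: walk backwards from MBB.end(), skipping DBG_VALUE instructions so that the presence of debug info never changes branch analysis. The loop, factored into a helper for clarity (hypothetical name, not in the patch):

  static MachineBasicBlock::iterator
  getLastNonDebugInstr(MachineBasicBlock &MBB) {
    MachineBasicBlock::iterator I = MBB.end();
    while (I != MBB.begin()) {
      --I;
      if (!I->isDebugValue())
        return I;        // last "real" instruction
    }
    return MBB.end();    // empty block, or debug values only
  }
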
@@ -738,14 +751,16 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
// FIXME: Neon instructions should support predicates
if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q))
.addFrameIndex(FI).addImm(128)
.addMemOperand(MMO)
.addReg(SrcReg, getKillRegState(isKill)));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRQ)).
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQ)).
addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addMemOperand(MMO));
}
}
}
@@ -788,12 +803,14 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
if (Align >= 16
&& (getRegisterInfo().canRealignStack(MF))) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q), DestReg)
.addFrameIndex(FI).addImm(128)
.addMemOperand(MMO));
} else {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg)
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQ), DestReg)
+ .addFrameIndex(FI)
+ .addImm(ARM_AM::getAM5Opc(ARM_AM::ia, 4))
+ .addMemOperand(MMO));
}
}
}
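
A note on the recurring ARM_AM::getAM5Opc(ARM_AM::ia, 4) operand (our reading of ARMAddressingModes.h, stated as an assumption): it packs the load/store-multiple submode together with the transfer size for addressing mode 5.

  // VLDMQ/VSTMQ move one Q register = two D registers = four 32-bit
  // words, using the increment-after (ia) submode.
  unsigned AM5Opc = ARM_AM::getAM5Opc(ARM_AM::ia, 4);
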
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 71207c8..7d48663 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -124,17 +124,17 @@ private:
/// SelectDYN_ALLOC - Select dynamic alloc for Thumb.
SDNode *SelectDYN_ALLOC(SDNode *N);
- /// SelectVLD - Select NEON load intrinsics. NumVecs should
- /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// SelectVLD - Select NEON load intrinsics. NumVecs should be
+ /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// loads of D registers and even subregs and odd subregs of Q registers.
- /// For NumVecs == 2, QOpcodes1 is not used.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVLD(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
/// SelectVST - Select NEON store intrinsics. NumVecs should
- /// be 2, 3 or 4. The opcode arrays specify the instructions used for
+ /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for
/// stores of D registers and even subregs and odd subregs of Q registers.
- /// For NumVecs == 2, QOpcodes1 is not used.
+ /// For NumVecs <= 2, QOpcodes1 is not used.
SDNode *SelectVST(SDNode *N, unsigned NumVecs, unsigned *DOpcodes,
unsigned *QOpcodes0, unsigned *QOpcodes1);
@@ -1022,7 +1022,7 @@ static EVT GetNEONSubregVT(EVT VT) {
SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range");
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
@@ -1047,6 +1047,9 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
+ break;
}
SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
@@ -1060,15 +1063,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
}
EVT RegVT = GetNEONSubregVT(VT);
- if (NumVecs == 2) {
- // Quad registers are directly supported for VLD2,
- // loading 2 pairs of D regs.
+ if (NumVecs <= 2) {
+ // Quad registers are directly supported for VLD1 and VLD2,
+ // loading pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain };
- std::vector<EVT> ResTys(4, VT);
+ std::vector<EVT> ResTys(2 * NumVecs, RegVT);
ResTys.push_back(MVT::Other);
SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
- Chain = SDValue(VLd, 4);
+ Chain = SDValue(VLd, 2 * NumVecs);
// Combine the even and odd subregs to produce the result.
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
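
Concretely, the machine node now carries 2*NumVecs D-sized register results followed by the chain (a worked reading of the hunk above, not new code):

  // VLD1 of a Q register (NumVecs == 1):
  //   ResTys = { RegVT, RegVT, MVT::Other }               -> chain is result #2
  // VLD2 of Q registers (NumVecs == 2):
  //   ResTys = { RegVT, RegVT, RegVT, RegVT, MVT::Other } -> chain is #4
  // which is exactly what SDValue(VLd, 2 * NumVecs) selects.
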
@@ -1109,7 +1112,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs,
SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
unsigned *DOpcodes, unsigned *QOpcodes0,
unsigned *QOpcodes1) {
- assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range");
+  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
DebugLoc dl = N->getDebugLoc();
SDValue MemAddr, Align;
@@ -1134,6 +1137,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
case MVT::v8i16: OpcodeIndex = 1; break;
case MVT::v4f32:
case MVT::v4i32: OpcodeIndex = 2; break;
+ case MVT::v2i64: OpcodeIndex = 3;
+ assert(NumVecs == 1 && "v2i64 type only supported for VST1");
+ break;
}
SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
@@ -1154,9 +1160,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
}
EVT RegVT = GetNEONSubregVT(VT);
- if (NumVecs == 2) {
- // Quad registers are directly supported for VST2,
- // storing 2 pairs of D regs.
+ if (NumVecs <= 2) {
+ // Quad registers are directly supported for VST1 and VST2,
+ // storing pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
for (unsigned Vec = 0; Vec < NumVecs; ++Vec) {
Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT,
@@ -1167,7 +1173,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs,
Ops.push_back(Pred);
Ops.push_back(Reg0); // predicate register
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(),
+ 5 + 2 * NumVecs);
}
// Otherwise, quad registers are stored with two separate instructions,
@@ -1694,6 +1701,35 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
ResNode = SelectARMIndexedLoad(N);
if (ResNode)
return ResNode;
+
+ // VLDMQ must be custom-selected for "v2f64 load" to set the AM5Opc value.
+ if (Subtarget->hasVFP2() &&
+ N->getValueType(0).getSimpleVT().SimpleTy == MVT::v2f64) {
+ SDValue Chain = N->getOperand(0);
+ SDValue AM5Opc =
+ CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(1), AM5Opc, Pred, PredReg, Chain };
+ return CurDAG->getMachineNode(ARM::VLDMQ, dl, MVT::v2f64, MVT::Other,
+ Ops, 5);
+ }
+ // Other cases are autogenerated.
+ break;
+ }
+ case ISD::STORE: {
+ // VSTMQ must be custom-selected for "v2f64 store" to set the AM5Opc value.
+ if (Subtarget->hasVFP2() &&
+ N->getOperand(1).getValueType().getSimpleVT().SimpleTy == MVT::v2f64) {
+ SDValue Chain = N->getOperand(0);
+ SDValue AM5Opc =
+ CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::ia, 4), MVT::i32);
+ SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32);
+ SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
+ SDValue Ops[] = { N->getOperand(1), N->getOperand(2),
+ AM5Opc, Pred, PredReg, Chain };
+ return CurDAG->getMachineNode(ARM::VSTMQ, dl, MVT::Other, Ops, 6);
+ }
// Other cases are autogenerated.
break;
}
@@ -1831,16 +1867,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
default:
break;
+ case Intrinsic::arm_neon_vld1: {
+ unsigned DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
+ ARM::VLD1d32, ARM::VLD1d64 };
+ unsigned QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
+ ARM::VLD1q32, ARM::VLD1q64 };
+ return SelectVLD(N, 1, DOpcodes, QOpcodes, 0);
+ }
+
case Intrinsic::arm_neon_vld2: {
unsigned DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
- ARM::VLD2d32, ARM::VLD2d64 };
+ ARM::VLD2d32, ARM::VLD1q64 };
unsigned QOpcodes[] = { ARM::VLD2q8, ARM::VLD2q16, ARM::VLD2q32 };
return SelectVLD(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vld3: {
unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16,
- ARM::VLD3d32, ARM::VLD3d64 };
+ ARM::VLD3d32, ARM::VLD1d64T };
unsigned QOpcodes0[] = { ARM::VLD3q8_UPD,
ARM::VLD3q16_UPD,
ARM::VLD3q32_UPD };
@@ -1852,7 +1896,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vld4: {
unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16,
- ARM::VLD4d32, ARM::VLD4d64 };
+ ARM::VLD4d32, ARM::VLD1d64Q };
unsigned QOpcodes0[] = { ARM::VLD4q8_UPD,
ARM::VLD4q16_UPD,
ARM::VLD4q32_UPD };
@@ -1883,16 +1927,24 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
}
+ case Intrinsic::arm_neon_vst1: {
+ unsigned DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
+ ARM::VST1d32, ARM::VST1d64 };
+ unsigned QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
+ ARM::VST1q32, ARM::VST1q64 };
+ return SelectVST(N, 1, DOpcodes, QOpcodes, 0);
+ }
+
case Intrinsic::arm_neon_vst2: {
unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
- ARM::VST2d32, ARM::VST2d64 };
+ ARM::VST2d32, ARM::VST1q64 };
unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 };
return SelectVST(N, 2, DOpcodes, QOpcodes, 0);
}
case Intrinsic::arm_neon_vst3: {
unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16,
- ARM::VST3d32, ARM::VST3d64 };
+ ARM::VST3d32, ARM::VST1d64T };
unsigned QOpcodes0[] = { ARM::VST3q8_UPD,
ARM::VST3q16_UPD,
ARM::VST3q32_UPD };
@@ -1904,7 +1956,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
case Intrinsic::arm_neon_vst4: {
unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16,
- ARM::VST4d32, ARM::VST4d64 };
+ ARM::VST4d32, ARM::VST1d64Q };
unsigned QOpcodes0[] = { ARM::VST4q8_UPD,
ARM::VST4q16_UPD,
ARM::VST4q32_UPD };
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 0d0a004..b6c81f6 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -456,6 +456,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
// Generic (and overly aggressive) if-conversion limits.
setIfCvtBlockSizeLimit(10);
setIfCvtDupBlockSizeLimit(2);
+ } else if (Subtarget->hasV7Ops()) {
+ setIfCvtBlockSizeLimit(3);
+ setIfCvtDupBlockSizeLimit(1);
} else if (Subtarget->hasV6Ops()) {
setIfCvtBlockSizeLimit(2);
setIfCvtDupBlockSizeLimit(1);
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 4f6f05d..4427e50 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -1,10 +1,10 @@
//===- ARMInstrFormats.td - ARM Instruction Formats --*- tablegen -*---------=//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
@@ -59,7 +59,18 @@ def NEONDupFrm : Format<28>;
def MiscFrm : Format<29>;
def ThumbMiscFrm : Format<30>;
-def NLdStFrm : Format<31>;
+def NLdStFrm : Format<31>;
+def N1RegModImmFrm : Format<32>;
+def N2RegFrm : Format<33>;
+def NVCVTFrm : Format<34>;
+def NVDupLnFrm : Format<35>;
+def N2RegVShLFrm : Format<36>;
+def N2RegVShRFrm : Format<37>;
+def N3RegFrm : Format<38>;
+def N3RegVShFrm : Format<39>;
+def NVExtFrm : Format<40>;
+def NVMulSLFrm : Format<41>;
+def NVTBLFrm : Format<42>;
// Misc flags.
@@ -177,13 +188,13 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
// TSFlagsFields
AddrMode AM = am;
bits<4> AddrModeBits = AM.Value;
-
+
SizeFlagVal SZ = sz;
bits<3> SizeFlag = SZ.Value;
IndexMode IM = im;
bits<2> IndexModeBits = IM.Value;
-
+
Format F = f;
bits<6> Form = F.Value;
@@ -195,7 +206,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
//
bit isUnaryDataProc = 0;
bit canXformTo16Bit = 0;
-
+
let Constraints = cstr;
let Itinerary = itin;
}
@@ -214,9 +225,9 @@ class InstThumb<AddrMode am, SizeFlagVal sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
: InstTemplate<am, sz, im, f, d, cstr, itin>;
-class PseudoInst<dag oops, dag iops, InstrItinClass itin,
+class PseudoInst<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain,
+ : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain,
"", itin> {
let OutOperandList = oops;
let InOperandList = iops;
@@ -226,7 +237,7 @@ class PseudoInst<dag oops, dag iops, InstrItinClass itin,
// Almost all ARM instructions are predicable.
class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
- IndexMode im, Format f, InstrItinClass itin,
+ IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
list<dag> pattern>
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
@@ -238,9 +249,9 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
// A few are not predicable
class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
- IndexMode im, Format f, InstrItinClass itin,
- string opc, string asm, string cstr,
- list<dag> pattern>
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
@@ -290,9 +301,9 @@ class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
: XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern>;
class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string asm, list<dag> pattern>
: InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
- opc, asm, "", pattern>;
+ opc, asm, "", pattern>;
// Ctrl flow instructions
class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
@@ -362,7 +373,7 @@ class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{24-21} = opcod;
let Inst{27-26} = {0,0};
}
-class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
+class AI1x2<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode1, Size8Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern>;
@@ -387,7 +398,7 @@ class AI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
+class AXI2ldw<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
: XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
@@ -407,7 +418,7 @@ class AI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{24} = 1; // P bit
let Inst{27-26} = {0,1};
}
-class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
+class AXI2ldb<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
: XI<oops, iops, AddrMode2, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern> {
@@ -549,7 +560,7 @@ class AI2stbpo<dag oops, dag iops, Format f, InstrItinClass itin,
}
// addrmode3 instructions
-class AI3<dag oops, dag iops, Format f, InstrItinClass itin,
+class AI3<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
opc, asm, "", pattern>;
@@ -853,7 +864,6 @@ class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{27-25} = 0b000;
}
-
// addrmode4 instructions
class AXI4ld<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
@@ -961,20 +971,25 @@ class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
: ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
// Two-address instructions
-class TIt<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
- : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst", pattern>;
+class TIt<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst",
+ pattern>;
// tBL, tBX 32-bit instructions
class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
- dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
- : ThumbI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>, Encoding {
+ dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
+ : ThumbI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>,
+ Encoding {
let Inst{31-27} = opcod1;
let Inst{15-14} = opcod2;
let Inst{12} = opcod3;
}
// BR_JT instructions
-class TJTI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
+class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
+ list<dag> pattern>
: ThumbI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
// Thumb1 only
@@ -1001,7 +1016,7 @@ class T1JTI<dag oops, dag iops, InstrItinClass itin,
// Two-address instructions
class T1It<dag oops, dag iops, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin,
+ : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin,
asm, cstr, pattern>;
// Thumb1 instruction that can either be predicated or set CPSR.
@@ -1024,7 +1039,7 @@ class T1sI<dag oops, dag iops, InstrItinClass itin,
class T1sIt<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
- "$lhs = $dst", pattern>;
+ "$lhs = $dst", pattern>;
// Thumb1 instruction that can be predicated.
class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
@@ -1046,7 +1061,7 @@ class T1pI<dag oops, dag iops, InstrItinClass itin,
class T1pIt<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
- "$lhs = $dst", pattern>;
+ "$lhs = $dst", pattern>;
class T1pI1<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -1057,7 +1072,7 @@ class T1pI2<dag oops, dag iops, InstrItinClass itin,
class T1pI4<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb1pI<oops, iops, AddrModeT1_4, Size2Bytes, itin, opc, asm, "", pattern>;
-class T1pIs<dag oops, dag iops,
+class T1pIs<dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
: Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "", pattern>;
@@ -1146,8 +1161,8 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
- InstrItinClass itin,
- string asm, string cstr, list<dag> pattern>
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = iops;
@@ -1161,7 +1176,7 @@ class T2I<dag oops, dag iops, InstrItinClass itin,
: Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
class T2Ii12<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "", pattern>;
+ : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "",pattern>;
class T2Ii8<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, itin, opc, asm, "", pattern>;
@@ -1196,7 +1211,7 @@ class T2JTI<dag oops, dag iops, InstrItinClass itin,
: Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
class T2Ix2<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+ string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeNone, Size8Bytes, itin, opc, asm, "", pattern>;
// Two-address instructions
@@ -1295,7 +1310,7 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
- VFPLdStFrm, itin, opc, asm, "", pattern> {
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
@@ -1309,7 +1324,7 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
- VFPLdStFrm, itin, opc, asm, "", pattern> {
+ VFPLdStFrm, itin, opc, asm, "", pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-24} = opcod1;
let Inst{21-20} = opcod2;
@@ -1320,7 +1335,7 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
- VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
let Inst{11-8} = 0b1011;
@@ -1332,7 +1347,7 @@ class AXDI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
class AXSI5<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: VFPXI<oops, iops, AddrMode5, Size4Bytes, im,
- VFPLdStMulFrm, itin, asm, cstr, pattern> {
+ VFPLdStMulFrm, itin, asm, cstr, pattern> {
// TODO: Mark the instructions with the appropriate subtarget info.
let Inst{27-25} = 0b110;
let Inst{11-8} = 0b1010;
@@ -1353,7 +1368,8 @@ class ADuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
// Double precision, binary
class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
- dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
: VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
let Inst{27-23} = opcod1;
let Inst{21-20} = opcod2;
@@ -1362,6 +1378,20 @@ class ADbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
let Inst{4} = op4;
}
+// Double precision, binary, VML[AS] (with an additional predicate)
+class ADbI_vmlX<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
+ : VFPAI<oops, iops, VFPBinaryFrm, itin, opc, asm, pattern> {
+ let Inst{27-23} = opcod1;
+ let Inst{21-20} = opcod2;
+ let Inst{11-8} = 0b1011;
+ let Inst{6} = op6;
+ let Inst{4} = op4;
+ list<Predicate> Predicates = [HasVFP2, UseVMLx];
+}
+
+
// Single precision, unary
class ASuI<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4,
bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc,
@@ -1399,7 +1429,8 @@ class ASbI<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops,
// Single precision binary, if no NEON
// Same as ASbI except not available if NEON is enabled
class ASbIn<bits<5> opcod1, bits<2> opcod2, bit op6, bit op4, dag oops,
- dag iops, InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
: ASbI<opcod1, opcod2, op6, op4, oops, iops, itin, opc, asm, pattern> {
list<Predicate> Predicates = [HasVFP2,DontUseNEONForFP];
}
@@ -1419,8 +1450,8 @@ class AVConv1I<bits<5> opcod1, bits<2> opcod2, bits<4> opcod3, bits<4> opcod4,
// VFP conversion between floating-point and fixed-point
class AVConv1XI<bits<5> op1, bits<2> op2, bits<4> op3, bits<4> op4, bit op5,
- dag oops, dag iops, InstrItinClass itin, string opc, string asm,
- list<dag> pattern>
+ dag oops, dag iops, InstrItinClass itin, string opc, string asm,
+ list<dag> pattern>
: AVConv1I<op1, op2, op3, op4, oops, iops, itin, opc, asm, pattern> {
// size (fixed-point number): sx == 0 ? 16 : 32
let Inst{7} = op5; // sx
@@ -1448,7 +1479,7 @@ class AVConv2I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
: AVConvXI<opcod1, opcod2, oops, iops, VFPConv2Frm, itin, opc, asm, pattern>;
-class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
+class AVConv3I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
: AVConvXI<opcod1, opcod2, oops, iops, VFPConv3Frm, itin, opc, asm, pattern>;
@@ -1480,9 +1511,10 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
}
// Same as NeonI except it does not have a "data type" specifier.
-class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> {
+class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
+ InstrItinClass itin, string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm));
@@ -1490,18 +1522,6 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin,
list<Predicate> Predicates = [HasNEON];
}
-class NI<dag oops, dag iops, InstrItinClass itin, string opc, string asm,
- list<dag> pattern>
- : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm, "",
- pattern> {
-}
-
-class NI4<dag oops, dag iops, InstrItinClass itin, string opc,
- string asm, list<dag> pattern>
- : NeonXI<oops, iops, AddrMode4, IndexModeNone, itin, opc, asm, "",
- pattern> {
-}
-
class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
@@ -1514,17 +1534,17 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
let Inst{7-4} = op7_4;
}
-class NDataI<dag oops, dag iops, InstrItinClass itin,
+class NDataI<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NeonI<oops, iops, AddrModeNone, IndexModeNone, NEONFrm, itin, opc, dt, asm,
- cstr, pattern> {
+ : NeonI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, dt, asm, cstr,
+ pattern> {
let Inst{31-25} = 0b1111001;
}
-class NDataXI<dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : NeonXI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, asm,
- cstr, pattern> {
+class NDataXI<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NeonXI<oops, iops, AddrModeNone, IndexModeNone, f, itin, opc, asm,
+ cstr, pattern> {
let Inst{31-25} = 0b1111001;
}
@@ -1532,8 +1552,9 @@ class NDataXI<dag oops, dag iops, InstrItinClass itin,
class N1ModImm<bit op23, bits<3> op21_19, bits<4> op11_8, bit op7, bit op6,
bit op5, bit op4,
dag oops, dag iops, InstrItinClass itin,
- string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
+ string opc, string dt, string asm, string cstr,
+ list<dag> pattern>
+ : NDataI<oops, iops, N1RegModImmFrm, itin, opc, dt, asm, cstr, pattern> {
let Inst{23} = op23;
let Inst{21-19} = op21_19;
let Inst{11-8} = op11_8;
@@ -1548,7 +1569,7 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
bits<5> op11_7, bit op6, bit op4,
dag oops, dag iops, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
+ : NDataI<oops, iops, N2RegFrm, itin, opc, dt, asm, cstr, pattern> {
let Inst{24-23} = op24_23;
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
@@ -1560,10 +1581,10 @@ class N2V<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
// Same as N2V except it doesn't have a datatype suffix.
class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
- bits<5> op11_7, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> {
+ bits<5> op11_7, bit op6, bit op4,
+ dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, N2RegFrm, itin, opc, asm, cstr, pattern> {
let Inst{24-23} = op24_23;
let Inst{21-20} = op21_20;
let Inst{19-18} = op19_18;
@@ -1575,9 +1596,9 @@ class N2VX<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16,
// NEON 2 vector register with immediate.
class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
+ dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{11-8} = op11_8;
@@ -1588,9 +1609,9 @@ class N2VImm<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
// NEON 3 vector register format.
class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
+ dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, string cstr, list<dag> pattern>
- : NDataI<oops, iops, itin, opc, dt, asm, cstr, pattern> {
+ : NDataI<oops, iops, f, itin, opc, dt, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
@@ -1599,11 +1620,12 @@ class N3V<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
let Inst{4} = op4;
}
-// Same as N3VX except it doesn't have a data type suffix.
-class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6, bit op4,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, string cstr, list<dag> pattern>
- : NDataXI<oops, iops, itin, opc, asm, cstr, pattern> {
+// Same as N3V except it doesn't have a data type suffix.
+class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
+ bit op4,
+ dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : NDataXI<oops, iops, f, itin, opc, asm, cstr, pattern> {
let Inst{24} = op24;
let Inst{23} = op23;
let Inst{21-20} = op21_20;
@@ -1617,7 +1639,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
: InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, GenericDomain,
- "", itin> {
+ "", itin> {
let Inst{27-20} = opcod1;
let Inst{11-8} = opcod2;
let Inst{6-5} = opcod3;
@@ -1647,6 +1669,19 @@ class NVDup<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
: NVLaneOp<opcod1, opcod2, opcod3, oops, iops, NEONDupFrm, itin,
opc, dt, asm, pattern>;
+// Vector Duplicate Lane (from scalar to all elements)
+class NVDupLane<bits<4> op19_16, bit op6, dag oops, dag iops,
+ InstrItinClass itin, string opc, string dt, string asm,
+ list<dag> pattern>
+ : NDataI<oops, iops, NVDupLnFrm, itin, opc, dt, asm, "", pattern> {
+ let Inst{24-23} = 0b11;
+ let Inst{21-20} = 0b11;
+ let Inst{19-16} = op19_16;
+ let Inst{11-7} = 0b11000;
+ let Inst{6} = op6;
+ let Inst{4} = 0;
+}
+
// NEONFPPat - Same as Pat<>, but requires that the compiler be using NEON
// for single-precision FP.
class NEONFPPat<dag pattern, dag result> : Pat<pattern, result> {
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 26a2806..f2ab06f 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -140,6 +140,8 @@ def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
def UseMovt : Predicate<"Subtarget->useMovt()">;
def DontUseMovt : Predicate<"!Subtarget->useMovt()">;
+def UseVMLx : Predicate<"Subtarget->useVMLx()">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index c977cc3..ed9d31d 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -115,51 +115,80 @@ def h64imm : Operand<i64> {
// NEON load / store instructions
//===----------------------------------------------------------------------===//
+let mayLoad = 1 in {
// Use vldmia to load a Q register as a D register pair.
-def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm,
- "vldmia", "$addr, ${dst:dregpair}",
- [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
- let Inst{27-25} = 0b110;
- let Inst{24} = 0; // P bit
- let Inst{23} = 1; // U bit
- let Inst{20} = 1;
- let Inst{11-8} = 0b1011;
-}
+// This is equivalent to VLDMD except that it has a Q register operand
+// instead of a pair of D registers.
+def VLDMQ
+ : AXDI5<(outs QPR:$dst), (ins addrmode5:$addr, pred:$p),
+ IndexModeNone, IIC_fpLoadm,
+ "vldm${addr:submode}${p}\t${addr:base}, ${dst:dregpair}", "", []>;
+def VLDMQ_UPD
+ : AXDI5<(outs QPR:$dst, GPR:$wb), (ins addrmode5:$addr, pred:$p),
+ IndexModeUpd, IIC_fpLoadm,
+ "vldm${addr:submode}${p}\t${addr:base}!, ${dst:dregpair}",
+ "$addr.base = $wb", []>;
+
+// Use vld1 to load a Q register as a D register pair.
+// This alternative to VLDMQ allows an alignment to be specified.
+// This is equivalent to VLD1q64 except that it has a Q register operand.
+def VLD1q
+ : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst), (ins addrmode6:$addr),
+ IIC_VLD1, "vld1", "64", "${dst:dregpair}, $addr", "", []>;
+def VLD1q_UPD
+ : NLdSt<0,0b10,0b1010,0b1100, (outs QPR:$dst, GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", "64",
+ "${dst:dregpair}, $addr$offset", "$addr.addr = $wb", []>;
+} // mayLoad = 1
+let mayStore = 1 in {
// Use vstmia to store a Q register as a D register pair.
-def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
- "vstmia", "$addr, ${src:dregpair}",
- [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
- let Inst{27-25} = 0b110;
- let Inst{24} = 0; // P bit
- let Inst{23} = 1; // U bit
- let Inst{20} = 0;
- let Inst{11-8} = 0b1011;
-}
+// This is equivalent to VSTMD except that it has a Q register operand
+// instead of a pair of D registers.
+def VSTMQ
+ : AXDI5<(outs), (ins QPR:$src, addrmode5:$addr, pred:$p),
+ IndexModeNone, IIC_fpStorem,
+ "vstm${addr:submode}${p}\t${addr:base}, ${src:dregpair}", "", []>;
+def VSTMQ_UPD
+ : AXDI5<(outs GPR:$wb), (ins QPR:$src, addrmode5:$addr, pred:$p),
+ IndexModeUpd, IIC_fpStorem,
+ "vstm${addr:submode}${p}\t${addr:base}!, ${src:dregpair}",
+ "$addr.base = $wb", []>;
+
+// Use vst1 to store a Q register as a D register pair.
+// This alternative to VSTMQ allows an alignment to be specified.
+// This is equivalent to VST1q64 except that it has a Q register operand.
+def VST1q
+ : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, QPR:$src),
+ IIC_VST, "vst1", "64", "${src:dregpair}, $addr", "", []>;
+def VST1q_UPD
+ : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
+ (ins addrmode6:$addr, am6offset:$offset, QPR:$src),
+ IIC_VST, "vst1", "64", "{$src:dregpair}, $addr$offset",
+ "$addr.addr = $wb", []>;
+} // mayStore = 1
-// VLD1 : Vector Load (multiple single elements)
-class VLD1D<bits<4> op7_4, string Dt, ValueType Ty>
- : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- "vld1", Dt, "\\{$dst\\}, $addr", "",
- [(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
-class VLD1Q<bits<4> op7_4, string Dt, ValueType Ty>
- : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1,
- "vld1", Dt, "${dst:dregpair}, $addr", "",
- [(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>;
-
-def VLD1d8 : VLD1D<0b0000, "8", v8i8>;
-def VLD1d16 : VLD1D<0b0100, "16", v4i16>;
-def VLD1d32 : VLD1D<0b1000, "32", v2i32>;
-def VLD1df : VLD1D<0b1000, "32", v2f32>;
-def VLD1d64 : VLD1D<0b1100, "64", v1i64>;
-
-def VLD1q8 : VLD1Q<0b0000, "8", v16i8>;
-def VLD1q16 : VLD1Q<0b0100, "16", v8i16>;
-def VLD1q32 : VLD1Q<0b1000, "32", v4i32>;
-def VLD1qf : VLD1Q<0b1000, "32", v4f32>;
-def VLD1q64 : VLD1Q<0b1100, "64", v2i64>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
-let mayLoad = 1 in {
+// VLD1 : Vector Load (multiple single elements)
+class VLD1D<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst),
+ (ins addrmode6:$addr), IIC_VLD1,
+ "vld1", Dt, "\\{$dst\\}, $addr", "", []>;
+class VLD1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr), IIC_VLD1,
+ "vld1", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+
+def VLD1d8 : VLD1D<0b0000, "8">;
+def VLD1d16 : VLD1D<0b0100, "16">;
+def VLD1d32 : VLD1D<0b1000, "32">;
+def VLD1d64 : VLD1D<0b1100, "64">;
+
+def VLD1q8 : VLD1Q<0b0000, "8">;
+def VLD1q16 : VLD1Q<0b0100, "16">;
+def VLD1q32 : VLD1Q<0b1000, "32">;
+def VLD1q64 : VLD1Q<0b1100, "64">;
// ...with address register writeback:
class VLD1DWB<bits<4> op7_4, string Dt>
@@ -182,54 +211,48 @@ def VLD1q8_UPD : VLD1QWB<0b0000, "8">;
def VLD1q16_UPD : VLD1QWB<0b0100, "16">;
def VLD1q32_UPD : VLD1QWB<0b1000, "32">;
def VLD1q64_UPD : VLD1QWB<0b1100, "64">;
-} // mayLoad = 1
-let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
-
-// These (dreg triple/quadruple) are for disassembly only.
+// ...with 3 registers (some of these are only for the disassembler):
class VLD1D3<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
- "\\{$dst1, $dst2, $dst3\\}, $addr", "",
- [/* For disassembly only; pattern left blank */]>;
-class VLD1D4<bits<4> op7_4, string Dt>
- : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
- "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "",
- [/* For disassembly only; pattern left blank */]>;
-
-def VLD1d8T : VLD1D3<0b0000, "8">;
-def VLD1d16T : VLD1D3<0b0100, "16">;
-def VLD1d32T : VLD1D3<0b1000, "32">;
-// VLD1d64T : implemented as VLD3d64
-
-def VLD1d8Q : VLD1D4<0b0000, "8">;
-def VLD1d16Q : VLD1D4<0b0100, "16">;
-def VLD1d32Q : VLD1D4<0b1000, "32">;
-// VLD1d64Q : implemented as VLD4d64
-
-// ...with address register writeback:
+ "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
class VLD1D3WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
- "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb",
- [/* For disassembly only; pattern left blank */]>;
+ "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", []>;
+
+def VLD1d8T : VLD1D3<0b0000, "8">;
+def VLD1d16T : VLD1D3<0b0100, "16">;
+def VLD1d32T : VLD1D3<0b1000, "32">;
+def VLD1d64T : VLD1D3<0b1100, "64">;
+
+def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">;
+def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
+def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
+def VLD1d64T_UPD : VLD1D3WB<0b1100, "64">;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VLD1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt,
+ "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>;
class VLD1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0,0b10,0b0010,op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt,
"\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb",
- [/* For disassembly only; pattern left blank */]>;
+ []>;
-def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">;
-def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">;
-def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">;
-// VLD1d64T_UPD : implemented as VLD3d64_UPD
+def VLD1d8Q : VLD1D4<0b0000, "8">;
+def VLD1d16Q : VLD1D4<0b0100, "16">;
+def VLD1d32Q : VLD1D4<0b1000, "32">;
+def VLD1d64Q : VLD1D4<0b1100, "64">;
def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">;
def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">;
def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">;
-// VLD1d64Q_UPD : implemented as VLD4d64_UPD
+def VLD1d64Q_UPD : VLD1D4WB<0b1100, "64">;
// VLD2 : Vector Load (multiple 2-element structures)
class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -245,9 +268,6 @@ class VLD2Q<bits<4> op7_4, string Dt>
def VLD2d8 : VLD2D<0b1000, 0b0000, "8">;
def VLD2d16 : VLD2D<0b1000, 0b0100, "16">;
def VLD2d32 : VLD2D<0b1000, 0b1000, "32">;
-def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2),
- (ins addrmode6:$addr), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>;
def VLD2q8 : VLD2Q<0b0000, "8">;
def VLD2q16 : VLD2Q<0b0100, "16">;
@@ -269,11 +289,6 @@ class VLD2QWB<bits<4> op7_4, string Dt>
def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">;
def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">;
def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">;
-def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100,
- (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset",
- "$addr.addr = $wb", []>;
def VLD2q8_UPD : VLD2QWB<0b0000, "8">;
def VLD2q16_UPD : VLD2QWB<0b0100, "16">;
@@ -296,10 +311,6 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt>
def VLD3d8 : VLD3D<0b0100, 0b0000, "8">;
def VLD3d16 : VLD3D<0b0100, 0b0100, "16">;
def VLD3d32 : VLD3D<0b0100, 0b1000, "32">;
-def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
- (ins addrmode6:$addr), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>;
// ...with address register writeback:
class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -312,11 +323,6 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">;
def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">;
def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">;
-def VLD3d64_UPD : NLdSt<0,0b10,0b0110,0b1100,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr$offset",
- "$addr.addr = $wb", []>;
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD3q8 : VLD3D<0b0101, 0b0000, "8">;
@@ -341,11 +347,6 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt>
def VLD4d8 : VLD4D<0b0000, 0b0000, "8">;
def VLD4d16 : VLD4D<0b0000, 0b0100, "16">;
def VLD4d32 : VLD4D<0b0000, 0b1000, "32">;
-def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
- (ins addrmode6:$addr), IIC_VLD1,
- "vld1", "64", "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr",
- "", []>;
// ...with address register writeback:
class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -358,13 +359,6 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">;
def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">;
def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">;
-def VLD4d64_UPD : NLdSt<0,0b10,0b0010,0b1100,
- (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4,
- GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1,
- "vld1", "64",
- "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset",
- "$addr.addr = $wb", []>;
// ...with double-spaced registers (non-updating versions for disassembly only):
def VLD4q8 : VLD4D<0b0001, 0b0000, "8">;
@@ -383,62 +377,62 @@ def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">;
// FIXME: Not yet implemented.
// VLD2LN : Vector Load (single 2-element structure to one lane)
-class VLD2LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2),
+class VLD2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2", []>;
-def VLD2LNd8 : VLD2LN<0b0001, "8">;
-def VLD2LNd16 : VLD2LN<0b0101, "16"> { let Inst{5} = 0; }
-def VLD2LNd32 : VLD2LN<0b1001, "32"> { let Inst{6} = 0; }
+def VLD2LNd8 : VLD2LN<0b0001, {?,?,?,?}, "8">;
+def VLD2LNd16 : VLD2LN<0b0101, {?,?,0,?}, "16">;
+def VLD2LNd32 : VLD2LN<0b1001, {?,0,?,?}, "32">;
// ...with double-spaced registers:
-def VLD2LNq16 : VLD2LN<0b0101, "16"> { let Inst{5} = 1; }
-def VLD2LNq32 : VLD2LN<0b1001, "32"> { let Inst{6} = 1; }
+def VLD2LNq16 : VLD2LN<0b0101, {?,?,1,?}, "16">;
+def VLD2LNq32 : VLD2LN<0b1001, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VLD2LNq16odd : VLD2LN<0b0101, "16"> { let Inst{5} = 1; }
-def VLD2LNq32odd : VLD2LN<0b1001, "32"> { let Inst{6} = 1; }
+def VLD2LNq16odd : VLD2LN<0b0101, {?,?,1,?}, "16">;
+def VLD2LNq32odd : VLD2LN<0b1001, {?,1,?,?}, "32">;
// ...with address register writeback:
-class VLD2LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
+class VLD2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt,
"\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset",
"$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>;
-def VLD2LNd8_UPD : VLD2LNWB<0b0001, "8">;
-def VLD2LNd16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 0; }
-def VLD2LNd32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 0; }
+def VLD2LNd8_UPD : VLD2LNWB<0b0001, {?,?,?,?}, "8">;
+def VLD2LNd16_UPD : VLD2LNWB<0b0101, {?,?,0,?}, "16">;
+def VLD2LNd32_UPD : VLD2LNWB<0b1001, {?,0,?,?}, "32">;
-def VLD2LNq16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 1; }
-def VLD2LNq32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 1; }
+def VLD2LNq16_UPD : VLD2LNWB<0b0101, {?,?,1,?}, "16">;
+def VLD2LNq32_UPD : VLD2LNWB<0b1001, {?,1,?,?}, "32">;
// VLD3LN : Vector Load (single 3-element structure to one lane)
-class VLD3LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+class VLD3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VLD3, "vld3", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
-def VLD3LNd8 : VLD3LN<0b0010, "8"> { let Inst{4} = 0; }
-def VLD3LNd16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b00; }
-def VLD3LNd32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b000; }
+def VLD3LNd8 : VLD3LN<0b0010, {?,?,?,0}, "8">;
+def VLD3LNd16 : VLD3LN<0b0110, {?,?,0,0}, "16">;
+def VLD3LNd32 : VLD3LN<0b1010, {?,0,0,0}, "32">;
// ...with double-spaced registers:
-def VLD3LNq16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VLD3LNq32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VLD3LNq16 : VLD3LN<0b0110, {?,?,1,0}, "16">;
+def VLD3LNq32 : VLD3LN<0b1010, {?,1,0,0}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VLD3LNq16odd : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VLD3LNq32odd : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VLD3LNq16odd : VLD3LN<0b0110, {?,?,1,0}, "16">;
+def VLD3LNq32odd : VLD3LN<0b1010, {?,1,0,0}, "32">;
// ...with address register writeback:
-class VLD3LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+class VLD3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
@@ -447,37 +441,37 @@ class VLD3LNWB<bits<4> op11_8, string Dt>
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb",
[]>;
-def VLD3LNd8_UPD : VLD3LNWB<0b0010, "8"> { let Inst{4} = 0; }
-def VLD3LNd16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; }
-def VLD3LNd32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; }
+def VLD3LNd8_UPD : VLD3LNWB<0b0010, {?,?,?,0}, "8">;
+def VLD3LNd16_UPD : VLD3LNWB<0b0110, {?,?,0,0}, "16">;
+def VLD3LNd32_UPD : VLD3LNWB<0b1010, {?,0,0,0}, "32">;
-def VLD3LNq16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VLD3LNq32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VLD3LNq16_UPD : VLD3LNWB<0b0110, {?,?,1,0}, "16">;
+def VLD3LNq32_UPD : VLD3LNWB<0b1010, {?,1,0,0}, "32">;
// VLD4LN : Vector Load (single 4-element structure to one lane)
-class VLD4LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+class VLD4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VLD4, "vld4", Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
-def VLD4LNd8 : VLD4LN<0b0011, "8">;
-def VLD4LNd16 : VLD4LN<0b0111, "16"> { let Inst{5} = 0; }
-def VLD4LNd32 : VLD4LN<0b1011, "32"> { let Inst{6} = 0; }
+def VLD4LNd8 : VLD4LN<0b0011, {?,?,?,?}, "8">;
+def VLD4LNd16 : VLD4LN<0b0111, {?,?,0,?}, "16">;
+def VLD4LNd32 : VLD4LN<0b1011, {?,0,?,?}, "32">;
// ...with double-spaced registers:
-def VLD4LNq16 : VLD4LN<0b0111, "16"> { let Inst{5} = 1; }
-def VLD4LNq32 : VLD4LN<0b1011, "32"> { let Inst{6} = 1; }
+def VLD4LNq16 : VLD4LN<0b0111, {?,?,1,?}, "16">;
+def VLD4LNq32 : VLD4LN<0b1011, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VLD4LNq16odd : VLD4LN<0b0111, "16"> { let Inst{5} = 1; }
-def VLD4LNq32odd : VLD4LN<0b1011, "32"> { let Inst{6} = 1; }
+def VLD4LNq16odd : VLD4LN<0b0111, {?,?,1,?}, "16">;
+def VLD4LNq32odd : VLD4LN<0b1011, {?,1,?,?}, "32">;
// ...with address register writeback:
-class VLD4LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b10, op11_8, {?,?,?,?},
+class VLD4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b10, op11_8, op7_4,
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
@@ -486,12 +480,12 @@ class VLD4LNWB<bits<4> op11_8, string Dt>
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb",
[]>;
-def VLD4LNd8_UPD : VLD4LNWB<0b0011, "8">;
-def VLD4LNd16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 0; }
-def VLD4LNd32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 0; }
+def VLD4LNd8_UPD : VLD4LNWB<0b0011, {?,?,?,?}, "8">;
+def VLD4LNd16_UPD : VLD4LNWB<0b0111, {?,?,0,?}, "16">;
+def VLD4LNd32_UPD : VLD4LNWB<0b1011, {?,0,?,?}, "32">;
-def VLD4LNq16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 1; }
-def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; }
+def VLD4LNq16_UPD : VLD4LNWB<0b0111, {?,?,1,?}, "16">;
+def VLD4LNq32_UPD : VLD4LNWB<0b1011, {?,1,?,?}, "32">;
// VLD1DUP : Vector Load (single element to all lanes)
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
@@ -500,31 +494,26 @@ def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; }
// FIXME: Not yet implemented.
} // mayLoad = 1, hasExtraDefRegAllocReq = 1
+let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+
// VST1 : Vector Store (multiple single elements)
-class VST1D<bits<4> op7_4, string Dt, ValueType Ty>
+class VST1D<bits<4> op7_4, string Dt>
: NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST,
- "vst1", Dt, "\\{$src\\}, $addr", "",
- [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>;
-class VST1Q<bits<4> op7_4, string Dt, ValueType Ty>
- : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST,
- "vst1", Dt, "${src:dregpair}, $addr", "",
- [(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>;
-
-let hasExtraSrcRegAllocReq = 1 in {
-def VST1d8 : VST1D<0b0000, "8", v8i8>;
-def VST1d16 : VST1D<0b0100, "16", v4i16>;
-def VST1d32 : VST1D<0b1000, "32", v2i32>;
-def VST1df : VST1D<0b1000, "32", v2f32>;
-def VST1d64 : VST1D<0b1100, "64", v1i64>;
-
-def VST1q8 : VST1Q<0b0000, "8", v16i8>;
-def VST1q16 : VST1Q<0b0100, "16", v8i16>;
-def VST1q32 : VST1Q<0b1000, "32", v4i32>;
-def VST1qf : VST1Q<0b1000, "32", v4f32>;
-def VST1q64 : VST1Q<0b1100, "64", v2i64>;
-} // hasExtraSrcRegAllocReq
-
-let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
+ "vst1", Dt, "\\{$src\\}, $addr", "", []>;
+class VST1Q<bits<4> op7_4, string Dt>
+ : NLdSt<0,0b00,0b1010,op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
+ "vst1", Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+
+def VST1d8 : VST1D<0b0000, "8">;
+def VST1d16 : VST1D<0b0100, "16">;
+def VST1d32 : VST1D<0b1000, "32">;
+def VST1d64 : VST1D<0b1100, "64">;
+
+def VST1q8 : VST1Q<0b0000, "8">;
+def VST1q16 : VST1Q<0b0100, "16">;
+def VST1q32 : VST1Q<0b1000, "32">;
+def VST1q64 : VST1Q<0b1100, "64">;
// ...with address register writeback:
class VST1DWB<bits<4> op7_4, string Dt>
@@ -546,53 +535,50 @@ def VST1q16_UPD : VST1QWB<0b0100, "16">;
def VST1q32_UPD : VST1QWB<0b1000, "32">;
def VST1q64_UPD : VST1QWB<0b1100, "64">;
-// These (dreg triple/quadruple) are for disassembly only.
+// ...with 3 registers (some of these are only for the disassembler):
class VST1D3<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "",
- [/* For disassembly only; pattern left blank */]>;
-class VST1D4<bits<4> op7_4, string Dt>
- : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
- [/* For disassembly only; pattern left blank */]>;
-
-def VST1d8T : VST1D3<0b0000, "8">;
-def VST1d16T : VST1D3<0b0100, "16">;
-def VST1d32T : VST1D3<0b1000, "32">;
-// VST1d64T : implemented as VST3d64
-
-def VST1d8Q : VST1D4<0b0000, "8">;
-def VST1d16Q : VST1D4<0b0100, "16">;
-def VST1d32Q : VST1D4<0b1000, "32">;
-// VST1d64Q : implemented as VST4d64
-
-// ...with address register writeback:
+ IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>;
class VST1D3WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3),
IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset",
- "$addr.addr = $wb",
- [/* For disassembly only; pattern left blank */]>;
+ "$addr.addr = $wb", []>;
+
+def VST1d8T : VST1D3<0b0000, "8">;
+def VST1d16T : VST1D3<0b0100, "16">;
+def VST1d32T : VST1D3<0b1000, "32">;
+def VST1d64T : VST1D3<0b1100, "64">;
+
+def VST1d8T_UPD : VST1D3WB<0b0000, "8">;
+def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
+def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
+def VST1d64T_UPD : VST1D3WB<0b1100, "64">;
+
+// ...with 4 registers (some of these are only for the disassembler):
+class VST1D4<bits<4> op7_4, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
+ []>;
class VST1D4WB<bits<4> op7_4, string Dt>
: NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
- "$addr.addr = $wb",
- [/* For disassembly only; pattern left blank */]>;
+ "$addr.addr = $wb", []>;
-def VST1d8T_UPD : VST1D3WB<0b0000, "8">;
-def VST1d16T_UPD : VST1D3WB<0b0100, "16">;
-def VST1d32T_UPD : VST1D3WB<0b1000, "32">;
-// VST1d64T_UPD : implemented as VST3d64_UPD
+def VST1d8Q : VST1D4<0b0000, "8">;
+def VST1d16Q : VST1D4<0b0100, "16">;
+def VST1d32Q : VST1D4<0b1000, "32">;
+def VST1d64Q : VST1D4<0b1100, "64">;
def VST1d8Q_UPD : VST1D4WB<0b0000, "8">;
def VST1d16Q_UPD : VST1D4WB<0b0100, "16">;
def VST1d32Q_UPD : VST1D4WB<0b1000, "32">;
-// VST1d64Q_UPD : implemented as VST4d64_UPD
+def VST1d64Q_UPD : VST1D4WB<0b1100, "64">;
// VST2 : Vector Store (multiple 2-element structures)
class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -608,9 +594,6 @@ class VST2Q<bits<4> op7_4, string Dt>
def VST2d8 : VST2D<0b1000, 0b0000, "8">;
def VST2d16 : VST2D<0b1000, 0b0100, "16">;
def VST2d32 : VST2D<0b1000, 0b1000, "32">;
-def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
- "vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>;
def VST2q8 : VST2Q<0b0000, "8">;
def VST2q16 : VST2Q<0b0100, "16">;
@@ -632,11 +615,6 @@ class VST2QWB<bits<4> op7_4, string Dt>
def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">;
def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">;
def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">;
-def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2), IIC_VST,
- "vst1", "64", "\\{$src1, $src2\\}, $addr$offset",
- "$addr.addr = $wb", []>;
def VST2q8_UPD : VST2QWB<0b0000, "8">;
def VST2q16_UPD : VST2QWB<0b0100, "16">;
@@ -659,10 +637,6 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt>
def VST3d8 : VST3D<0b0100, 0b0000, "8">;
def VST3d16 : VST3D<0b0100, 0b0100, "16">;
def VST3d32 : VST3D<0b0100, 0b1000, "32">;
-def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VST,
- "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr", "", []>;
// ...with address register writeback:
class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -675,11 +649,6 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">;
def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">;
def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">;
-def VST3d64_UPD : NLdSt<0,0b00,0b0110,0b1100, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
- "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr$offset",
- "$addr.addr = $wb", []>;
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST3q8 : VST3D<0b0101, 0b0000, "8">;
@@ -704,11 +673,6 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt>
def VST4d8 : VST4D<0b0000, 0b0000, "8">;
def VST4d16 : VST4D<0b0000, 0b0100, "16">;
def VST4d32 : VST4D<0b0000, 0b1000, "32">;
-def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- DPR:$src4), IIC_VST,
- "vst1", "64", "\\{$src1, $src2, $src3, $src4\\}, $addr",
- "", []>;
// ...with address register writeback:
class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
@@ -721,12 +685,6 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt>
def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">;
def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">;
def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">;
-def VST4d64_UPD : NLdSt<0,0b00,0b0010,0b1100, (outs GPR:$wb),
- (ins addrmode6:$addr, am6offset:$offset,
- DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST,
- "vst1", "64",
- "\\{$src1, $src2, $src3, $src4\\}, $addr$offset",
- "$addr.addr = $wb", []>;
// ...with double-spaced registers (non-updating versions for disassembly only):
def VST4q8 : VST4D<0b0001, 0b0000, "8">;
@@ -745,109 +703,109 @@ def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">;
// FIXME: Not yet implemented.
// VST2LN : Vector Store (single 2-element structure from one lane)
-class VST2LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
+class VST2LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
"", []>;
-def VST2LNd8 : VST2LN<0b0001, "8">;
-def VST2LNd16 : VST2LN<0b0101, "16"> { let Inst{5} = 0; }
-def VST2LNd32 : VST2LN<0b1001, "32"> { let Inst{6} = 0; }
+def VST2LNd8 : VST2LN<0b0001, {?,?,?,?}, "8">;
+def VST2LNd16 : VST2LN<0b0101, {?,?,0,?}, "16">;
+def VST2LNd32 : VST2LN<0b1001, {?,0,?,?}, "32">;
// ...with double-spaced registers:
-def VST2LNq16 : VST2LN<0b0101, "16"> { let Inst{5} = 1; }
-def VST2LNq32 : VST2LN<0b1001, "32"> { let Inst{6} = 1; }
+def VST2LNq16 : VST2LN<0b0101, {?,?,1,?}, "16">;
+def VST2LNq32 : VST2LN<0b1001, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VST2LNq16odd : VST2LN<0b0101, "16"> { let Inst{5} = 1; }
-def VST2LNq32odd : VST2LN<0b1001, "32"> { let Inst{6} = 1; }
+def VST2LNq16odd : VST2LN<0b0101, {?,?,1,?}, "16">;
+def VST2LNq32odd : VST2LN<0b1001, {?,1,?,?}, "32">;
// ...with address register writeback:
-class VST2LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+class VST2LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt,
"\\{$src1[$lane], $src2[$lane]\\}, $addr$offset",
"$addr.addr = $wb", []>;
-def VST2LNd8_UPD : VST2LNWB<0b0001, "8">;
-def VST2LNd16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 0; }
-def VST2LNd32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 0; }
+def VST2LNd8_UPD : VST2LNWB<0b0001, {?,?,?,?}, "8">;
+def VST2LNd16_UPD : VST2LNWB<0b0101, {?,?,0,?}, "16">;
+def VST2LNd32_UPD : VST2LNWB<0b1001, {?,0,?,?}, "32">;
-def VST2LNq16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 1; }
-def VST2LNq32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 1; }
+def VST2LNq16_UPD : VST2LNWB<0b0101, {?,?,1,?}, "16">;
+def VST2LNq32_UPD : VST2LNWB<0b1001, {?,1,?,?}, "32">;
// VST3LN : Vector Store (single 3-element structure from one lane)
-class VST3LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
+class VST3LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
nohash_imm:$lane), IIC_VST, "vst3", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
-def VST3LNd8 : VST3LN<0b0010, "8"> { let Inst{4} = 0; }
-def VST3LNd16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b00; }
-def VST3LNd32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b000; }
+def VST3LNd8 : VST3LN<0b0010, {?,?,?,0}, "8">;
+def VST3LNd16 : VST3LN<0b0110, {?,?,0,0}, "16">;
+def VST3LNd32 : VST3LN<0b1010, {?,0,0,0}, "32">;
// ...with double-spaced registers:
-def VST3LNq16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VST3LNq32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VST3LNq16 : VST3LN<0b0110, {?,?,1,0}, "16">;
+def VST3LNq32 : VST3LN<0b1010, {?,1,0,0}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VST3LNq16odd : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VST3LNq32odd : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VST3LNq16odd : VST3LN<0b0110, {?,?,1,0}, "16">;
+def VST3LNq32odd : VST3LN<0b1010, {?,1,0,0}, "32">;
// ...with address register writeback:
-class VST3LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+class VST3LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane),
IIC_VST, "vst3", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset",
"$addr.addr = $wb", []>;
-def VST3LNd8_UPD : VST3LNWB<0b0010, "8"> { let Inst{4} = 0; }
-def VST3LNd16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; }
-def VST3LNd32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; }
+def VST3LNd8_UPD : VST3LNWB<0b0010, {?,?,?,0}, "8">;
+def VST3LNd16_UPD : VST3LNWB<0b0110, {?,?,0,0}, "16">;
+def VST3LNd32_UPD : VST3LNWB<0b1010, {?,0,0,0}, "32">;
-def VST3LNq16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; }
-def VST3LNq32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; }
+def VST3LNq16_UPD : VST3LNWB<0b0110, {?,?,1,0}, "16">;
+def VST3LNq32_UPD : VST3LNWB<0b1010, {?,1,0,0}, "32">;
// VST4LN : Vector Store (single 4-element structure from one lane)
-class VST4LN<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs),
+class VST4LN<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
nohash_imm:$lane), IIC_VST, "vst4", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
"", []>;
-def VST4LNd8 : VST4LN<0b0011, "8">;
-def VST4LNd16 : VST4LN<0b0111, "16"> { let Inst{5} = 0; }
-def VST4LNd32 : VST4LN<0b1011, "32"> { let Inst{6} = 0; }
+def VST4LNd8 : VST4LN<0b0011, {?,?,?,?}, "8">;
+def VST4LNd16 : VST4LN<0b0111, {?,?,0,?}, "16">;
+def VST4LNd32 : VST4LN<0b1011, {?,0,?,?}, "32">;
// ...with double-spaced registers:
-def VST4LNq16 : VST4LN<0b0111, "16"> { let Inst{5} = 1; }
-def VST4LNq32 : VST4LN<0b1011, "32"> { let Inst{6} = 1; }
+def VST4LNq16 : VST4LN<0b0111, {?,?,1,?}, "16">;
+def VST4LNq32 : VST4LN<0b1011, {?,1,?,?}, "32">;
// ...alternate versions to be allocated odd register numbers:
-def VST4LNq16odd : VST4LN<0b0111, "16"> { let Inst{5} = 1; }
-def VST4LNq32odd : VST4LN<0b1011, "32"> { let Inst{6} = 1; }
+def VST4LNq16odd : VST4LN<0b0111, {?,?,1,?}, "16">;
+def VST4LNq32odd : VST4LN<0b1011, {?,1,?,?}, "32">;
// ...with address register writeback:
-class VST4LNWB<bits<4> op11_8, string Dt>
- : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb),
+class VST4LNWB<bits<4> op11_8, bits<4> op7_4, string Dt>
+ : NLdSt<1, 0b00, op11_8, op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset,
DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane),
IIC_VST, "vst4", Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset",
"$addr.addr = $wb", []>;
-def VST4LNd8_UPD : VST4LNWB<0b0011, "8">;
-def VST4LNd16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 0; }
-def VST4LNd32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 0; }
+def VST4LNd8_UPD : VST4LNWB<0b0011, {?,?,?,?}, "8">;
+def VST4LNd16_UPD : VST4LNWB<0b0111, {?,?,0,?}, "16">;
+def VST4LNd32_UPD : VST4LNWB<0b1011, {?,0,?,?}, "32">;
-def VST4LNq16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 1; }
-def VST4LNq32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 1; }
+def VST4LNq16_UPD : VST4LNWB<0b0111, {?,?,1,?}, "16">;
+def VST4LNq32_UPD : VST4LNWB<0b1011, {?,1,?,?}, "32">;
} // mayStore = 1, hasExtraSrcRegAllocReq = 1
@@ -906,18 +864,18 @@ class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
+ (ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt,"$dst, $src", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "",
+ (ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt,"$dst, $src", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4,
+ bits<2> op17_16, bits<5> op11_7, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
@@ -966,8 +924,8 @@ class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
- OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm,
+ IIC_VBIND, OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
let isCommutable = Commutable;
}
@@ -975,7 +933,7 @@ class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
let isCommutable = Commutable;
@@ -986,27 +944,28 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
OpcodeStr, "$dst, $src1, $src2", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{
let isCommutable = Commutable;
}
+
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_VFP2:$src2), imm:$lane)))))]>{
+ (Ty (NEONvduplane (Ty DPR_VFP2:$src2),imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- IIC_VMULi16D, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, IIC_VMULi16D, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
@@ -1017,7 +976,7 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1026,7 +985,7 @@ class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr,
ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, itin,
OpcodeStr, "$dst, $src1, $src2", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{
let isCommutable = Commutable;
@@ -1036,7 +995,7 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
@@ -1047,7 +1006,7 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- IIC_VMULi16Q, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, IIC_VMULi16Q, OpcodeStr, Dt,"$dst, $src1, $src2[$lane]","",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -1057,10 +1016,10 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), f, itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1069,7 +1028,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (IntOp (Ty DPR:$src1),
(Ty (NEONvduplane (Ty DPR_VFP2:$src2),
@@ -1080,19 +1039,18 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt, ValueType Ty, Intrinsic IntOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (IntOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_8:$src2),
- imm:$lane)))))]> {
+ (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), f, itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1102,7 +1060,7 @@ class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_VFP2:$src2),
@@ -1114,7 +1072,7 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -1128,14 +1086,14 @@ class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
ValueType Ty, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst, (Ty (OpNode DPR:$src1,
(Ty (MulOp DPR:$src2, DPR:$src3)))))]>;
@@ -1144,7 +1102,8 @@ class N3VDMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
+ (ins DPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
@@ -1156,7 +1115,8 @@ class N3VDMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType Ty, SDNode MulOp, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
+ (ins DPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
@@ -1168,7 +1128,7 @@ class N3VQMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty,
SDNode MulOp, SDNode OpNode>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst, (Ty (OpNode QPR:$src1,
(Ty (MulOp QPR:$src2, QPR:$src3)))))]>;
@@ -1177,7 +1137,8 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
SDNode MulOp, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
+ (ins QPR:$src1, QPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
@@ -1190,7 +1151,8 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
SDNode MulOp, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
+ (ins QPR:$src1, QPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
@@ -1204,7 +1166,7 @@ class N3VDInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), itin,
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src1),
(OpTy DPR:$src2), (OpTy DPR:$src3))))]>;
@@ -1212,7 +1174,7 @@ class N3VQInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), itin,
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src1),
(OpTy QPR:$src2), (OpTy QPR:$src3))))]>;
@@ -1223,7 +1185,7 @@ class N3VLInt3<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), itin,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2, DPR:$src3), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst,
(TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2), (TyD DPR:$src3))))]>;
@@ -1232,7 +1194,8 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
- (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane), itin,
+ (ins QPR:$src1, DPR:$src2, DPR_VFP2:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
@@ -1244,7 +1207,8 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
- (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
+ (ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane),
+ NVMulSLFrm, itin,
OpcodeStr, Dt, "$dst, $src2, $src3[$lane]", "$src1 = $dst",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (ResTy QPR:$src1),
@@ -1257,7 +1221,7 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), IIC_VBINi4D,
+ (outs DPR:$dst), (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINi4D,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set DPR:$dst, (TyD (IntOp (TyQ QPR:$src1), (TyQ QPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1268,7 +1232,7 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType TyQ, ValueType TyD, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (TyQ (IntOp (TyD DPR:$src1), (TyD DPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1277,8 +1241,8 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
- (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ (outs QPR:$dst), (ins DPR:$src1, DPR_VFP2:$src2, nohash_imm:$lane),
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (OpTy DPR:$src1),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
@@ -1288,7 +1252,7 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ NVMulSLFrm, itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (IntOp (OpTy DPR:$src1),
(OpTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -1299,7 +1263,7 @@ class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD,
Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), IIC_VSUBiD,
+ (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
[(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> {
let isCommutable = Commutable;
@@ -1344,17 +1308,17 @@ class N2VQPLInt2<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
// Shift by immediate,
// both double- and quad-register.
class N2VDSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), itin,
+ (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), f, itin,
OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (Ty (OpNode (Ty DPR:$src), (i32 imm:$SIMM))))]>;
class N2VQSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
+ Format f, InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
+ (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), f, itin,
OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (Ty (OpNode (Ty QPR:$src), (i32 imm:$SIMM))))]>;
@@ -1363,8 +1327,8 @@ class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VSHLiD,
- OpcodeStr, Dt, "$dst, $src, $SIMM", "",
+ (outs QPR:$dst), (ins DPR:$src, i32imm:$SIMM), N2RegVShLFrm,
+ IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy DPR:$src),
(i32 imm:$SIMM))))]>;
@@ -1373,7 +1337,7 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2VImm<op24, op23, op11_8, op7, op6, op4,
- (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), itin,
+ (outs DPR:$dst), (ins QPR:$src, i32imm:$SIMM), N2RegVShRFrm, itin,
OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy QPR:$src),
(i32 imm:$SIMM))))]>;
@@ -1383,14 +1347,14 @@ class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
class N2VDShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VPALiD,
+ (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set DPR:$dst, (Ty (add DPR:$src1,
(Ty (ShOp DPR:$src2, (i32 imm:$SIMM))))))]>;
class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VPALiD,
+ (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), N2RegVShRFrm, IIC_VPALiD,
OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set QPR:$dst, (Ty (add QPR:$src1,
(Ty (ShOp QPR:$src2, (i32 imm:$SIMM))))))]>;
@@ -1398,15 +1362,15 @@ class N2VQShAdd<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// Shift by immediate and insert,
// both double- and quad-register.
class N2VDShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), IIC_VSHLiD,
+ (ins DPR:$src1, DPR:$src2, i32imm:$SIMM), f, IIC_VSHLiD,
OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set DPR:$dst, (Ty (ShOp DPR:$src1, DPR:$src2, (i32 imm:$SIMM))))]>;
class N2VQShIns<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
- string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
+ Format f, string OpcodeStr, string Dt, ValueType Ty,SDNode ShOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), IIC_VSHLiQ,
+ (ins QPR:$src1, QPR:$src2, i32imm:$SIMM), f, IIC_VSHLiQ,
OpcodeStr, Dt, "$dst, $src2, $SIMM", "$src1 = $dst",
[(set QPR:$dst, (Ty (ShOp QPR:$src1, QPR:$src2, (i32 imm:$SIMM))))]>;
@@ -1416,15 +1380,15 @@ class N2VCvtD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op11_8, op7, 0, op4,
- (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), IIC_VUNAD,
- OpcodeStr, Dt, "$dst, $src, $SIMM", "",
+ (outs DPR:$dst), (ins DPR:$src, i32imm:$SIMM), NVCVTFrm,
+ IIC_VUNAD, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set DPR:$dst, (ResTy (IntOp (OpTy DPR:$src), (i32 imm:$SIMM))))]>;
class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
Intrinsic IntOp>
: N2VImm<op24, op23, op11_8, op7, 1, op4,
- (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), IIC_VUNAQ,
- OpcodeStr, Dt, "$dst, $src, $SIMM", "",
+ (outs QPR:$dst), (ins QPR:$src, i32imm:$SIMM), NVCVTFrm,
+ IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src, $SIMM", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src), (i32 imm:$SIMM))))]>;
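// A hypothetical use of the NVCVTFrm fixed-point conversion classes (sketch
// only; the opcode bits, data-type suffix, and intrinsic name below are
// assumptions, not values taken from this patch):
//   def VCVTxs2fd : N2VCvtD<0, 1, 0b1110, 0, 1, "vcvt", "f32.s32",
//                           v2f32, v2i32, int_arm_neon_vcvtfxs2fp>;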
//===----------------------------------------------------------------------===//
@@ -1568,24 +1532,24 @@ multiclass N2VLInt_QHS<bits<2> op24_23, bits<5> op11_7, bit op6, bit op4,
// Neon 3-register vector intrinsics.
// First with only element sizes of 16 and 32 bits:
-multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4,
+multiclass N3VInt_HS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0> {
// 64-bit vector types.
- def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, itinD16,
+ def v4i16 : N3VDInt<op24, op23, 0b01, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "16"),
v4i16, v4i16, IntOp, Commutable>;
- def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, itinD32,
+ def v2i32 : N3VDInt<op24, op23, 0b10, op11_8, op4, f, itinD32,
OpcodeStr, !strconcat(Dt, "32"),
v2i32, v2i32, IntOp, Commutable>;
// 128-bit vector types.
- def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, itinQ16,
+ def v8i16 : N3VQInt<op24, op23, 0b01, op11_8, op4, f, itinQ16,
OpcodeStr, !strconcat(Dt, "16"),
v8i16, v8i16, IntOp, Commutable>;
- def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, itinQ32,
+ def v4i32 : N3VQInt<op24, op23, 0b10, op11_8, op4, f, itinQ32,
OpcodeStr, !strconcat(Dt, "32"),
v4i32, v4i32, IntOp, Commutable>;
}
@@ -1605,38 +1569,37 @@ multiclass N3VIntSL_HS<bits<4> op11_8,
}
// ....then also with element size of 8 bits:
-multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
+multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0>
- : N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
+ : N3VInt_HS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
- def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
+ def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, f, itinD16,
OpcodeStr, !strconcat(Dt, "8"),
v8i8, v8i8, IntOp, Commutable>;
- def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
+ def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, f, itinQ16,
OpcodeStr, !strconcat(Dt, "8"),
v16i8, v16i8, IntOp, Commutable>;
}
// ....then also with element size of 64 bits:
-multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
+multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4, Format f,
InstrItinClass itinD16, InstrItinClass itinD32,
InstrItinClass itinQ16, InstrItinClass itinQ32,
string OpcodeStr, string Dt,
Intrinsic IntOp, bit Commutable = 0>
- : N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
+ : N3VInt_QHS<op24, op23, op11_8, op4, f, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
- def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
+ def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, f, itinD32,
OpcodeStr, !strconcat(Dt, "64"),
v1i64, v1i64, IntOp, Commutable>;
- def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
+ def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, f, itinQ32,
OpcodeStr, !strconcat(Dt, "64"),
v2i64, v2i64, IntOp, Commutable>;
}
-
// Neon Narrowing 3-register vector intrinsics,
// source operand element sizes of 16, 32 and 64 bits:
multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4,
@@ -1866,46 +1829,46 @@ multiclass N2VPLInt2_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
// Neon 2-register vector shift by immediate,
+// with the Format operand f being either N2RegVShLFrm or N2RegVShRFrm, and
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VSh_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- SDNode OpNode> {
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ SDNode OpNode, Format f> {
// 64-bit vector types.
- def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
+ def v8i8 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "8"), v8i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
- def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
+ def v4i16 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "16"), v4i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
- def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, itin,
+ def v2i32 : N2VDSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "32"), v2i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
- def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, itin,
+ def v1i64 : N2VDSh<op24, op23, op11_8, 1, op4, f, itin,
OpcodeStr, !strconcat(Dt, "64"), v1i64, OpNode>;
// imm6 = xxxxxx
// 128-bit vector types.
- def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
+ def v16i8 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "8"), v16i8, OpNode> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
- def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
+ def v8i16 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "16"), v8i16, OpNode> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
- def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, itin,
+ def v4i32 : N2VQSh<op24, op23, op11_8, 0, op4, f, itin,
OpcodeStr, !strconcat(Dt, "32"), v4i32, OpNode> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
- def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, itin,
+ def v2i64 : N2VQSh<op24, op23, op11_8, 1, op4, f, itin,
OpcodeStr, !strconcat(Dt, "64"), v2i64, OpNode>;
// imm6 = xxxxxx
}
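// A hypothetical instantiation of the reworked multiclass (sketch only; the
// opcode bits and SDNode name are assumptions, not taken from this patch).
// A right-shift family would pass N2RegVShRFrm as the new Format operand:
//   defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s",
//                           NEONvshrs, N2RegVShRFrm>;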
-
// Neon Shift-Accumulate vector operations,
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
@@ -1947,41 +1910,43 @@ multiclass N2VShAdd_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
// Neon Shift-Insert vector operations,
+// with the Format operand f being either N2RegVShLFrm or N2RegVShRFrm, and
// element sizes of 8, 16, 32 and 64 bits:
multiclass N2VShIns_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
- string OpcodeStr, SDNode ShOp> {
+ string OpcodeStr, SDNode ShOp,
+ Format f> {
// 64-bit vector types.
def v8i8 : N2VDShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "8", v8i8, ShOp> {
+ f, OpcodeStr, "8", v8i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v4i16 : N2VDShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "16", v4i16, ShOp> {
+ f, OpcodeStr, "16", v4i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v2i32 : N2VDShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "32", v2i32, ShOp> {
+ f, OpcodeStr, "32", v2i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v1i64 : N2VDShIns<op24, op23, op11_8, 1, op4,
- OpcodeStr, "64", v1i64, ShOp>;
+ f, OpcodeStr, "64", v1i64, ShOp>;
// imm6 = xxxxxx
// 128-bit vector types.
def v16i8 : N2VQShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "8", v16i8, ShOp> {
+ f, OpcodeStr, "8", v16i8, ShOp> {
let Inst{21-19} = 0b001; // imm6 = 001xxx
}
def v8i16 : N2VQShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "16", v8i16, ShOp> {
+ f, OpcodeStr, "16", v8i16, ShOp> {
let Inst{21-20} = 0b01; // imm6 = 01xxxx
}
def v4i32 : N2VQShIns<op24, op23, op11_8, 0, op4,
- OpcodeStr, "32", v4i32, ShOp> {
+ f, OpcodeStr, "32", v4i32, ShOp> {
let Inst{21} = 0b1; // imm6 = 1xxxxx
}
def v2i64 : N2VQShIns<op24, op23, op11_8, 1, op4,
- OpcodeStr, "64", v2i64, ShOp>;
+ f, OpcodeStr, "64", v2i64, ShOp>;
// imm6 = xxxxxx
}
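// A hypothetical instantiation (sketch only; the opcode bits and SDNode name
// are assumptions). A shift-left-and-insert family would pass N2RegVShLFrm:
//   defm VSLIx : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli,
//                              N2RegVShLFrm>;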
@@ -2044,20 +2009,26 @@ defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, "vaddl", "u",
defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", "s", int_arm_neon_vaddws, 0>;
defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>;
// VHADD : Vector Halving Add
-defm VHADDs : N3VInt_QHS<0,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhadd", "s", int_arm_neon_vhadds, 1>;
-defm VHADDu : N3VInt_QHS<1,0,0b0000,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vhadd", "u", int_arm_neon_vhaddu, 1>;
+defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "s", int_arm_neon_vhadds, 1>;
+defm VHADDu : N3VInt_QHS<1, 0, 0b0000, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vhadd", "u", int_arm_neon_vhaddu, 1>;
// VRHADD : Vector Rounding Halving Add
-defm VRHADDs : N3VInt_QHS<0,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vrhadd", "s", int_arm_neon_vrhadds, 1>;
-defm VRHADDu : N3VInt_QHS<1,0,0b0001,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
+defm VRHADDs : N3VInt_QHS<0, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "s", int_arm_neon_vrhadds, 1>;
+defm VRHADDu : N3VInt_QHS<1, 0, 0b0001, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vrhadd", "u", int_arm_neon_vrhaddu, 1>;
// VQADD : Vector Saturating Add
-defm VQADDs : N3VInt_QHSD<0,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqadd", "s", int_arm_neon_vqadds, 1>;
-defm VQADDu : N3VInt_QHSD<1,0,0b0000,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>;
+defm VQADDs : N3VInt_QHSD<0, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "s", int_arm_neon_vqadds, 1>;
+defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vqadd", "u", int_arm_neon_vqaddu, 1>;
// VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q)
defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i",
int_arm_neon_vaddhn, 1>;
@@ -2070,10 +2041,10 @@ defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i",
// VMUL : Vector Multiply (integer, polynomial and floating-point)
defm VMUL : N3V_QHS<0, 0, 0b1001, 1, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q, "vmul", "i", mul, 1>;
-def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8",
- v8i8, v8i8, int_arm_neon_vmulp, 1>;
-def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8",
- v16i8, v16i8, int_arm_neon_vmulp, 1>;
+def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16D, "vmul",
+ "p8", v8i8, v8i8, int_arm_neon_vmulp, 1>;
+def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, N3RegFrm, IIC_VMULi16Q, "vmul",
+ "p8", v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
@@ -2103,7 +2074,7 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
-defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
+defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
"vqdmulh", "s", int_arm_neon_vqdmulh, 1>;
defm VQDMULHsl: N3VIntSL_HS<0b1100, IIC_VMULi16D, IIC_VMULi32D,
@@ -2125,8 +2096,8 @@ def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
(SubReg_i32_lane imm:$lane)))>;
// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
-defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, IIC_VMULi16D, IIC_VMULi32D,
- IIC_VMULi16Q, IIC_VMULi32Q,
+defm VQRDMULH : N3VInt_HS<1, 0, 0b1011, 0, N3RegFrm,
+ IIC_VMULi16D,IIC_VMULi32D,IIC_VMULi16Q,IIC_VMULi32Q,
"vqrdmulh", "s", int_arm_neon_vqrdmulh, 1>;
defm VQRDMULHsl : N3VIntSL_HS<0b1101, IIC_VMULi16D, IIC_VMULi32D,
IIC_VMULi16Q, IIC_VMULi32Q,
@@ -2285,18 +2256,18 @@ defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, "vsubl", "u",
defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>;
defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>;
// VHSUB : Vector Halving Subtract
-defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vhsub", "s", int_arm_neon_vhsubs, 0>;
-defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VHSUBu : N3VInt_QHS<1, 0, 0b0010, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vhsub", "u", int_arm_neon_vhsubu, 0>;
// VQSUB : Vector Saturating Subtract
-defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VQSUBs : N3VInt_QHSD<0, 0, 0b0010, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vqsub", "s", int_arm_neon_vqsubs, 0>;
-defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vqsub", "u", int_arm_neon_vqsubu, 0>;
// VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q)
defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i",
@@ -2323,8 +2294,8 @@ defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>;
defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vcge", "u", NEONvcgeu, 0>;
-def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32",
- v2i32, v2f32, NEONvcge, 0>;
+def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32,
+ NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
// For disassembly only.
@@ -2351,21 +2322,27 @@ defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
"$dst, $src, #0">;
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
-def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32",
- v2i32, v2f32, int_arm_neon_vacged, 0>;
-def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, IIC_VBINQ, "vacge", "f32",
- v4i32, v4f32, int_arm_neon_vacgeq, 0>;
+def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacge",
+ "f32", v2i32, v2f32, int_arm_neon_vacged, 0>;
+def VACGEq : N3VQInt<1, 0, 0b00, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacge",
+ "f32", v4i32, v4f32, int_arm_neon_vacgeq, 0>;
// VACGT : Vector Absolute Compare Greater Than (aka VCAGT)
-def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, IIC_VBIND, "vacgt", "f32",
- v2i32, v2f32, int_arm_neon_vacgtd, 0>;
-def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, IIC_VBINQ, "vacgt", "f32",
- v4i32, v4f32, int_arm_neon_vacgtq, 0>;
+def VACGTd : N3VDInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBIND, "vacgt",
+ "f32", v2i32, v2f32, int_arm_neon_vacgtd, 0>;
+def VACGTq : N3VQInt<1, 0, 0b10, 0b1110, 1, N3RegFrm, IIC_VBINQ, "vacgt",
+ "f32", v4i32, v4f32, int_arm_neon_vacgtq, 0>;
// VTST : Vector Test Bits
defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vtst", "", NEONvtst, 1>;
// Vector Bitwise Operations.
+def vnot8 : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v8i8 immAllOnesV)))>;
+def vnot16 : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
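+// (vnot8 and vnot16 express a full bitwise NOT as an XOR with an all-ones
+// vector; the byte element type only fixes the width, since the operation
+// is purely bitwise.)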
+
// VAND : Vector Bitwise AND
def VANDd : N3VDX<0, 0, 0b00, 0b0001, 1, IIC_VBINiD, "vand",
v2i32, v2i32, and, 1>;
@@ -2386,74 +2363,80 @@ def VORRq : N3VQX<0, 0, 0b10, 0b0001, 1, IIC_VBINiQ, "vorr",
// VBIC : Vector Bitwise Bit Clear (AND NOT)
def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2), IIC_VBINiD,
- "vbic", "$dst, $src1, $src2", "",
- [(set DPR:$dst, (v2i32 (and DPR:$src1,
- (vnot_conv DPR:$src2))))]>;
+ (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
+ "vbic", "$dst, $src1, $src2", "",
+ [(set DPR:$dst, (v2i32 (and DPR:$src1,
+ (vnot8 DPR:$src2))))]>;
def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
- "vbic", "$dst, $src1, $src2", "",
- [(set QPR:$dst, (v4i32 (and QPR:$src1,
- (vnot_conv QPR:$src2))))]>;
+ (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
+ "vbic", "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (v4i32 (and QPR:$src1,
+ (vnot16 QPR:$src2))))]>;
// VORN : Vector Bitwise OR NOT
def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2), IIC_VBINiD,
- "vorn", "$dst, $src1, $src2", "",
- [(set DPR:$dst, (v2i32 (or DPR:$src1,
- (vnot_conv DPR:$src2))))]>;
+ (ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
+ "vorn", "$dst, $src1, $src2", "",
+ [(set DPR:$dst, (v2i32 (or DPR:$src1,
+ (vnot8 DPR:$src2))))]>;
def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2), IIC_VBINiQ,
- "vorn", "$dst, $src1, $src2", "",
- [(set QPR:$dst, (v4i32 (or QPR:$src1,
- (vnot_conv QPR:$src2))))]>;
+ (ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
+ "vorn", "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (v4i32 (or QPR:$src1,
+ (vnot16 QPR:$src2))))]>;
// VMVN : Vector Bitwise NOT
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
- (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD,
- "vmvn", "$dst, $src", "",
- [(set DPR:$dst, (v2i32 (vnot DPR:$src)))]>;
+ (outs DPR:$dst), (ins DPR:$src), IIC_VSHLiD,
+ "vmvn", "$dst, $src", "",
+ [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
- (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD,
- "vmvn", "$dst, $src", "",
- [(set QPR:$dst, (v4i32 (vnot QPR:$src)))]>;
-def : Pat<(v2i32 (vnot_conv DPR:$src)), (VMVNd DPR:$src)>;
-def : Pat<(v4i32 (vnot_conv QPR:$src)), (VMVNq QPR:$src)>;
+ (outs QPR:$dst), (ins QPR:$src), IIC_VSHLiD,
+ "vmvn", "$dst, $src", "",
+ [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>;
+def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>;
+def : Pat<(v4i32 (vnot16 QPR:$src)), (VMVNq QPR:$src)>;
// VBSL : Vector Bitwise Select
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
- (ins DPR:$src1, DPR:$src2, DPR:$src3), IIC_VCNTiD,
- "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
- [(set DPR:$dst,
- (v2i32 (or (and DPR:$src2, DPR:$src1),
- (and DPR:$src3, (vnot_conv DPR:$src1)))))]>;
+ (ins DPR:$src1, DPR:$src2, DPR:$src3),
+ N3RegFrm, IIC_VCNTiD,
+ "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
+ [(set DPR:$dst,
+ (v2i32 (or (and DPR:$src2, DPR:$src1),
+ (and DPR:$src3, (vnot8 DPR:$src1)))))]>;
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
- (ins QPR:$src1, QPR:$src2, QPR:$src3), IIC_VCNTiQ,
- "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
- [(set QPR:$dst,
- (v4i32 (or (and QPR:$src2, QPR:$src1),
- (and QPR:$src3, (vnot_conv QPR:$src1)))))]>;
+ (ins QPR:$src1, QPR:$src2, QPR:$src3),
+ N3RegFrm, IIC_VCNTiQ,
+ "vbsl", "$dst, $src2, $src3", "$src1 = $dst",
+ [(set QPR:$dst,
+ (v4i32 (or (and QPR:$src2, QPR:$src1),
+ (and QPR:$src3, (vnot16 QPR:$src1)))))]>;
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VBINiD, "vbif", "$dst, $src2, $src3", "$src1 = $dst",
+ N3RegFrm, IIC_VBINiD,
+ "vbif", "$dst, $src2, $src3", "$src1 = $dst",
[/* For disassembly only; pattern left blank */]>;
def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
- IIC_VBINiQ, "vbif", "$dst, $src2, $src3", "$src1 = $dst",
+ N3RegFrm, IIC_VBINiQ,
+ "vbif", "$dst, $src2, $src3", "$src1 = $dst",
[/* For disassembly only; pattern left blank */]>;
// VBIT : Vector Bitwise Insert if True
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2, DPR:$src3),
- IIC_VBINiD, "vbit", "$dst, $src2, $src3", "$src1 = $dst",
+ N3RegFrm, IIC_VBINiD,
+ "vbit", "$dst, $src2, $src3", "$src1 = $dst",
[/* For disassembly only; pattern left blank */]>;
def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2, QPR:$src3),
- IIC_VBINiQ, "vbit", "$dst, $src2, $src3", "$src1 = $dst",
+ N3RegFrm, IIC_VBINiQ,
+ "vbit", "$dst, $src2, $src3", "$src1 = $dst",
[/* For disassembly only; pattern left blank */]>;
// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
@@ -2463,15 +2446,15 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
// Vector Absolute Differences.
// VABD : Vector Absolute Difference
-defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vabd", "s", int_arm_neon_vabds, 0>;
-defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, IIC_VBINi4D, IIC_VBINi4D,
- IIC_VBINi4Q, IIC_VBINi4Q,
+defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
"vabd", "u", int_arm_neon_vabdu, 0>;
-def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, IIC_VBIND,
+def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 0>;
-def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, IIC_VBINQ,
+def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
"vabd", "f32", v4f32, v4f32, int_arm_neon_vabds, 0>;
// VABDL : Vector Absolute Difference Long (Q = | D - D |)
@@ -2491,36 +2474,40 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>;
// Vector Maximum and Minimum.
// VMAX : Vector Maximum
-defm VMAXs : N3VInt_QHS<0,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>;
-defm VMAXu : N3VInt_QHS<1,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>;
-def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32",
- v2f32, v2f32, int_arm_neon_vmaxs, 1>;
-def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32",
- v4f32, v4f32, int_arm_neon_vmaxs, 1>;
+defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vmax", "s", int_arm_neon_vmaxs, 1>;
+defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vmax", "u", int_arm_neon_vmaxu, 1>;
+def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmax",
+ "f32", v2f32, v2f32, int_arm_neon_vmaxs, 1>;
+def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax",
+ "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>;
// VMIN : Vector Minimum
-defm VMINs : N3VInt_QHS<0,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>;
-defm VMINu : N3VInt_QHS<1,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
- IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>;
-def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32",
- v2f32, v2f32, int_arm_neon_vmins, 1>;
-def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, IIC_VBINQ, "vmin", "f32",
- v4f32, v4f32, int_arm_neon_vmins, 1>;
+defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vmin", "s", int_arm_neon_vmins, 1>;
+defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, N3RegFrm,
+ IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q,
+ "vmin", "u", int_arm_neon_vminu, 1>;
+def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBIND, "vmin",
+ "f32", v2f32, v2f32, int_arm_neon_vmins, 1>;
+def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin",
+ "f32", v4f32, v4f32, int_arm_neon_vmins, 1>;
// Vector Pairwise Operations.
// VPADD : Vector Pairwise Add
-def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, IIC_VBINiD, "vpadd", "i8",
- v8i8, v8i8, int_arm_neon_vpadd, 0>;
-def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, IIC_VBINiD, "vpadd", "i16",
- v4i16, v4i16, int_arm_neon_vpadd, 0>;
-def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, IIC_VBINiD, "vpadd", "i32",
- v2i32, v2i32, int_arm_neon_vpadd, 0>;
-def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, IIC_VBIND, "vpadd", "f32",
- v2f32, v2f32, int_arm_neon_vpadd, 0>;
+def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
+ "i8", v8i8, v8i8, int_arm_neon_vpadd, 0>;
+def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
+ "i16", v4i16, v4i16, int_arm_neon_vpadd, 0>;
+def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, N3RegFrm, IIC_VBINiD, "vpadd",
+ "i32", v2i32, v2i32, int_arm_neon_vpadd, 0>;
+def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, N3RegFrm, IIC_VBIND, "vpadd",
+ "f32", v2f32, v2f32, int_arm_neon_vpadd, 0>;
// VPADDL : Vector Pairwise Add Long
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl", "s",
@@ -2535,36 +2522,36 @@ defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal", "u",
int_arm_neon_vpadalu>;
// VPMAX : Vector Pairwise Maximum
-def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "s8",
- v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
-def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "s16",
- v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
-def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "s32",
- v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
-def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, IIC_VBINi4D, "vpmax", "u8",
- v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
-def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, IIC_VBINi4D, "vpmax", "u16",
- v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
-def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, IIC_VBINi4D, "vpmax", "u32",
- v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
-def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, IIC_VBINi4D, "vpmax", "f32",
- v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
+def VPMAXs8 : N3VDInt<0, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "s8", v8i8, v8i8, int_arm_neon_vpmaxs, 0>;
+def VPMAXs16 : N3VDInt<0, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "s16", v4i16, v4i16, int_arm_neon_vpmaxs, 0>;
+def VPMAXs32 : N3VDInt<0, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "s32", v2i32, v2i32, int_arm_neon_vpmaxs, 0>;
+def VPMAXu8 : N3VDInt<1, 0, 0b00, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "u8", v8i8, v8i8, int_arm_neon_vpmaxu, 0>;
+def VPMAXu16 : N3VDInt<1, 0, 0b01, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "u16", v4i16, v4i16, int_arm_neon_vpmaxu, 0>;
+def VPMAXu32 : N3VDInt<1, 0, 0b10, 0b1010, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "u32", v2i32, v2i32, int_arm_neon_vpmaxu, 0>;
+def VPMAXf : N3VDInt<1, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmax",
+ "f32", v2f32, v2f32, int_arm_neon_vpmaxs, 0>;
// VPMIN : Vector Pairwise Minimum
-def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "s8",
- v8i8, v8i8, int_arm_neon_vpmins, 0>;
-def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "s16",
- v4i16, v4i16, int_arm_neon_vpmins, 0>;
-def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "s32",
- v2i32, v2i32, int_arm_neon_vpmins, 0>;
-def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, IIC_VBINi4D, "vpmin", "u8",
- v8i8, v8i8, int_arm_neon_vpminu, 0>;
-def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, IIC_VBINi4D, "vpmin", "u16",
- v4i16, v4i16, int_arm_neon_vpminu, 0>;
-def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, IIC_VBINi4D, "vpmin", "u32",
- v2i32, v2i32, int_arm_neon_vpminu, 0>;
-def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, IIC_VBINi4D, "vpmin", "f32",
- v2f32, v2f32, int_arm_neon_vpmins, 0>;
+def VPMINs8 : N3VDInt<0, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "s8", v8i8, v8i8, int_arm_neon_vpmins, 0>;
+def VPMINs16 : N3VDInt<0, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "s16", v4i16, v4i16, int_arm_neon_vpmins, 0>;
+def VPMINs32 : N3VDInt<0, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "s32", v2i32, v2i32, int_arm_neon_vpmins, 0>;
+def VPMINu8 : N3VDInt<1, 0, 0b00, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "u8", v8i8, v8i8, int_arm_neon_vpminu, 0>;
+def VPMINu16 : N3VDInt<1, 0, 0b01, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "u16", v4i16, v4i16, int_arm_neon_vpminu, 0>;
+def VPMINu32 : N3VDInt<1, 0, 0b10, 0b1010, 1, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "u32", v2i32, v2i32, int_arm_neon_vpminu, 0>;
+def VPMINf : N3VDInt<1, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINi4D, "vpmin",
+ "f32", v2f32, v2f32, int_arm_neon_vpmins, 0>;
// Vector Reciprocal and Reciprocal Square Root Estimate and Step.
@@ -2583,10 +2570,10 @@ def VRECPEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01010, 0,
v4f32, v4f32, int_arm_neon_vrecpe>;
// VRECPS : Vector Reciprocal Step
-def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1,
+def VRECPSfd : N3VDInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
IIC_VRECSD, "vrecps", "f32",
v2f32, v2f32, int_arm_neon_vrecps, 1>;
-def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1,
+def VRECPSfq : N3VQInt<0, 0, 0b00, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrecps", "f32",
v4f32, v4f32, int_arm_neon_vrecps, 1>;
@@ -2605,25 +2592,30 @@ def VRSQRTEfq : N2VQInt<0b11, 0b11, 0b10, 0b11, 0b01011, 0,
v4f32, v4f32, int_arm_neon_vrsqrte>;
// VRSQRTS : Vector Reciprocal Square Root Step
-def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1,
+def VRSQRTSfd : N3VDInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
IIC_VRECSD, "vrsqrts", "f32",
v2f32, v2f32, int_arm_neon_vrsqrts, 1>;
-def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1,
+def VRSQRTSfq : N3VQInt<0, 0, 0b10, 0b1111, 1, N3RegFrm,
IIC_VRECSQ, "vrsqrts", "f32",
v4f32, v4f32, int_arm_neon_vrsqrts, 1>;
// Vector Shifts.
// VSHL : Vector Shift
-defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
- IIC_VSHLiQ, "vshl", "s", int_arm_neon_vshifts, 0>;
-defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ,
- IIC_VSHLiQ, "vshl", "u", int_arm_neon_vshiftu, 0>;
+defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "s", int_arm_neon_vshifts, 0>;
+defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, N3RegVShFrm,
+ IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ,
+ "vshl", "u", int_arm_neon_vshiftu, 0>;
// VSHL : Vector Shift Left (Immediate)
-defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl>;
+defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl", "i", NEONvshl,
+ N2RegVShLFrm>;
// VSHR : Vector Shift Right (Immediate)
-defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs>;
-defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru>;
+defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "s", NEONvshrs,
+ N2RegVShRFrm>;
+defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr", "u", NEONvshru,
+ N2RegVShRFrm>;
// VSHLL : Vector Shift Left Long
defm VSHLLs : N2VLSh_QHS<0, 1, 0b1010, 0, 0, 1, "vshll", "s", NEONvshlls>;
@@ -2649,28 +2641,37 @@ defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
NEONvshrn>;
// VRSHL : Vector Rounding Shift
-defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts,0>;
-defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu,0>;
+defm VRSHLs : N3VInt_QHSD<0, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "s", int_arm_neon_vrshifts, 0>;
+defm VRSHLu : N3VInt_QHSD<1, 0, 0b0101, 0, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vrshl", "u", int_arm_neon_vrshiftu, 0>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>;
-defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>;
+defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs,
+ N2RegVShRFrm>;
+defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru,
+ N2RegVShRFrm>;
// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
NEONvrshrn>;
// VQSHL : Vector Saturating Shift
-defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts,0>;
-defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu,0>;
+defm VQSHLs : N3VInt_QHSD<0, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "s", int_arm_neon_vqshifts, 0>;
+defm VQSHLu : N3VInt_QHSD<1, 0, 0b0100, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqshl", "u", int_arm_neon_vqshiftu, 0>;
// VQSHL : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>;
-defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>;
+defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls,
+ N2RegVShLFrm>;
+defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu,
+ N2RegVShLFrm>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D, "vqshlu","s",NEONvqshlsu>;
+defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu,
+ N2RegVShLFrm>;
// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
@@ -2683,12 +2684,12 @@ defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
NEONvqshrnsu>;
// VQRSHL : Vector Saturating Rounding Shift
-defm VQRSHLs : N3VInt_QHSD<0,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqrshl", "s",
- int_arm_neon_vqrshifts, 0>;
-defm VQRSHLu : N3VInt_QHSD<1,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqrshl", "u",
- int_arm_neon_vqrshiftu, 0>;
+defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "s", int_arm_neon_vqrshifts, 0>;
+defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, N3RegVShFrm,
+ IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q, IIC_VSHLi4Q,
+ "vqrshl", "u", int_arm_neon_vqrshiftu, 0>;
// VQRSHRN : Vector Saturating Rounding Shift Right and Narrow
defm VQRSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 1, 1, IIC_VSHLi4D, "vqrshrn", "s",
@@ -2708,9 +2709,9 @@ defm VRSRAs : N2VShAdd_QHSD<0, 1, 0b0011, 1, "vrsra", "s", NEONvrshrs>;
defm VRSRAu : N2VShAdd_QHSD<1, 1, 0b0011, 1, "vrsra", "u", NEONvrshru>;
// VSLI : Vector Shift Left and Insert
-defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli>;
+defm VSLI : N2VShIns_QHSD<1, 1, 0b0101, 1, "vsli", NEONvsli, N2RegVShLFrm>;
// VSRI : Vector Shift Right and Insert
-defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri>;
+defm VSRI : N2VShIns_QHSD<1, 1, 0b0100, 1, "vsri", NEONvsri, N2RegVShRFrm>;
// Vector Absolute and Saturating Absolute.
@@ -2732,19 +2733,22 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
// Vector Negate.
-def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
-def vneg_conv : PatFrag<(ops node:$in), (sub immAllZerosV_bc, node:$in)>;
+def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
+def vneg8 : PatFrag<(ops node:$in),
+ (sub (bitconvert (v8i8 immAllZerosV)), node:$in)>;
+def vneg16 : PatFrag<(ops node:$in),
+ (sub (bitconvert (v16i8 immAllZerosV)), node:$in)>;
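+// As with vnot8/vnot16 earlier, these are a sketch of the canonical form
+// of vector negation: "-x" reaches selection as a subtract from an
+// all-zeros vector, with the zero vector bitconverted from v8i8 (64-bit)
+// or v16i8 (128-bit) when the using element type differs.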
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
- [(set DPR:$dst, (Ty (vneg DPR:$src)))]>;
+ [(set DPR:$dst, (Ty (vneg8 DPR:$src)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (Ty (vneg QPR:$src)))]>;
+ [(set QPR:$dst, (Ty (vneg16 QPR:$src)))]>;
-// VNEG : Vector Negate
+// VNEG : Vector Negate (integer)
def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
def VNEGs16d : VNEGD<0b01, "vneg", "s16", v4i16>;
def VNEGs32d : VNEGD<0b10, "vneg", "s32", v2i32>;
@@ -2762,12 +2766,12 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
"vneg", "f32", "$dst, $src", "",
[(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
-def : Pat<(v8i8 (vneg_conv DPR:$src)), (VNEGs8d DPR:$src)>;
-def : Pat<(v4i16 (vneg_conv DPR:$src)), (VNEGs16d DPR:$src)>;
-def : Pat<(v2i32 (vneg_conv DPR:$src)), (VNEGs32d DPR:$src)>;
-def : Pat<(v16i8 (vneg_conv QPR:$src)), (VNEGs8q QPR:$src)>;
-def : Pat<(v8i16 (vneg_conv QPR:$src)), (VNEGs16q QPR:$src)>;
-def : Pat<(v4i32 (vneg_conv QPR:$src)), (VNEGs32q QPR:$src)>;
+def : Pat<(v8i8 (vneg8 DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vneg8 DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vneg8 DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vneg16 QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vneg16 QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vneg16 QPR:$src)), (VNEGs32q QPR:$src)>;
// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
@@ -2805,9 +2809,9 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
// VMOV : Vector Move (Register)
def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
- IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+ N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
- IIC_VMOVD, "vmov", "$dst, $src", "", []>;
+ N3RegFrm, IIC_VMOVD, "vmov", "$dst, $src", "", []>;
// VMOV : Vector Move (Immediate)
@@ -3048,30 +3052,29 @@ def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
// VDUP : Vector Duplicate Lane (from scalar to all elements)
-class VDUPLND<bits<2> op19_18, bits<2> op17_16,
- string OpcodeStr, string Dt, ValueType Ty>
- : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 0, 0,
- (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
- OpcodeStr, Dt, "$dst, $src[$lane]", "",
- [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
+class VDUPLND<bits<4> op19_16, string OpcodeStr, string Dt,
+ ValueType Ty>
+ : NVDupLane<op19_16, 0, (outs DPR:$dst), (ins DPR:$src, nohash_imm:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
+ [(set DPR:$dst, (Ty (NEONvduplane (Ty DPR:$src), imm:$lane)))]>;
-class VDUPLNQ<bits<2> op19_18, bits<2> op17_16, string OpcodeStr, string Dt,
+class VDUPLNQ<bits<4> op19_16, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy>
- : N2V<0b11, 0b11, op19_18, op17_16, 0b11000, 1, 0,
- (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane), IIC_VMOVD,
- OpcodeStr, Dt, "$dst, $src[$lane]", "",
- [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src), imm:$lane)))]>;
+ : NVDupLane<op19_16, 1, (outs QPR:$dst), (ins DPR:$src, nohash_imm:$lane),
+ IIC_VMOVD, OpcodeStr, Dt, "$dst, $src[$lane]",
+ [(set QPR:$dst, (ResTy (NEONvduplane (OpTy DPR:$src),
+ imm:$lane)))]>;
// Inst{19-16} is partially specified depending on the element size.
-def VDUPLN8d : VDUPLND<{?,?}, {?,1}, "vdup", "8", v8i8>;
-def VDUPLN16d : VDUPLND<{?,?}, {1,0}, "vdup", "16", v4i16>;
-def VDUPLN32d : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2i32>;
-def VDUPLNfd : VDUPLND<{?,1}, {0,0}, "vdup", "32", v2f32>;
-def VDUPLN8q : VDUPLNQ<{?,?}, {?,1}, "vdup", "8", v16i8, v8i8>;
-def VDUPLN16q : VDUPLNQ<{?,?}, {1,0}, "vdup", "16", v8i16, v4i16>;
-def VDUPLN32q : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4i32, v2i32>;
-def VDUPLNfq : VDUPLNQ<{?,1}, {0,0}, "vdup", "32", v4f32, v2f32>;
+def VDUPLN8d : VDUPLND<{?,?,?,1}, "vdup", "8", v8i8>;
+def VDUPLN16d : VDUPLND<{?,?,1,0}, "vdup", "16", v4i16>;
+def VDUPLN32d : VDUPLND<{?,1,0,0}, "vdup", "32", v2i32>;
+def VDUPLNfd : VDUPLND<{?,1,0,0}, "vdup", "32", v2f32>;
+def VDUPLN8q : VDUPLNQ<{?,?,?,1}, "vdup", "8", v16i8, v8i8>;
+def VDUPLN16q : VDUPLNQ<{?,?,1,0}, "vdup", "16", v8i16, v4i16>;
+def VDUPLN32q : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4i32, v2i32>;
+def VDUPLNfq : VDUPLNQ<{?,1,0,0}, "vdup", "32", v4f32, v2f32>;
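+// Sketch of how the '?' bits are filled in (per the ARM NEON encoding of
+// VDUP (scalar)): Inst{19-16} holds {lane,1} for size 8, {lane,1,0} for
+// size 16, and {lane,1,0,0} for size 32, so the unspecified bits carry
+// the lane index.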
def : Pat<(v16i8 (NEONvduplane (v16i8 QPR:$src), imm:$lane)),
(v16i8 (VDUPLN8q (v8i8 (EXTRACT_SUBREG QPR:$src,
@@ -3233,15 +3236,15 @@ def VREV16q8 : VREV16Q<0b00, "vrev16", "8", v16i8>;
class VEXTd<string OpcodeStr, string Dt, ValueType Ty>
: N3V<0,1,0b11,{?,?,?,?},0,0, (outs DPR:$dst),
- (ins DPR:$lhs, DPR:$rhs, i32imm:$index), IIC_VEXTD,
- OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
+ (ins DPR:$lhs, DPR:$rhs, i32imm:$index), NVExtFrm,
+ IIC_VEXTD, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
[(set DPR:$dst, (Ty (NEONvext (Ty DPR:$lhs),
(Ty DPR:$rhs), imm:$index)))]>;
class VEXTq<string OpcodeStr, string Dt, ValueType Ty>
: N3V<0,1,0b11,{?,?,?,?},1,0, (outs QPR:$dst),
- (ins QPR:$lhs, QPR:$rhs, i32imm:$index), IIC_VEXTQ,
- OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
+ (ins QPR:$lhs, QPR:$rhs, i32imm:$index), NVExtFrm,
+ IIC_VEXTQ, OpcodeStr, Dt, "$dst, $lhs, $rhs, $index", "",
[(set QPR:$dst, (Ty (NEONvext (Ty QPR:$lhs),
(Ty QPR:$rhs), imm:$index)))]>;
@@ -3290,25 +3293,26 @@ def VZIPq32 : N2VQShuffle<0b10, 0b00011, IIC_VPERMQ3, "vzip", "32">;
// VTBL : Vector Table Lookup
def VTBL1
: N3V<1,1,0b11,0b1000,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$src), IIC_VTB1,
+ (ins DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTB1,
"vtbl", "8", "$dst, \\{$tbl1\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl1 DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBL2
: N3V<1,1,0b11,0b1001,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTB2,
+ (ins DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTB2,
"vtbl", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl2
DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBL3
: N3V<1,1,0b11,0b1010,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTB3,
+ (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), NVTBLFrm, IIC_VTB3,
"vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl3
DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBL4
: N3V<1,1,0b11,0b1011,0,0, (outs DPR:$dst),
- (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTB4,
+ (ins DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src),
+ NVTBLFrm, IIC_VTB4,
"vtbl", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src", "",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbl4 DPR:$tbl1, DPR:$tbl2,
DPR:$tbl3, DPR:$tbl4, DPR:$src)))]>;
@@ -3317,26 +3321,27 @@ def VTBL4
// VTBX : Vector Table Extension
def VTBX1
: N3V<1,1,0b11,0b1000,1,0, (outs DPR:$dst),
- (ins DPR:$orig, DPR:$tbl1, DPR:$src), IIC_VTBX1,
+ (ins DPR:$orig, DPR:$tbl1, DPR:$src), NVTBLFrm, IIC_VTBX1,
"vtbx", "8", "$dst, \\{$tbl1\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx1
DPR:$orig, DPR:$tbl1, DPR:$src)))]>;
let hasExtraSrcRegAllocReq = 1 in {
def VTBX2
: N3V<1,1,0b11,0b1001,1,0, (outs DPR:$dst),
- (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), IIC_VTBX2,
+ (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src), NVTBLFrm, IIC_VTBX2,
"vtbx", "8", "$dst, \\{$tbl1, $tbl2\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx2
DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$src)))]>;
def VTBX3
: N3V<1,1,0b11,0b1010,1,0, (outs DPR:$dst),
- (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src), IIC_VTBX3,
+ (ins DPR:$orig, DPR:$tbl1, DPR:$tbl2, DPR:$tbl3, DPR:$src),
+ NVTBLFrm, IIC_VTBX3,
"vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3\\}, $src", "$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx3 DPR:$orig, DPR:$tbl1,
DPR:$tbl2, DPR:$tbl3, DPR:$src)))]>;
def VTBX4
: N3V<1,1,0b11,0b1011,1,0, (outs DPR:$dst), (ins DPR:$orig, DPR:$tbl1,
- DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), IIC_VTBX4,
+ DPR:$tbl2, DPR:$tbl3, DPR:$tbl4, DPR:$src), NVTBLFrm, IIC_VTBX4,
"vtbx", "8", "$dst, \\{$tbl1, $tbl2, $tbl3, $tbl4\\}, $src",
"$orig = $dst",
[(set DPR:$dst, (v8i8 (int_arm_neon_vtbx4 DPR:$orig, DPR:$tbl1,
@@ -3396,12 +3401,12 @@ def : N3VSPat<fmul, VMULfd_sfp>;
//let neverHasSideEffects = 1 in
//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
-// v2f32, fmul, fadd>;
+// v2f32, fmul, fadd>;
//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;
//let neverHasSideEffects = 1 in
//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
-// v2f32, fmul, fsub>;
+// v2f32, fmul, fsub>;
//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;
// Vector Absolute used for single-precision FP
@@ -3421,14 +3426,14 @@ def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
// Vector Maximum used for single-precision FP
let neverHasSideEffects = 1 in
def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
"vmax", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmax, VMAXfd_sfp>;
// Vector Minimum used for single-precision FP
let neverHasSideEffects = 1 in
def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), N3RegFrm, IIC_VBIND,
"vmin", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmin, VMINfd_sfp>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index aca8230..0458389 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -545,7 +545,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
// FP FMA Operations.
//
-def VMLAD : ADbI<0b11100, 0b00, 0, 0,
+def VMLAD : ADbI_vmlX<0b11100, 0b00, 0, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b),
@@ -558,7 +558,7 @@ def VMLAS : ASbIn<0b11100, 0b00, 0, 0,
[(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
+def VNMLSD : ADbI_vmlX<0b11100, 0b01, 0, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b),
@@ -571,7 +571,7 @@ def VNMLSS : ASbI<0b11100, 0b01, 0, 0,
[(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def VMLSD : ADbI<0b11100, 0b00, 1, 0,
+def VMLSD : ADbI_vmlX<0b11100, 0b00, 1, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)),
@@ -589,7 +589,7 @@ def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, (f64 DPR:$b))),
def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
(VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
-def VNMLAD : ADbI<0b11100, 0b01, 1, 0,
+def VNMLAD : ADbI_vmlX<0b11100, 0b01, 1, 0,
(outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)),
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index bdbec30..cb762a4 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -341,6 +341,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
unsigned PReg = PMO.getReg();
unsigned PRegNum = PMO.isUndef() ? UINT_MAX
: ARMRegisterInfo::getRegisterNumbering(PReg);
+ unsigned Count = 1;
for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
int NewOffset = MemOps[i].Offset;
@@ -350,11 +351,14 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
: ARMRegisterInfo::getRegisterNumbering(Reg);
// AM4 - register numbers in ascending order.
// AM5 - consecutive register numbers in ascending order.
+ // Can only do up to 16 double-word registers per insn.
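+ // For example, a run d0,d1,...,d15 can still become a single vldm, but
+ // a 17th consecutive D register must start a new instruction.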
if (Reg != ARM::SP &&
NewOffset == Offset + (int)Size &&
- ((isAM4 && RegNum > PRegNum) || RegNum == PRegNum+1)) {
+ ((isAM4 && RegNum > PRegNum)
+ || ((Size < 8 || Count < 16) && RegNum == PRegNum+1))) {
Offset += Size;
PRegNum = RegNum;
+ ++Count;
} else {
// Can't merge this in. Try merge the earlier ones first.
MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 2dad7f1..9e55cd8 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -22,10 +22,6 @@ using namespace llvm;
static cl::opt<bool>
ReserveR9("arm-reserve-r9", cl::Hidden,
cl::desc("Reserve R9, making it unavailable as GPR"));
-static cl::opt<bool>
-UseNEONFP("arm-use-neon-fp",
- cl::desc("Use NEON for single-precision FP"),
- cl::init(false), cl::Hidden);
static cl::opt<bool>
UseMOVT("arm-use-movt",
@@ -35,7 +31,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
bool isT)
: ARMArchVersion(V4)
, ARMFPUType(None)
- , UseNEONForSinglePrecisionFP(UseNEONFP)
+ , UseNEONForSinglePrecisionFP(false)
+ , SlowVMLx(false)
, IsThumb(isT)
, ThumbMode(Thumb1)
, PostRAScheduler(false)
@@ -115,14 +112,6 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
if (!isThumb() || hasThumb2())
PostRAScheduler = true;
-
- // Set CPU specific features.
- if (CPUString == "cortex-a8") {
- // On Cortex-a8, it's faster to perform some single-precision FP
- // operations with NEON instructions.
- if (UseNEONFP.getPosition() == 0)
- UseNEONForSinglePrecisionFP = true;
- }
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 2dc81a4..fa56a91 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -50,6 +50,10 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
+ /// SlowVMLx - If the VFP2 instructions are available, indicates whether
+ /// the VML[AS] instructions are slow (if so, don't use them).
+ bool SlowVMLx;
+
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
bool IsThumb;
@@ -119,6 +123,7 @@ protected:
bool hasNEON() const { return ARMFPUType >= NEON; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
+ bool useVMLx() const { return hasVFP2() && !SlowVMLx; }
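+ // A hypothetical use (not taken from this patch): instruction selection
+ // can gate the VML[AS] patterns on this hook, e.g. via a TableGen
+ // predicate such as
+ //   def UseVMLx : Predicate<"Subtarget->useVMLx()">;
+ // so cores where VMLx is slow keep the separate multiply and add.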
bool hasFP16() const { return HasFP16; }
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 4a7a1e4..ba736e3 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -841,7 +841,7 @@ GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
<< getFunctionNumber() << '_' << uid << '_' << uid2
<< "_set_" << MBB->getNumber();
- return OutContext.GetOrCreateTemporarySymbol(Name.str());
+ return OutContext.GetOrCreateSymbol(Name.str());
}
MCSymbol *ARMAsmPrinter::
@@ -849,7 +849,7 @@ GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const {
SmallString<60> Name;
raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "JTI"
<< getFunctionNumber() << '_' << uid << '_' << uid2;
- return OutContext.GetOrCreateTemporarySymbol(Name.str());
+ return OutContext.GetOrCreateSymbol(Name.str());
}
void ARMAsmPrinter::printJTBlockOperand(const MachineInstr *MI, int OpNum) {
@@ -1132,6 +1132,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
else
// Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info pointers
+ // need to be indirect and pc-rel. We accomplish this by using NLPs
+ // (non-lazy pointers). However, sometimes the types are local to the
+ // file, so we need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
4/*size*/, 0/*addrspace*/);
@@ -1186,7 +1191,7 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// FIXME: MOVE TO SHARED PLACE.
unsigned Id = (unsigned)MI->getOperand(2).getImm();
const char *Prefix = MAI->getPrivateGlobalPrefix();
- MCSymbol *Label =OutContext.GetOrCreateTemporarySymbol(Twine(Prefix)
+ MCSymbol *Label = OutContext.GetOrCreateSymbol(Twine(Prefix)
+ "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id));
OutStreamer.EmitLabel(Label);
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
index 7cb305f..ab2b06b 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
@@ -75,7 +75,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const {
#endif
// Create a symbol for the name.
- return Ctx.GetOrCreateTemporarySymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name.str());
}
MCSymbol *ARMMCInstLower::
@@ -91,7 +91,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
#endif
// Create a symbol for the name.
- return Ctx.GetOrCreateTemporarySymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name.str());
}
MCOperand ARMMCInstLower::
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index c36fe63..7334259 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -46,10 +46,13 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
default:
break;
+ case ARM::VLD1q8:
+ case ARM::VLD1q16:
+ case ARM::VLD1q32:
+ case ARM::VLD1q64:
case ARM::VLD2d8:
case ARM::VLD2d16:
case ARM::VLD2d32:
- case ARM::VLD2d64:
case ARM::VLD2LNd8:
case ARM::VLD2LNd16:
case ARM::VLD2LNd32:
@@ -83,7 +86,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VLD3d8:
case ARM::VLD3d16:
case ARM::VLD3d32:
- case ARM::VLD3d64:
+ case ARM::VLD1d64T:
case ARM::VLD3LNd8:
case ARM::VLD3LNd16:
case ARM::VLD3LNd32:
@@ -128,7 +131,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VLD4d8:
case ARM::VLD4d16:
case ARM::VLD4d32:
- case ARM::VLD4d64:
+ case ARM::VLD1d64Q:
case ARM::VLD4LNd8:
case ARM::VLD4LNd16:
case ARM::VLD4LNd32:
@@ -170,10 +173,13 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
Stride = 2;
return true;
+ case ARM::VST1q8:
+ case ARM::VST1q16:
+ case ARM::VST1q32:
+ case ARM::VST1q64:
case ARM::VST2d8:
case ARM::VST2d16:
case ARM::VST2d32:
- case ARM::VST2d64:
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
@@ -207,7 +213,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3d8:
case ARM::VST3d16:
case ARM::VST3d32:
- case ARM::VST3d64:
+ case ARM::VST1d64T:
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
@@ -252,7 +258,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4d8:
case ARM::VST4d16:
case ARM::VST4d32:
- case ARM::VST4d64:
+ case ARM::VST1d64Q:
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 57b65cf..85d5ca0 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -12,6 +12,9 @@ Reimplement 'select' in terms of 'SEL'.
A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX
+Interesting optimization for PIC codegen on arm-linux:
+http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43129
+
//===---------------------------------------------------------------------===//
Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 29ae631..ad98839 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -200,6 +200,8 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
// It's illegal to emit pop instruction without operands.
if (NumRegs)
MBB.insert(MI, &*MIB);
+ else
+ MF.DeleteMachineInstr(MIB);
return true;
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index e4abcdb..55163f9 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -69,7 +69,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- if (RC == ARM::GPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
@@ -93,7 +93,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
- if (RC == ARM::GPRRegisterClass) {
+ if (RC == ARM::GPRRegisterClass || RC == ARM::tGPRRegisterClass) {
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
MachineMemOperand *MMO =
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 39f0749..d539e08 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -301,7 +301,15 @@ bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TB
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
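+ // Skip over any debug values at the end of the block; they are not
+ // terminators and must not confuse the branch analysis.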
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
@@ -362,6 +370,11 @@ unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (I->getOpcode() != Alpha::BR &&
I->getOpcode() != Alpha::COND_BRANCH_I &&
I->getOpcode() != Alpha::COND_BRANCH_F)
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index e3c3993..2471688 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -65,23 +65,23 @@ def HI16 : SDNodeXForm<imm, [{
//===----------------------------------------------------------------------===//
def imm3 : PatLeaf<(imm), [{return isInt<3>(N->getSExtValue());}]>;
-def uimm3 : PatLeaf<(imm), [{return isUint<3>(N->getZExtValue());}]>;
-def uimm4 : PatLeaf<(imm), [{return isUint<4>(N->getZExtValue());}]>;
-def uimm5 : PatLeaf<(imm), [{return isUint<5>(N->getZExtValue());}]>;
+def uimm3 : PatLeaf<(imm), [{return isUInt<3>(N->getZExtValue());}]>;
+def uimm4 : PatLeaf<(imm), [{return isUInt<4>(N->getZExtValue());}]>;
+def uimm5 : PatLeaf<(imm), [{return isUInt<5>(N->getZExtValue());}]>;
def uimm5m2 : PatLeaf<(imm), [{
uint64_t value = N->getZExtValue();
- return value % 2 == 0 && isUint<5>(value);
+ return value % 2 == 0 && isUInt<5>(value);
}]>;
def uimm6m4 : PatLeaf<(imm), [{
uint64_t value = N->getZExtValue();
- return value % 4 == 0 && isUint<6>(value);
+ return value % 4 == 0 && isUInt<6>(value);
}]>;
def imm7 : PatLeaf<(imm), [{return isInt<7>(N->getSExtValue());}]>;
def imm16 : PatLeaf<(imm), [{return isInt<16>(N->getSExtValue());}]>;
-def uimm16 : PatLeaf<(imm), [{return isUint<16>(N->getZExtValue());}]>;
+def uimm16 : PatLeaf<(imm), [{return isUInt<16>(N->getZExtValue());}]>;
def ximm16 : PatLeaf<(imm), [{
int64_t value = N->getSExtValue();
@@ -610,8 +610,7 @@ def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb),
"cc = !cc;", []>;
def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc),
- "$dst = $cc;",
- [/*(set D:$dst, (zext JustCC:$cc))*/]>;
+ "$dst = $cc;", []>;
def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc),
"$dst = cc;", []>;
@@ -859,17 +858,5 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)),
(CALLa tglobaladdr:$dst)>;
def : Pat<(BfinCall (i32 texternalsym:$dst)),
(CALLa texternalsym:$dst)>;
-
-//def : Pat<(sext JustCC:$cc),
-// (NEG (MOVECC_zext JustCC:$cc))>;
-//def : Pat<(anyext JustCC:$cc),
-// (MOVECC_zext JustCC:$cc)>;
-def : Pat<(i16 (zext JustCC:$cc)),
- (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>;
-def : Pat<(i16 (sext JustCC:$cc)),
- (EXTRACT_SUBREG (NEG (MOVECC_zext JustCC:$cc)), bfin_subreg_lo16)>;
-def : Pat<(i16 (anyext JustCC:$cc)),
- (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>;
-
def : Pat<(i16 (trunc D:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), bfin_subreg_lo16)>;
diff --git a/lib/Target/Blackfin/BlackfinIntrinsics.td b/lib/Target/Blackfin/BlackfinIntrinsics.td
index bf02cfe..ce21b08 100644
--- a/lib/Target/Blackfin/BlackfinIntrinsics.td
+++ b/lib/Target/Blackfin/BlackfinIntrinsics.td
@@ -21,14 +21,14 @@ let TargetPrefix = "bfin", isTarget = 1 in {
// Execute csync instruction with workarounds
def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">,
- Intrinsic<[llvm_void_ty]>;
+ Intrinsic<[]>;
// Execute ssync instruction with workarounds
def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">,
- Intrinsic<[llvm_void_ty]>;
+ Intrinsic<[]>;
// Execute idle instruction with workarounds
def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">,
- Intrinsic<[llvm_void_ty]>;
+ Intrinsic<[]>;
}
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index b39a342..84dc9ca 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -164,7 +164,7 @@ void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB,
return;
}
- if (isUint<16>(value)) {
+ if (isUInt<16>(value)) {
BuildMI(MBB, I, DL, TII.get(BF::LOADuimm16), Reg).addImm(value);
return;
}
@@ -255,13 +255,13 @@ BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(FIPos==1 && "Bad frame index operand");
MI.getOperand(FIPos).ChangeToRegister(BaseReg, false);
MI.getOperand(FIPos+1).setImm(Offset);
- if (isUint<6>(Offset)) {
+ if (isUInt<6>(Offset)) {
MI.setDesc(TII.get(isStore
? BF::STORE32p_uimm6m4
: BF::LOAD32p_uimm6m4));
return 0;
}
- if (BaseReg == BF::FP && isUint<7>(-Offset)) {
+ if (BaseReg == BF::FP && isUInt<7>(-Offset)) {
MI.setDesc(TII.get(isStore
? BF::STORE32fp_nimm7m4
: BF::LOAD32fp_nimm7m4));
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index c960974..1f21511 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -15,7 +15,6 @@
#ifndef LLVM_TARGET_IBMCELLSPU_H
#define LLVM_TARGET_IBMCELLSPU_H
-#include "llvm/System/DataTypes.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -25,73 +24,7 @@ namespace llvm {
FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
- /*--== Utility functions/predicates/etc used all over the place: --==*/
- //! Predicate test for a signed 10-bit value
- /*!
- \param Value The input value to be tested
-
- This predicate tests for a signed 10-bit value, returning the 10-bit value
- as a short if true.
- */
- template<typename T>
- inline bool isS10Constant(T Value);
-
- template<>
- inline bool isS10Constant<short>(short Value) {
- int SExtValue = ((int) Value << (32 - 10)) >> (32 - 10);
- return ((Value > 0 && Value <= (1 << 9) - 1)
- || (Value < 0 && (short) SExtValue == Value));
- }
-
- template<>
- inline bool isS10Constant<int>(int Value) {
- return (Value >= -(1 << 9) && Value <= (1 << 9) - 1);
- }
-
- template<>
- inline bool isS10Constant<uint32_t>(uint32_t Value) {
- return (Value <= ((1 << 9) - 1));
- }
-
- template<>
- inline bool isS10Constant<int64_t>(int64_t Value) {
- return (Value >= -(1 << 9) && Value <= (1 << 9) - 1);
- }
-
- template<>
- inline bool isS10Constant<uint64_t>(uint64_t Value) {
- return (Value <= ((1 << 9) - 1));
- }
-
- //! Predicate test for an unsigned 10-bit value
- /*!
- \param Value The input value to be tested
-
- This predicate tests for an unsigned 10-bit value, returning the 10-bit value
- as a short if true.
- */
- inline bool isU10Constant(short Value) {
- return (Value == (Value & 0x3ff));
- }
-
- inline bool isU10Constant(int Value) {
- return (Value == (Value & 0x3ff));
- }
-
- inline bool isU10Constant(uint32_t Value) {
- return (Value == (Value & 0x3ff));
- }
-
- inline bool isU10Constant(int64_t Value) {
- return (Value == (Value & 0x3ff));
- }
-
- inline bool isU10Constant(uint64_t Value) {
- return (Value == (Value & 0x3ff));
- }
-
extern Target TheCellSPUTarget;
-
}
// Defines symbolic names for the SPU instructions.
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 396a921..90f8310 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -44,28 +44,28 @@ namespace {
bool
isI64IntS10Immediate(ConstantSDNode *CN)
{
- return isS10Constant(CN->getSExtValue());
+ return isInt<10>(CN->getSExtValue());
}
//! ConstantSDNode predicate for i32 sign-extended, 10-bit immediates
bool
isI32IntS10Immediate(ConstantSDNode *CN)
{
- return isS10Constant(CN->getSExtValue());
+ return isInt<10>(CN->getSExtValue());
}
//! ConstantSDNode predicate for i32 unsigned 10-bit immediate values
bool
isI32IntU10Immediate(ConstantSDNode *CN)
{
- return isU10Constant(CN->getSExtValue());
+ return isUInt<10>(CN->getSExtValue());
}
//! ConstantSDNode predicate for i16 sign-extended, 10-bit immediate values
bool
isI16IntS10Immediate(ConstantSDNode *CN)
{
- return isS10Constant(CN->getSExtValue());
+ return isInt<10>(CN->getSExtValue());
}
//! SDNode predicate for i16 sign-extended, 10-bit immediate values
@@ -80,7 +80,7 @@ namespace {
bool
isI16IntU10Immediate(ConstantSDNode *CN)
{
- return isU10Constant((short) CN->getZExtValue());
+ return isUInt<10>((short) CN->getZExtValue());
}
//! SDNode predicate for i16 sign-extended, 10-bit immediate values
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index e863ee3..4b0d442 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -1107,7 +1107,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
- SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
+ unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
+ SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
false, false, 0);
Chain = Store.getOperand(0);
@@ -1491,7 +1492,7 @@ SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
return SDValue();
Value = Value >> 32;
}
- if (isS10Constant(Value))
+ if (isInt<10>(Value))
return DAG.getTargetConstant(Value, ValueType);
}
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 2306665..86825c8 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -450,7 +450,15 @@ SPUInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
@@ -513,6 +521,11 @@ SPUInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
if (I == MBB.begin())
return 0;
--I;
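+ // Ignore any trailing debug values; the branch to be removed may sit
+ // behind them.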
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (!isCondBranch(I) && !isUncondBranch(I))
return 0;
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 5068f77..6d1f87d 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -1268,7 +1268,12 @@ multiclass BitwiseAnd
defm AND : BitwiseAnd;
-// N.B.: vnot_conv is one of those special target selection pattern fragments,
+
+def vnot_cell_conv : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v4i32 immAllOnesV)))>;
+
+// N.B.: vnot_cell_conv is one of those special target selection pattern
+// fragments, in which we expect there to be a bit_convert on the constant.
// Bear in mind that llvm translates "not <reg>" to "xor <reg>, -1" (or in
// this case, a constant -1 vector.)
@@ -1301,7 +1306,7 @@ multiclass AndComplement
def r8: ANDCRegInst<R8C>;
// Sometimes, the xor pattern has a bitcast constant:
- def v16i8_conv: ANDCVecInst<v16i8, vnot_conv>;
+ def v16i8_conv: ANDCVecInst<v16i8, vnot_cell_conv>;
}
defm ANDC : AndComplement;
@@ -1934,7 +1939,7 @@ multiclass SelectBits
def v16i8: SELBVecInst<v16i8>;
def v8i16: SELBVecInst<v8i16>;
def v4i32: SELBVecInst<v4i32>;
- def v2i64: SELBVecInst<v2i64, vnot_conv>;
+ def v2i64: SELBVecInst<v2i64, vnot_cell_conv>;
def r128: SELBRegInst<GPRC>;
def r64: SELBRegInst<R64C>;
@@ -4373,7 +4378,7 @@ def : Pat<(v2f64 (bitconvert (v16i8 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(v2f64 (bitconvert (v8i16 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(v2f64 (bitconvert (v4i32 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(v2f64 (bitconvert (v2i64 VECREG:$src))), (v2f64 VECREG:$src)>;
-def : Pat<(v2f64 (bitconvert (v2f64 VECREG:$src))), (v2f64 VECREG:$src)>;
+def : Pat<(v2f64 (bitconvert (v4f32 VECREG:$src))), (v2f64 VECREG:$src)>;
def : Pat<(i128 (bitconvert (v16i8 VECREG:$src))),
(ORi128_vec VECREG:$src)>;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 8c78bab..f3071f2 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -28,6 +28,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -336,6 +337,7 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo *MFI = MF.getFrameInfo();
+ DebugLoc dl = II->getDebugLoc();
while (!MI.getOperand(i).isFI()) {
++i;
@@ -364,11 +366,22 @@ SPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// Replace the FrameIndex with base register with $sp (aka $r1)
SPOp.ChangeToRegister(SPU::R1, false);
- if (Offset > SPUFrameInfo::maxFrameOffset()
- || Offset < SPUFrameInfo::minFrameOffset()) {
- errs() << "Large stack adjustment ("
- << Offset
- << ") in SPURegisterInfo::eliminateFrameIndex.";
+
+ // If 'Offset' doesn't fit in the D-form instruction's immediate field,
+ // convert the instruction to X-form. If the instruction is not an AI
+ // (which takes an s10 immediate), assume it is a load/store that can
+ // take an s14 immediate.
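+ // Illustrative example (the offset is chosen for exposition, not taken
+ // from this patch): a D-form load such as
+ //   lqd   $r3, 20000($sp)
+ // cannot encode 20000 in its s14 immediate, so it is rewritten as
+ //   il    $r2, 20000
+ //   lqx   $r3, $r2, $sp
+ // where $r2 is a scratch register obtained from the scavenger below.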
+ if ((MI.getOpcode() == SPU::AIr32 && !isInt<10>(Offset))
+ || !isInt<14>(Offset)) {
+ int newOpcode = convertDFormToXForm(MI.getOpcode());
+ unsigned tmpReg = findScratchRegister(II, RS, &SPU::R32CRegClass, SPAdj);
+ BuildMI(MBB, II, dl, TII.get(SPU::ILr32), tmpReg)
+ .addImm(Offset);
+ BuildMI(MBB, II, dl, TII.get(newOpcode), MI.getOperand(0).getReg())
+ .addReg(tmpReg, RegState::Kill)
+ .addReg(SPU::R1);
+ // remove the replaced D-form instruction
+ MBB.erase(II);
} else {
MO.ChangeToImmediate(Offset);
}
@@ -423,6 +436,14 @@ void SPURegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
MF.getRegInfo().setPhysRegUnused(SPU::R0);
MF.getRegInfo().setPhysRegUnused(SPU::R1);
MF.getRegInfo().setPhysRegUnused(SPU::R2);
+
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetRegisterClass *RC = &SPU::R32CRegClass;
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
+
}
void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
@@ -448,7 +469,8 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
assert((FrameSize & 0xf) == 0
&& "SPURegisterInfo::emitPrologue: FrameSize not aligned");
- if (FrameSize > 0 || MFI->hasCalls()) {
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->hasCalls()) {
FrameSize = -(FrameSize + SPUFrameInfo::minStackSize());
if (hasDebugInfo) {
// Mark effective beginning of when frame pointer becomes valid.
@@ -460,14 +482,14 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
// for the ABI
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R0).addImm(16)
.addReg(SPU::R1);
- if (isS10Constant(FrameSize)) {
+ if (isInt<10>(FrameSize)) {
// Spill $sp to adjusted $sp
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1).addImm(FrameSize)
.addReg(SPU::R1);
// Adjust $sp by required amount
BuildMI(MBB, MBBI, dl, TII.get(SPU::AIr32), SPU::R1).addReg(SPU::R1)
.addImm(FrameSize);
- } else if (FrameSize <= (1 << 16) - 1 && FrameSize >= -(1 << 16)) {
+ } else if (isInt<16>(FrameSize)) {
// Frame size can be loaded into ILr32n, so temporarily spill $r2 and use
// $r2 to adjust $sp:
BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr128), SPU::R2)
@@ -475,7 +497,7 @@ void SPURegisterInfo::emitPrologue(MachineFunction &MF) const
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::ILr32), SPU::R2)
.addImm(FrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(SPU::STQDr32), SPU::R1)
+ BuildMI(MBB, MBBI, dl, TII.get(SPU::STQXr32), SPU::R1)
.addReg(SPU::R2)
.addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::Ar32), SPU::R1)
@@ -549,9 +571,11 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
"Can only insert epilog into returning blocks");
assert((FrameSize & 0xf) == 0
&& "SPURegisterInfo::emitEpilogue: FrameSize not aligned");
- if (FrameSize > 0 || MFI->hasCalls()) {
+
+ // the "empty" frame size is 16 - just the register scavenger spill slot
+ if (FrameSize > 16 || MFI->hasCalls()) {
FrameSize = FrameSize + SPUFrameInfo::minStackSize();
- if (isS10Constant(FrameSize + LinkSlotOffset)) {
+ if (isInt<10>(FrameSize + LinkSlotOffset)) {
// Reload $lr, adjust $sp by required amount
// Note: We do this to slightly improve dual issue -- not by much, but it
// is an opportunity for dual issue.
@@ -574,7 +598,7 @@ SPURegisterInfo::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
.addReg(SPU::R2);
BuildMI(MBB, MBBI, dl, TII.get(SPU::LQDr128), SPU::R0)
.addImm(16)
- .addReg(SPU::R2);
+ .addReg(SPU::R1);
BuildMI(MBB, MBBI, dl, TII.get(SPU::SFIr32), SPU::R2).
addReg(SPU::R2)
.addImm(16);
@@ -618,4 +642,43 @@ SPURegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return SPUGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
}
+int
+SPURegisterInfo::convertDFormToXForm(int dFormOpcode) const
+{
+ switch(dFormOpcode)
+ {
+ case SPU::AIr32: return SPU::Ar32;
+ case SPU::LQDr32: return SPU::LQXr32;
+ case SPU::LQDr128: return SPU::LQXr128;
+ case SPU::LQDv16i8: return SPU::LQXv16i8;
+ case SPU::LQDv4f32: return SPU::LQXv4f32;
+ case SPU::STQDr32: return SPU::STQXr32;
+ case SPU::STQDr128: return SPU::STQXr128;
+ case SPU::STQDv16i8: return SPU::STQXv16i8;
+ case SPU::STQDv4i32: return SPU::STQXv4i32;
+ case SPU::STQDv4f32: return SPU::STQXv4f32;
+
+  default: assert(false && "Unhandled D-form to X-form conversion");
+  }
+  // The default case asserts, but we still need to return something to keep
+  // the compiler happy.
+  return dFormOpcode;
+}
+
+// TODO: This is copied verbatim from PPC. Could this convenience function
+// be moved to the RegScavenger class?
+unsigned
+SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const
+{
+ assert(RS && "Register scavenging must be on");
+ unsigned Reg = RS->FindUnusedReg(RC);
+ if (Reg == 0)
+ Reg = RS->scavengeRegister(RC, II, SPAdj);
+  assert(Reg && "Register scavenger failed");
+ return Reg;
+}
+
#include "SPUGenRegisterInfo.inc"
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 48feb5c..0a70318 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -53,6 +53,10 @@ namespace llvm {
virtual const TargetRegisterClass* const *
getCalleeSavedRegClasses(const MachineFunction *MF) const;
+ //! Allow for scavenging, so we can get scratch registers when needed.
+ virtual bool requiresRegisterScavenging(const MachineFunction &MF) const
+ { return true; }
+
//! Return the reserved registers
BitVector getReservedRegs(const MachineFunction &MF) const;
@@ -97,6 +101,21 @@ namespace llvm {
//! Get DWARF debugging register number
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+
+ //! Convert D-form load/store to X-form load/store
+ /*!
+      Converts a register-displacement load/store into a register-indexed
+      load/store for large stack frames, when the stack frame exceeds the
+      range of an s10 displacement.
+ */
+ int convertDFormToXForm(int dFormOpcode) const;
+
+ //! Acquire an unused register in an emergency.
+ unsigned findScratchRegister(MachineBasicBlock::iterator II,
+ RegScavenger *RS,
+ const TargetRegisterClass *RC,
+ int SPAdj) const;
+
};
} // end namespace llvm
diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td
index 76eb563..82552fa 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsics.td
+++ b/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -21,11 +21,11 @@ let TargetPrefix = "mblaze", isTarget = 1 in {
[llvm_i32_ty],
[IntrWriteMem]>;
- class MBFSL_Put_Intrinsic : Intrinsic<[llvm_void_ty],
+ class MBFSL_Put_Intrinsic : Intrinsic<[],
[llvm_i32_ty, llvm_i32_ty],
[IntrWriteMem]>;
- class MBFSL_PutT_Intrinsic : Intrinsic<[llvm_void_ty],
+ class MBFSL_PutT_Intrinsic : Intrinsic<[],
[llvm_i32_ty],
[IntrWriteMem]>;
}
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index ac41cc8..15d16ec 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -424,7 +424,7 @@ void MSILWriter::printPtrLoad(uint64_t N) {
case Module::Pointer32:
printSimpleInstruction("ldc.i4",utostr(N).c_str());
// FIXME: Need overflow test?
- if (!isUInt32(N)) {
+ if (!isUInt<32>(N)) {
errs() << "Value = " << utostr(N) << '\n';
llvm_unreachable("32-bit pointer overflowed");
}
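
This patch migrates many call sites from the old fixed-width predicates
(isInt16, isUInt32, ...) to the templated forms. A minimal sketch of what the
templates in llvm/Support/MathExtras.h look like, paraphrased from memory and
not part of this patch:

    template <unsigned N> inline bool isInt(int64_t x) {
      // True iff x fits in a signed N-bit integer.
      return N >= 64 || (x >= -(INT64_C(1) << (N - 1)) &&
                         x <  (INT64_C(1) << (N - 1)));
    }
    template <unsigned N> inline bool isUInt(uint64_t x) {
      // True iff x fits in an unsigned N-bit integer.
      return N >= 64 || x < (UINT64_C(1) << N);
    }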
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
index 32c6b04..f4d7d8a 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430MCInstLower.cpp
@@ -59,7 +59,7 @@ GetJumpTableSymbol(const MachineOperand &MO) const {
}
// Create a symbol for the name.
- return Ctx.GetOrCreateTemporarySymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name.str());
}
MCSymbol *MSP430MCInstLower::
@@ -75,7 +75,7 @@ GetConstantPoolIndexSymbol(const MachineOperand &MO) const {
}
// Create a symbol for the name.
- return Ctx.GetOrCreateTemporarySymbol(Name.str());
+ return Ctx.GetOrCreateSymbol(Name.str());
}
MCOperand MSP430MCInstLower::
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 6372482..e584770 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -173,6 +173,8 @@ unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
if (I->getOpcode() != MSP430::JMP &&
I->getOpcode() != MSP430::JCC)
break;
@@ -241,6 +243,9 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I = MBB.end();
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
+
// Working from the bottom, when we see a non-terminator
// instruction, we're done.
if (!isUnpredicatedTerminator(I))
diff --git a/lib/Target/Mangler.cpp b/lib/Target/Mangler.cpp
index fb93706..1d5c511 100644
--- a/lib/Target/Mangler.cpp
+++ b/lib/Target/Mangler.cpp
@@ -235,10 +235,7 @@ std::string Mangler::getNameWithPrefix(const GlobalValue *GV,
MCSymbol *Mangler::getSymbol(const GlobalValue *GV) {
SmallString<60> NameStr;
getNameWithPrefix(NameStr, GV, false);
- if (!GV->hasPrivateLinkage())
- return Context.GetOrCreateSymbol(NameStr.str());
-
- return Context.GetOrCreateTemporarySymbol(NameStr.str());
+ return Context.GetOrCreateSymbol(NameStr.str());
}
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index fa4518d..e948917 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -26,8 +26,9 @@
// Floating Point Compare and Branch
def SDT_MipsFPBrcond : SDTypeProfile<0, 3, [SDTCisSameAs<0, 2>, SDTCisInt<0>,
SDTCisVT<1, OtherVT>]>;
-def SDT_MipsFPCmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisFP<0>,
- SDTCisInt<2>]>;
+def SDT_MipsFPCmp : SDTypeProfile<1, 3, [SDTCisVT<0, i32>,
+ SDTCisSameAs<1, 2>, SDTCisFP<1>,
+ SDTCisInt<3>]>;
def SDT_MipsFPSelectCC : SDTypeProfile<1, 4, [SDTCisInt<1>, SDTCisInt<4>,
SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>]>;
@@ -244,12 +245,13 @@ def MIPS_FCOND_NGT : PatLeaf<(i32 15)>;
/// Floating Point Compare
let hasDelaySlot = 1, Defs=[FCR31] in {
def FCMP_S32 : FCC<0x0, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc),
- "c.$cc.s $fs, $ft", [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc),
- (implicit FCR31)]>;
+ "c.$cc.s $fs, $ft",
+ [(set FCR31, (MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc))]>;
def FCMP_D32 : FCC<0x1, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc),
- "c.$cc.d $fs, $ft", [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc),
- (implicit FCR31)]>, Requires<[In32BitMode]>;
+ "c.$cc.d $fs, $ft",
+ [(set FCR31, (MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc))]>,
+ Requires<[In32BitMode]>;
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 1a9bffc..85cf064 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -433,7 +433,15 @@ bool MipsInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
{
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
@@ -562,6 +570,11 @@ RemoveBranch(MachineBasicBlock &MBB) const
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (I->getOpcode() != Mips::J &&
GetCondFromBranchOpc(I->getOpcode()) == Mips::COND_INVALID)
return 0;
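
The same backwards scan past DBG_VALUE instructions recurs in several targets
in this patch (MSP430, Mips, PIC16, PPC, SystemZ). A hypothetical helper
capturing the idiom; callers must still handle the case where the returned
iterator is the first instruction and is itself a debug value:

    static MachineBasicBlock::iterator
    skipDebugValuesBackward(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I) {
      // Step over DBG_VALUEs; stop at the block start even if it is one.
      while (I != MBB.begin() && I->isDebugValue())
        --I;
      return I;
    }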
diff --git a/lib/Target/PIC16/PIC16InstrInfo.cpp b/lib/Target/PIC16/PIC16InstrInfo.cpp
index 2fb405e..da16e83 100644
--- a/lib/Target/PIC16/PIC16InstrInfo.cpp
+++ b/lib/Target/PIC16/PIC16InstrInfo.cpp
@@ -226,6 +226,11 @@ bool PIC16InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
// Get the terminator instruction.
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return true;
+ --I;
+ }
// Handle unconditional branches. If the unconditional branch's target is
// successor basic block then remove the unconditional branch.
if (I->getOpcode() == PIC16::br_uncond && AllowModify) {
diff --git a/lib/Target/PIC16/PIC16Section.cpp b/lib/Target/PIC16/PIC16Section.cpp
index a96ebb8..2505b11 100644
--- a/lib/Target/PIC16/PIC16Section.cpp
+++ b/lib/Target/PIC16/PIC16Section.cpp
@@ -17,10 +17,9 @@ using namespace llvm;
// This is the only way to create a PIC16Section. Sections created here
// do not need to be explicitly deleted as they are managed by auto_ptrs.
-PIC16Section *PIC16Section::Create(const StringRef &Name,
- PIC16SectionType Ty,
- const std::string &Address,
- int Color, MCContext &Ctx) {
+PIC16Section *PIC16Section::Create(StringRef Name, PIC16SectionType Ty,
+ StringRef Address, int Color,
+ MCContext &Ctx) {
/// Determine the internal SectionKind info.
/// Users of PIC16Section class should not need to know the internal
@@ -59,8 +58,17 @@ PIC16Section *PIC16Section::Create(const StringRef &Name,
}
+  // Copy the strings into context-allocated memory so they get freed when
+  // the context is destroyed.
+ char *NameCopy = static_cast<char*>(Ctx.Allocate(Name.size(), 1));
+ memcpy(NameCopy, Name.data(), Name.size());
+ char *AddressCopy = static_cast<char*>(Ctx.Allocate(Address.size(), 1));
+ memcpy(AddressCopy, Address.data(), Address.size());
+
// Create the Section.
- PIC16Section *S = new (Ctx) PIC16Section(Name, K, Address, Color);
+ PIC16Section *S =
+ new (Ctx) PIC16Section(StringRef(NameCopy, Name.size()), K,
+ StringRef(AddressCopy, Address.size()), Color);
S->T = Ty;
return S;
}
diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h
index 566f920..9039ca7 100644
--- a/lib/Target/PIC16/PIC16Section.h
+++ b/lib/Target/PIC16/PIC16Section.h
@@ -30,11 +30,11 @@ namespace llvm {
PIC16SectionType T;
/// Name of the section to uniquely identify it.
- std::string Name;
+ StringRef Name;
/// User can specify an address at which a section should be placed.
/// Negative value here means user hasn't specified any.
- std::string Address;
+ StringRef Address;
/// Overlay information - Sections with same color can be overlaid on
/// one another.
@@ -43,17 +43,16 @@ namespace llvm {
/// Total size of all data objects contained here.
unsigned Size;
- PIC16Section(const StringRef &name, SectionKind K, const std::string &addr,
- int color)
+ PIC16Section(StringRef name, SectionKind K, StringRef addr, int color)
: MCSection(K), Name(name), Address(addr), Color(color), Size(0) {
}
public:
/// Return the name of the section.
- const std::string &getName() const { return Name; }
+ StringRef getName() const { return Name; }
/// Return the Address of the section.
- const std::string &getAddress() const { return Address; }
+ StringRef getAddress() const { return Address; }
/// Return the Color of the section.
int getColor() const { return Color; }
@@ -64,6 +63,8 @@ namespace llvm {
void setSize(unsigned size) { Size = size; }
/// Contained data objects.
+ // FIXME: This vector is leaked because sections are allocated with a
+ // BumpPtrAllocator.
std::vector<const GlobalVariable *>Items;
/// Check section type.
@@ -77,8 +78,8 @@ namespace llvm {
PIC16SectionType getType() const { return T; }
/// This would be the only way to create a section.
- static PIC16Section *Create(const StringRef &Name, PIC16SectionType Ty,
- const std::string &Address, int Color,
+ static PIC16Section *Create(StringRef Name, PIC16SectionType Ty,
+ StringRef Address, int Color,
MCContext &Ctx);
/// Override this as PIC16 has its own way of printing switching
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index ed6fc9d..5adefd3 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -309,8 +309,8 @@ namespace {
const MCSymbol *&TOCEntry = TOC[Sym];
if (TOCEntry == 0)
TOCEntry = OutContext.
- GetOrCreateTemporarySymbol(StringRef(MAI->getPrivateGlobalPrefix()) +
- "C" + Twine(LabelID++));
+ GetOrCreateSymbol(StringRef(MAI->getPrivateGlobalPrefix()) +
+ "C" + Twine(LabelID++));
O << *TOCEntry << "@toc";
}
@@ -674,14 +674,14 @@ static const MCSymbol *GetLazyPtr(const MCSymbol *Sym, MCContext &Ctx) {
// Remove $stub suffix, add $lazy_ptr.
SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end()-5);
TmpStr += "$lazy_ptr";
- return Ctx.GetOrCreateTemporarySymbol(TmpStr.str());
+ return Ctx.GetOrCreateSymbol(TmpStr.str());
}
static const MCSymbol *GetAnonSym(const MCSymbol *Sym, MCContext &Ctx) {
// Add $tmp suffix to $stub, yielding $stub$tmp.
SmallString<128> TmpStr(Sym->getName().begin(), Sym->getName().end());
TmpStr += "$tmp";
- return Ctx.GetOrCreateTemporarySymbol(TmpStr.str());
+ return Ctx.GetOrCreateSymbol(TmpStr.str());
}
void PPCDarwinAsmPrinter::
@@ -811,6 +811,11 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
OutStreamer.EmitIntValue(0, isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
else
// Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info pointers
+ // need to be indirect and pc-rel. We accomplish this by using NLPs.
+ // However, sometimes the types are local to the file. So we need to
+ // fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
diff --git a/lib/Target/PowerPC/PPCBranchSelector.cpp b/lib/Target/PowerPC/PPCBranchSelector.cpp
index a752421..52948c8 100644
--- a/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -130,7 +130,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
}
// If this branch is in range, ignore it.
- if (isInt16(BranchSize)) {
+ if (isInt<16>(BranchSize)) {
MBBStartOffset += 4;
continue;
}
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 9d79c0d..4f88d35d 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -470,11 +470,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
if (isInt32Immediate(RHS, Imm)) {
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
- if (isUInt16(Imm))
+ if (isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
// If this is a 16-bit signed immediate, fold it.
- if (isInt16((int)Imm))
+ if (isInt<16>((int)Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
@@ -494,7 +494,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
}
Opc = PPC::CMPLW;
} else if (ISD::isUnsignedIntSetCC(CC)) {
- if (isInt32Immediate(RHS, Imm) && isUInt16(Imm))
+ if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
Opc = PPC::CMPLW;
@@ -511,11 +511,11 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
if (isInt64Immediate(RHS.getNode(), Imm)) {
// SETEQ/SETNE comparison with 16-bit immediate, fold it.
- if (isUInt16(Imm))
+ if (isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
// If this is a 16-bit signed immediate, fold it.
- if (isInt16(Imm))
+ if (isInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
getI32Imm(Imm & 0xFFFF)), 0);
@@ -528,7 +528,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
// xoris r0,r3,0x1234
// cmpldi cr0,r0,0x5678
// beq cr0,L6
- if (isUInt32(Imm)) {
+ if (isUInt<32>(Imm)) {
SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
getI64Imm(Imm >> 16)), 0);
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
@@ -537,7 +537,7 @@ SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
}
Opc = PPC::CMPLD;
} else if (ISD::isUnsignedIntSetCC(CC)) {
- if (isInt64Immediate(RHS.getNode(), Imm) && isUInt16(Imm))
+ if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
getI64Imm(Imm & 0xFFFF)), 0);
Opc = PPC::CMPLD;
@@ -761,12 +761,12 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
unsigned Shift = 0;
// If it can't be represented as a 32 bit value.
- if (!isInt32(Imm)) {
+ if (!isInt<32>(Imm)) {
Shift = CountTrailingZeros_64(Imm);
int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
// If the shifted value fits 32 bits.
- if (isInt32(ImmSh)) {
+ if (isInt<32>(ImmSh)) {
// Go with the shifted value.
Imm = ImmSh;
} else {
@@ -785,7 +785,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
unsigned Hi = (Imm >> 16) & 0xFFFF;
// Simple value.
- if (isInt16(Imm)) {
+ if (isInt<16>(Imm)) {
// Just the Lo bits.
Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(Lo));
} else if (Lo) {
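
A worked example of the shift trick above, using a hypothetical value:
0x0000123400000000 does not fit in a signed 32-bit immediate, but it has 34
trailing zero bits, so the shifted value 0x48D does; the selector materializes
0x48D and shifts it back left by 34:

    #include <cstdint>
    int main() {
      uint64_t Imm = 0x0000123400000000ULL;   // not a signed 32-bit value
      unsigned Shift = __builtin_ctzll(Imm);  // 34; CountTrailingZeros_64
      uint64_t ImmSh = Imm >> Shift;          // 0x48D, fits in 32 bits
      return (ImmSh << Shift) == Imm ? 0 : 1; // rebuild and verify
    }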
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 2c072c1..e67666d 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -5539,8 +5539,16 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
return false;
}
-EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
+/// getOptimalMemOpType - Returns the target-specific optimal type for load
+/// and store operations as a result of memset, memcpy, and memmove lowering.
+/// If DstAlign is zero, it means the destination alignment can satisfy any
+/// constraint. Similarly, if SrcAlign is zero, it means there is no need to
+/// check it against the alignment requirement, probably because the source
+/// does not need to be loaded. It returns EVT::Other if SelectionDAG should
+/// be responsible for determining the type.
+EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool SafeToUseFP,
SelectionDAG &DAG) const {
if (this->PPCSubTarget.isPPC64()) {
return MVT::i64;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 9c390ac..19fefab 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -347,9 +347,16 @@ namespace llvm {
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
- virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
- SelectionDAG &DAG) const;
+    /// getOptimalMemOpType - Returns the target-specific optimal type for load
+    /// and store operations as a result of memset, memcpy, and memmove lowering.
+    /// If DstAlign is zero, it means the destination alignment can satisfy any
+    /// constraint. Similarly, if SrcAlign is zero, it means there is no need to
+    /// check it against the alignment requirement, probably because the source
+    /// does not need to be loaded. It returns EVT::Other if SelectionDAG should
+    /// be responsible for determining the type.
+ virtual EVT getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool SafeToUseFP, SelectionDAG &DAG) const;
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td
index 3ff8f27..256370f 100644
--- a/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -15,6 +15,10 @@
// Altivec transformation functions and pattern fragments.
//
+// Since we canonicalize buildvectors to v16i8, the all-ones "-1" operand of
+// every vnot will be of that type.
+def vnot_ppc : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
def vpkuhum_shuffle : PatFrag<(ops node:$lhs, node:$rhs),
(vector_shuffle node:$lhs, node:$rhs), [{
@@ -321,7 +325,8 @@ def VAND : VXForm_1<1028, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
[(set VRRC:$vD, (and (v4i32 VRRC:$vA), VRRC:$vB))]>;
def VANDC : VXForm_1<1092, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vandc $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (and (v4i32 VRRC:$vA), (vnot VRRC:$vB)))]>;
+ [(set VRRC:$vD, (and (v4i32 VRRC:$vA),
+ (vnot_ppc VRRC:$vB)))]>;
def VCFSX : VXForm_1<842, (outs VRRC:$vD), (ins u5imm:$UIMM, VRRC:$vB),
"vcfsx $vD, $vB, $UIMM", VecFP,
@@ -435,7 +440,8 @@ def VSUM4UBS: VX1_Int<1544, "vsum4ubs", int_ppc_altivec_vsum4ubs>;
def VNOR : VXForm_1<1284, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vnor $vD, $vA, $vB", VecFP,
- [(set VRRC:$vD, (vnot (or (v4i32 VRRC:$vA), VRRC:$vB)))]>;
+ [(set VRRC:$vD, (vnot_ppc (or (v4i32 VRRC:$vA),
+ VRRC:$vB)))]>;
def VOR : VXForm_1<1156, (outs VRRC:$vD), (ins VRRC:$vA, VRRC:$vB),
"vor $vD, $vA, $vB", VecFP,
[(set VRRC:$vD, (or (v4i32 VRRC:$vA), VRRC:$vB))]>;
@@ -640,12 +646,11 @@ def:Pat<(vmrghw_unary_shuffle (v16i8 VRRC:$vA), undef),
(VMRGHW VRRC:$vA, VRRC:$vA)>;
// Logical Operations
-def : Pat<(v4i32 (vnot VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
-def : Pat<(v4i32 (vnot_conv VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
+def : Pat<(v4i32 (vnot_ppc VRRC:$vA)), (VNOR VRRC:$vA, VRRC:$vA)>;
-def : Pat<(v4i32 (vnot_conv (or VRRC:$A, VRRC:$B))),
+def : Pat<(v4i32 (vnot_ppc (or VRRC:$A, VRRC:$B))),
(VNOR VRRC:$A, VRRC:$B)>;
-def : Pat<(v4i32 (and VRRC:$A, (vnot_conv VRRC:$B))),
+def : Pat<(v4i32 (and VRRC:$A, (vnot_ppc VRRC:$B))),
(VANDC VRRC:$A, VRRC:$B)>;
def : Pat<(fmul VRRC:$vA, VRRC:$vB),
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 9895bea..82c637e 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -213,7 +213,15 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
@@ -281,6 +289,11 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC)
return 0;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 6e7880e..44c5fe6 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -512,7 +512,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *MI = I;
DebugLoc dl = MI->getDebugLoc();
- if (isInt16(CalleeAmt)) {
+ if (isInt<16>(CalleeAmt)) {
BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg).addReg(StackReg).
addImm(CalleeAmt);
} else {
@@ -596,7 +596,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II,
else
Reg = PPC::R0;
- if (MaxAlign < TargetAlign && isInt16(FrameSize)) {
+ if (MaxAlign < TargetAlign && isInt<16>(FrameSize)) {
BuildMI(MBB, II, dl, TII.get(PPC::ADDI), Reg)
.addReg(PPC::R31)
.addImm(FrameSize);
@@ -798,7 +798,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// clear can be encoded. This is extremely uncommon, because normally you
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
- if (isInt16(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
+ if (isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
if (isIXAddr)
Offset >>= 2; // The actual encoded value has the low two bits zero.
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
@@ -1375,8 +1375,9 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
if (!isPPC64) {
// PPC32.
if (ALIGN_STACK && MaxAlign > TargetAlign) {
- assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!");
- assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!");
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
.addReg(PPC::R1)
@@ -1390,7 +1391,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::R1)
.addReg(PPC::R1)
.addReg(PPC::R0);
- } else if (isInt16(NegFrameSize)) {
+ } else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
.addReg(PPC::R1)
.addImm(NegFrameSize)
@@ -1408,8 +1409,9 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
}
} else { // PPC64.
if (ALIGN_STACK && MaxAlign > TargetAlign) {
- assert(isPowerOf2_32(MaxAlign)&&isInt16(MaxAlign)&&"Invalid alignment!");
- assert(isInt16(NegFrameSize) && "Unhandled stack size and alignment!");
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+ assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
.addReg(PPC::X1)
@@ -1422,7 +1424,7 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::X1)
.addReg(PPC::X1)
.addReg(PPC::X0);
- } else if (isInt16(NegFrameSize)) {
+ } else if (isInt<16>(NegFrameSize)) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
.addReg(PPC::X1)
.addImm(NegFrameSize / 4)
@@ -1591,7 +1593,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
// enabled (=> hasFastCall()==true) the fastcc call might contain a tail
// call which invalidates the stack pointer value in SP(0). So we use the
// value of R31 in this case.
- if (FI->hasFastCall() && isInt16(FrameSize)) {
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
assert(hasFP(MF) && "Expecting a valid the frame pointer.");
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
.addReg(PPC::R31).addImm(FrameSize);
@@ -1605,7 +1607,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
.addReg(PPC::R1)
.addReg(PPC::R31)
.addReg(PPC::R0);
- } else if (isInt16(FrameSize) &&
+ } else if (isInt<16>(FrameSize) &&
(!ALIGN_STACK || TargetAlign >= MaxAlign) &&
!MFI->hasVarSizedObjects()) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
@@ -1615,7 +1617,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
.addImm(0).addReg(PPC::R1);
}
} else {
- if (FI->hasFastCall() && isInt16(FrameSize)) {
+ if (FI->hasFastCall() && isInt<16>(FrameSize)) {
assert(hasFP(MF) && "Expecting a valid the frame pointer.");
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
.addReg(PPC::X31).addImm(FrameSize);
@@ -1629,7 +1631,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
.addReg(PPC::X1)
.addReg(PPC::X31)
.addReg(PPC::X0);
- } else if (isInt16(FrameSize) && TargetAlign >= MaxAlign &&
+ } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
!MFI->hasVarSizedObjects()) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
.addReg(PPC::X1).addImm(FrameSize);
@@ -1678,7 +1680,7 @@ void PPCRegisterInfo::emitEpilogue(MachineFunction &MF,
unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
- if (CallerAllocatedAmt && isInt16(CallerAllocatedAmt)) {
+ if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
.addReg(StackReg).addImm(CallerAllocatedAmt);
} else {
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index f46840c..8c5e905 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -316,19 +316,19 @@ def FBCONVF64 : Pseudo<(outs FP64:$dst), (ins GR64:$src),
let Defs = [PSW] in {
def FCMP32rr : Pseudo<(outs), (ins FP32:$src1, FP32:$src2),
"cebr\t$src1, $src2",
- [(SystemZcmp FP32:$src1, FP32:$src2), (implicit PSW)]>;
+ [(set PSW, (SystemZcmp FP32:$src1, FP32:$src2))]>;
def FCMP64rr : Pseudo<(outs), (ins FP64:$src1, FP64:$src2),
"cdbr\t$src1, $src2",
- [(SystemZcmp FP64:$src1, FP64:$src2), (implicit PSW)]>;
+ [(set PSW, (SystemZcmp FP64:$src1, FP64:$src2))]>;
def FCMP32rm : Pseudo<(outs), (ins FP32:$src1, rriaddr12:$src2),
"ceb\t$src1, $src2",
- [(SystemZcmp FP32:$src1, (load rriaddr12:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp FP32:$src1,
+ (load rriaddr12:$src2)))]>;
def FCMP64rm : Pseudo<(outs), (ins FP64:$src1, rriaddr12:$src2),
"cdb\t$src1, $src2",
- [(SystemZcmp FP64:$src1, (load rriaddr12:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp FP64:$src1,
+ (load rriaddr12:$src2)))]>;
} // Defs = [PSW]
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 5fa7e8c..06f01e7 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -424,6 +424,8 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I = MBB.end();
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
// Working from the bottom, when we see a non-terminator
// instruction, we're done.
if (!isUnpredicatedTerminator(I))
@@ -500,6 +502,8 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
if (I->getOpcode() != SystemZ::JMP &&
getCondFromBranchOpc(I->getOpcode()) == SystemZCC::INVALID)
break;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 0d1af23..22bde4e 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -31,7 +31,8 @@ class SDTCisI64<int OpNum> : SDTCisVT<OpNum, i64>;
def SDT_SystemZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>;
def SDT_SystemZCallSeqEnd : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>;
-def SDT_CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
+def SDT_CmpTest : SDTypeProfile<1, 2, [SDTCisI64<0>,
+ SDTCisSameAs<1, 2>]>;
def SDT_BrCond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>,
SDTCisI8<1>, SDTCisVT<2, i64>]>;
@@ -980,100 +981,89 @@ let Defs = [PSW] in {
def CMP32rr : RRI<0x19,
(outs), (ins GR32:$src1, GR32:$src2),
"cr\t$src1, $src2",
- [(SystemZcmp GR32:$src1, GR32:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR32:$src1, GR32:$src2))]>;
def CMP64rr : RREI<0xB920,
(outs), (ins GR64:$src1, GR64:$src2),
"cgr\t$src1, $src2",
- [(SystemZcmp GR64:$src1, GR64:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR64:$src1, GR64:$src2))]>;
def CMP32ri : RILI<0xC2D,
(outs), (ins GR32:$src1, s32imm:$src2),
"cfi\t$src1, $src2",
- [(SystemZcmp GR32:$src1, imm:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR32:$src1, imm:$src2))]>;
def CMP64ri32 : RILI<0xC2C,
(outs), (ins GR64:$src1, s32imm64:$src2),
"cgfi\t$src1, $src2",
- [(SystemZcmp GR64:$src1, i64immSExt32:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR64:$src1, i64immSExt32:$src2))]>;
def CMP32rm : RXI<0x59,
(outs), (ins GR32:$src1, rriaddr12:$src2),
"c\t$src1, $src2",
- [(SystemZcmp GR32:$src1, (load rriaddr12:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr12:$src2)))]>;
def CMP32rmy : RXYI<0xE359,
(outs), (ins GR32:$src1, rriaddr:$src2),
"cy\t$src1, $src2",
- [(SystemZcmp GR32:$src1, (load rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr:$src2)))]>;
def CMP64rm : RXYI<0xE320,
(outs), (ins GR64:$src1, rriaddr:$src2),
"cg\t$src1, $src2",
- [(SystemZcmp GR64:$src1, (load rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZcmp GR64:$src1, (load rriaddr:$src2)))]>;
def UCMP32rr : RRI<0x15,
(outs), (ins GR32:$src1, GR32:$src2),
"clr\t$src1, $src2",
- [(SystemZucmp GR32:$src1, GR32:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR32:$src1, GR32:$src2))]>;
def UCMP64rr : RREI<0xB921,
(outs), (ins GR64:$src1, GR64:$src2),
"clgr\t$src1, $src2",
- [(SystemZucmp GR64:$src1, GR64:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1, GR64:$src2))]>;
def UCMP32ri : RILI<0xC2F,
(outs), (ins GR32:$src1, i32imm:$src2),
"clfi\t$src1, $src2",
- [(SystemZucmp GR32:$src1, imm:$src2),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR32:$src1, imm:$src2))]>;
def UCMP64ri32 : RILI<0xC2E,
(outs), (ins GR64:$src1, i64i32imm:$src2),
"clgfi\t$src1, $src2",
- [(SystemZucmp GR64:$src1, i64immZExt32:$src2),
- (implicit PSW)]>;
+ [(set PSW,(SystemZucmp GR64:$src1, i64immZExt32:$src2))]>;
def UCMP32rm : RXI<0x55,
(outs), (ins GR32:$src1, rriaddr12:$src2),
"cl\t$src1, $src2",
- [(SystemZucmp GR32:$src1, (load rriaddr12:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR32:$src1,
+ (load rriaddr12:$src2)))]>;
def UCMP32rmy : RXYI<0xE355,
(outs), (ins GR32:$src1, rriaddr:$src2),
"cly\t$src1, $src2",
- [(SystemZucmp GR32:$src1, (load rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR32:$src1,
+ (load rriaddr:$src2)))]>;
def UCMP64rm : RXYI<0xE351,
(outs), (ins GR64:$src1, rriaddr:$src2),
"clg\t$src1, $src2",
- [(SystemZucmp GR64:$src1, (load rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (load rriaddr:$src2)))]>;
def CMPSX64rr32 : RREI<0xB930,
(outs), (ins GR64:$src1, GR32:$src2),
"cgfr\t$src1, $src2",
- [(SystemZucmp GR64:$src1, (sext GR32:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (sext GR32:$src2)))]>;
def UCMPZX64rr32 : RREI<0xB931,
(outs), (ins GR64:$src1, GR32:$src2),
"clgfr\t$src1, $src2",
- [(SystemZucmp GR64:$src1, (zext GR32:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (zext GR32:$src2)))]>;
def CMPSX64rm32 : RXYI<0xE330,
(outs), (ins GR64:$src1, rriaddr:$src2),
"cgf\t$src1, $src2",
- [(SystemZucmp GR64:$src1, (sextloadi64i32 rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (sextloadi64i32 rriaddr:$src2)))]>;
def UCMPZX64rm32 : RXYI<0xE331,
(outs), (ins GR64:$src1, rriaddr:$src2),
"clgf\t$src1, $src2",
- [(SystemZucmp GR64:$src1, (zextloadi64i32 rriaddr:$src2)),
- (implicit PSW)]>;
+ [(set PSW, (SystemZucmp GR64:$src1,
+ (zextloadi64i32 rriaddr:$src2)))]>;
// FIXME: Add other crazy ucmp forms
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index c3dcf8e..66bb914 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -524,6 +524,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
else
// Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers need to be indirect and pc-rel. We accomplish this by
+ // using NLPs. However, sometimes the types are local to the file. So
+ // we need to fill in the value for the NLP in those cases.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(MCSym.getPointer(),
OutContext),
4/*size*/, 0/*addrspace*/);
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 7d29d97..c851ca3 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -83,7 +83,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
case X86II::MO_DARWIN_NONLAZY:
case X86II::MO_DARWIN_NONLAZY_PIC_BASE: {
Name += "$non_lazy_ptr";
- MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str());
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI().getGVStubEntry(Sym);
@@ -98,7 +98,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
}
case X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE: {
Name += "$non_lazy_ptr";
- MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str());
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI().getHiddenGVStubEntry(Sym);
if (StubSym.getPointer() == 0) {
@@ -112,7 +112,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
}
case X86II::MO_DARWIN_STUB: {
Name += "$stub";
- MCSymbol *Sym = Ctx.GetOrCreateTemporarySymbol(Name.str());
+ MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name.str());
MachineModuleInfoImpl::StubValueTy &StubSym =
getMachOMMI().getFnStubEntry(Sym);
if (StubSym.getPointer())
@@ -127,7 +127,7 @@ GetSymbolFromOperand(const MachineOperand &MO) const {
Name.erase(Name.end()-5, Name.end());
StubSym =
MachineModuleInfoImpl::
- StubValueTy(Ctx.GetOrCreateTemporarySymbol(Name.str()), false);
+ StubValueTy(Ctx.GetOrCreateSymbol(Name.str()), false);
}
return Sym;
}
@@ -287,7 +287,9 @@ void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
LowerUnaryToTwoAddr(OutMI, X86::MMX_PCMPEQDrr); break;
case X86::FsFLD0SS: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::FsFLD0SD: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
- case X86::V_SET0: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PS: LowerUnaryToTwoAddr(OutMI, X86::XORPSrr); break;
+ case X86::V_SET0PD: LowerUnaryToTwoAddr(OutMI, X86::XORPDrr); break;
+ case X86::V_SET0PI: LowerUnaryToTwoAddr(OutMI, X86::PXORrr); break;
case X86::V_SETALLONES: LowerUnaryToTwoAddr(OutMI, X86::PCMPEQDrr); break;
case X86::MOV16r0:
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 4d3dedf..22285f1 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv)
tablegen(X86GenSubtarget.inc -gen-subtarget)
set(sources
+ SSEDomainFix.cpp
X86AsmBackend.cpp
X86CodeEmitter.cpp
X86COFFMachineModuleInfo.cpp
diff --git a/lib/Target/X86/SSEDomainFix.cpp b/lib/Target/X86/SSEDomainFix.cpp
new file mode 100644
index 0000000..395ab57
--- /dev/null
+++ b/lib/Target/X86/SSEDomainFix.cpp
@@ -0,0 +1,499 @@
+//===- SSEDomainFix.cpp - Use proper int/float domain for SSE ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the SSEDomainFix pass.
+//
+// Some SSE instructions like mov, and, or, xor are available in different
+// variants for different operand types. These variant instructions are
+// equivalent, but on Nehalem and newer CPUs there is extra latency
+// transferring data between integer and floating point domains.
+//
+// This pass changes the variant instructions to minimize domain crossings.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "sse-domain-fix"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+/// Allocate objects from a pool, allow objects to be recycled, and provide a
+/// way of deleting everything.
+template<typename T, unsigned PageSize = 64>
+class PoolAllocator {
+ std::vector<T*> Pages, Avail;
+public:
+ ~PoolAllocator() { Clear(); }
+
+ T* Alloc() {
+ if (Avail.empty()) {
+ T *p = new T[PageSize];
+ Pages.push_back(p);
+ Avail.reserve(PageSize);
+ for (unsigned n = 0; n != PageSize; ++n)
+ Avail.push_back(p+n);
+ }
+ T *p = Avail.back();
+ Avail.pop_back();
+ return p;
+ }
+
+  // Return an object to the pool for reuse. It won't be reconstructed.
+ void Recycle(T *p) {
+ p->clear();
+ Avail.push_back(p);
+ }
+
+  // Destroy all objects; make sure there are no external pointers to them.
+ void Clear() {
+ Avail.clear();
+ while (!Pages.empty()) {
+ delete[] Pages.back();
+ Pages.pop_back();
+ }
+ }
+};
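+
+// Usage sketch (illustrative only, not part of the pass logic):
+//   PoolAllocator<DomainValue> Pool;
+//   DomainValue *dv = Pool.Alloc();  // reused storage; not reconstructed
+//   Pool.Recycle(dv);                // calls dv->clear() and repools it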
+
+/// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track
+/// of execution domains.
+///
+/// An open DomainValue represents a set of instructions that can still switch
+/// execution domain. Multiple registers may refer to the same open
+/// DomainValue - they will eventually be collapsed to the same execution
+/// domain.
+///
+/// A collapsed DomainValue represents a single register that has been forced
+/// into one or more execution domains. There is a separate collapsed
+/// DomainValue for each register, but it may contain multiple execution
+/// domains. A register value is initially created in a single execution
+/// domain, but if we were forced to pay the penalty of a domain crossing, we
+/// keep track of the fact that the register is now available in multiple
+/// domains.
+struct DomainValue {
+ // Basic reference counting.
+ unsigned Refs;
+
+  // Available domains. For an open DomainValue, these are the domains it can
+  // still be collapsed to. For a collapsed DomainValue, these are the domains
+  // where the register is available for free.
+ unsigned Mask;
+
+ // Position of the last defining instruction.
+ unsigned Dist;
+
+ // Twiddleable instructions using or defining these registers.
+ SmallVector<MachineInstr*, 8> Instrs;
+
+  // A collapsed DomainValue has no instructions to twiddle; it simply keeps
+  // track of the domains where the registers are already available.
+ bool collapsed() const { return Instrs.empty(); }
+
+ // Is any domain in mask available?
+ bool compat(unsigned mask) const {
+ return Mask & mask;
+ }
+
+ // Mark domain as available.
+ void add(unsigned domain) {
+ Mask |= 1u << domain;
+ }
+
+ // First domain available in mask.
+ unsigned firstDomain() const {
+ return CountTrailingZeros_32(Mask);
+ }
+
+ DomainValue() { clear(); }
+
+ void clear() {
+ Refs = Mask = Dist = 0;
+ Instrs.clear();
+ }
+};
+
+static const unsigned NumRegs = 16;
+
+class SSEDomainFixPass : public MachineFunctionPass {
+ static char ID;
+ PoolAllocator<DomainValue> Pool;
+
+ MachineFunction *MF;
+ const X86InstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineBasicBlock *MBB;
+ DomainValue **LiveRegs;
+ typedef DenseMap<MachineBasicBlock*,DomainValue**> LiveOutMap;
+ LiveOutMap LiveOuts;
+ unsigned Distance;
+
+public:
+ SSEDomainFixPass() : MachineFunctionPass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual const char *getPassName() const {
+ return "SSE execution domain fixup";
+ }
+
+private:
+ // Register mapping.
+ int RegIndex(unsigned Reg);
+
+ // LiveRegs manipulations.
+ void SetLiveReg(int rx, DomainValue *DV);
+ void Kill(int rx);
+ void Force(int rx, unsigned domain);
+ void Collapse(DomainValue *dv, unsigned domain);
+ bool Merge(DomainValue *A, DomainValue *B);
+
+ void enterBasicBlock();
+ void visitGenericInstr(MachineInstr*);
+ void visitSoftInstr(MachineInstr*, unsigned mask);
+ void visitHardInstr(MachineInstr*, unsigned domain);
+};
+}
+
+char SSEDomainFixPass::ID = 0;
+
+/// Translate TRI register number to an index into our smaller tables of
+/// interesting registers. Return -1 for boring registers.
+int SSEDomainFixPass::RegIndex(unsigned reg) {
+ // Registers are sorted lexicographically.
+ // We just need them to be consecutive, ordering doesn't matter.
+ assert(X86::XMM9 == X86::XMM0+NumRegs-1 && "Unexpected sort");
+ reg -= X86::XMM0;
+ return reg < NumRegs ? reg : -1;
+}
+
+/// Set LiveRegs[rx] = dv, updating reference counts.
+void SSEDomainFixPass::SetLiveReg(int rx, DomainValue *dv) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs)
+ LiveRegs = (DomainValue**)calloc(sizeof(DomainValue*), NumRegs);
+
+ if (LiveRegs[rx] == dv)
+ return;
+ if (LiveRegs[rx]) {
+ assert(LiveRegs[rx]->Refs && "Bad refcount");
+ if (--LiveRegs[rx]->Refs == 0) Pool.Recycle(LiveRegs[rx]);
+ }
+ LiveRegs[rx] = dv;
+ if (dv) ++dv->Refs;
+}
+
+// Kill register rx, recycle or collapse any DomainValue.
+void SSEDomainFixPass::Kill(int rx) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ if (!LiveRegs || !LiveRegs[rx]) return;
+
+ // Before killing the last reference to an open DomainValue, collapse it to
+ // the first available domain.
+ if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->collapsed())
+ Collapse(LiveRegs[rx], LiveRegs[rx]->firstDomain());
+ else
+ SetLiveReg(rx, 0);
+}
+
+/// Force register rx into domain.
+void SSEDomainFixPass::Force(int rx, unsigned domain) {
+ assert(unsigned(rx) < NumRegs && "Invalid index");
+ DomainValue *dv;
+ if (LiveRegs && (dv = LiveRegs[rx])) {
+ if (dv->collapsed())
+ dv->add(domain);
+ else
+ Collapse(dv, domain);
+ } else {
+ // Set up basic collapsed DomainValue.
+ dv = Pool.Alloc();
+ dv->Dist = Distance;
+ dv->add(domain);
+ SetLiveReg(rx, dv);
+ }
+}
+
+/// Collapse open DomainValue into given domain. If there are multiple
+/// registers using dv, they each get a unique collapsed DomainValue.
+void SSEDomainFixPass::Collapse(DomainValue *dv, unsigned domain) {
+ assert(dv->compat(1u << domain) && "Cannot collapse");
+
+ // Collapse all the instructions.
+ while (!dv->Instrs.empty()) {
+ MachineInstr *mi = dv->Instrs.back();
+ TII->SetSSEDomain(mi, domain);
+ dv->Instrs.pop_back();
+ }
+ dv->Mask = 1u << domain;
+
+ // If there are multiple users, give them new, unique DomainValues.
+ if (LiveRegs && dv->Refs > 1) {
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == dv) {
+ DomainValue *dv2 = Pool.Alloc();
+ dv2->Dist = Distance;
+ dv2->add(domain);
+ SetLiveReg(rx, dv2);
+ }
+ }
+}
+
+/// Merge - All instructions and registers in B are moved to A, and B is
+/// released.
+bool SSEDomainFixPass::Merge(DomainValue *A, DomainValue *B) {
+ assert(!A->collapsed() && "Cannot merge into collapsed");
+ assert(!B->collapsed() && "Cannot merge from collapsed");
+ if (A == B)
+ return true;
+ if (!A->compat(B->Mask))
+ return false;
+ A->Mask &= B->Mask;
+ A->Dist = std::max(A->Dist, B->Dist);
+ A->Instrs.append(B->Instrs.begin(), B->Instrs.end());
+ for (unsigned rx = 0; rx != NumRegs; ++rx)
+ if (LiveRegs[rx] == B)
+ SetLiveReg(rx, A);
+ return true;
+}
+
+void SSEDomainFixPass::enterBasicBlock() {
+ // Try to coalesce live-out registers from predecessors.
+ for (MachineBasicBlock::const_livein_iterator i = MBB->livein_begin(),
+ e = MBB->livein_end(); i != e; ++i) {
+ int rx = RegIndex(*i);
+ if (rx < 0) continue;
+ for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
+ pe = MBB->pred_end(); pi != pe; ++pi) {
+ LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
+ if (fi == LiveOuts.end()) continue;
+ DomainValue *pdv = fi->second[rx];
+ if (!pdv) continue;
+ if (!LiveRegs || !LiveRegs[rx]) {
+ SetLiveReg(rx, pdv);
+ continue;
+ }
+
+ // We have a live DomainValue from more than one predecessor.
+ if (LiveRegs[rx]->collapsed()) {
+        // We are already collapsed, but the predecessor is not. Force it.
+ if (!pdv->collapsed())
+ Collapse(pdv, LiveRegs[rx]->firstDomain());
+ continue;
+ }
+
+ // Currently open, merge in predecessor.
+ if (!pdv->collapsed())
+ Merge(LiveRegs[rx], pdv);
+ else
+ Collapse(LiveRegs[rx], pdv->firstDomain());
+ }
+ }
+}
+
+// A hard instruction only works in one domain. All input registers will be
+// forced into that domain.
+void SSEDomainFixPass::visitHardInstr(MachineInstr *mi, unsigned domain) {
+ // Collapse all uses.
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Force(rx, domain);
+ }
+
+ // Kill all defs and force them.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ Force(rx, domain);
+ }
+}
+
+// A soft instruction can be changed to work in other domains given by mask.
+void SSEDomainFixPass::visitSoftInstr(MachineInstr *mi, unsigned mask) {
+ // Scan the explicit use operands for incoming domains.
+ unsigned collmask = mask;
+ SmallVector<int, 4> used;
+ if (LiveRegs)
+ for (unsigned i = mi->getDesc().getNumDefs(),
+ e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (DomainValue *dv = LiveRegs[rx]) {
+ // Is it possible to use this collapsed register for free?
+ if (dv->collapsed()) {
+ if (unsigned m = collmask & dv->Mask)
+ collmask = m;
+ } else if (dv->compat(collmask))
+ used.push_back(rx);
+ else
+ Kill(rx);
+ }
+ }
+
+ // If the collapsed operands force a single domain, propagate the collapse.
+ if (isPowerOf2_32(collmask)) {
+ unsigned domain = CountTrailingZeros_32(collmask);
+ TII->SetSSEDomain(mi, domain);
+ visitHardInstr(mi, domain);
+ return;
+ }
+
+ // Kill off any remaining uses that don't match collmask, and build a list of
+  // incoming DomainValues that we want to merge.
+ SmallVector<DomainValue*,4> doms;
+ for (SmallVector<int, 4>::iterator i=used.begin(), e=used.end(); i!=e; ++i) {
+ int rx = *i;
+ DomainValue *dv = LiveRegs[rx];
+ // This useless DomainValue could have been missed above.
+ if (!dv->compat(collmask)) {
+ Kill(*i);
+ continue;
+ }
+ // sorted, uniqued insert.
+ bool inserted = false;
+ for (SmallVector<DomainValue*,4>::iterator i = doms.begin(), e = doms.end();
+ i != e && !inserted; ++i) {
+ if (dv == *i)
+ inserted = true;
+ else if (dv->Dist < (*i)->Dist) {
+ inserted = true;
+ doms.insert(i, dv);
+ }
+ }
+ if (!inserted)
+ doms.push_back(dv);
+ }
+
+ // doms are now sorted in order of appearance. Try to merge them all, giving
+ // priority to the latest ones.
+ DomainValue *dv = 0;
+ while (!doms.empty()) {
+ if (!dv) {
+ dv = doms.pop_back_val();
+ continue;
+ }
+
+ DomainValue *ThisDV = doms.pop_back_val();
+ if (Merge(dv, ThisDV)) continue;
+
+ for (SmallVector<int,4>::iterator i=used.begin(), e=used.end(); i != e; ++i)
+ if (LiveRegs[*i] == ThisDV)
+ Kill(*i);
+ }
+
+ // dv is the DomainValue we are going to use for this instruction.
+ if (!dv)
+ dv = Pool.Alloc();
+ dv->Dist = Distance;
+ dv->Mask = collmask;
+ dv->Instrs.push_back(mi);
+
+ // Finally set all defs and non-collapsed uses to dv.
+ for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) {
+ Kill(rx);
+ SetLiveReg(rx, dv);
+ }
+ }
+}
+
+void SSEDomainFixPass::visitGenericInstr(MachineInstr *mi) {
+ // Process explicit defs, kill any XMM registers redefined.
+ for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) {
+ MachineOperand &mo = mi->getOperand(i);
+ if (!mo.isReg()) continue;
+ int rx = RegIndex(mo.getReg());
+ if (rx < 0) continue;
+ Kill(rx);
+ }
+}
+
+bool SSEDomainFixPass::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TII = static_cast<const X86InstrInfo*>(MF->getTarget().getInstrInfo());
+ TRI = MF->getTarget().getRegisterInfo();
+ MBB = 0;
+ LiveRegs = 0;
+ Distance = 0;
+ assert(NumRegs == X86::VR128RegClass.getNumRegs() && "Bad regclass");
+
+ // If no XMM registers are used in the function, we can skip it completely.
+ bool anyregs = false;
+ for (TargetRegisterClass::const_iterator I = X86::VR128RegClass.begin(),
+ E = X86::VR128RegClass.end(); I != E; ++I)
+ if (MF->getRegInfo().isPhysRegUsed(*I)) {
+ anyregs = true;
+ break;
+ }
+ if (!anyregs) return false;
+
+ MachineBasicBlock *Entry = MF->begin();
+ SmallPtrSet<MachineBasicBlock*, 16> Visited;
+ for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*, 16> >
+ DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited);
+ DFI != DFE; ++DFI) {
+ MBB = *DFI;
+ enterBasicBlock();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ MachineInstr *mi = I;
+ if (mi->isDebugValue()) continue;
+ ++Distance;
+ std::pair<uint16_t, uint16_t> domp = TII->GetSSEDomain(mi);
+ if (domp.first)
+ if (domp.second)
+ visitSoftInstr(mi, domp.second);
+ else
+ visitHardInstr(mi, domp.first);
+ else if (LiveRegs)
+ visitGenericInstr(mi);
+ }
+
+ // Save live registers at end of MBB - used by enterBasicBlock().
+ if (LiveRegs)
+ LiveOuts.insert(std::make_pair(MBB, LiveRegs));
+ LiveRegs = 0;
+ }
+
+ // Clear the LiveOuts vectors. Should we also collapse any remaining
+ // DomainValues?
+ for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
+ i != e; ++i)
+ free(i->second);
+ LiveOuts.clear();
+ Pool.Clear();
+
+ return false;
+}
+
+FunctionPass *llvm::createSSEDomainFixPass() {
+ return new SSEDomainFixPass();
+}
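+
+// A sketch of how the pass would typically be wired into the X86 pipeline
+// (the actual hook lives in X86TargetMachine, not in this file, and the
+// exact condition is an assumption):
+//
+//   bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
+//                                         CodeGenOpt::Level OptLevel) {
+//     // Run after register allocation so XMM assignments are final.
+//     if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2())
+//       PM.add(createSSEDomainFixPass());
+//     return true;
+//   }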
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index c753cf2..9be38a4 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -41,6 +41,10 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM,
///
FunctionPass *createX86FloatingPointStackifierPass();
+/// createSSEDomainFixPass - This pass twiddles SSE opcodes to prevent domain
+/// crossings.
+FunctionPass *createSSEDomainFixPass();
+
/// createX87FPRegKillInserterPass - This function returns a pass which
/// inserts FP_REG_KILL instructions where needed.
///
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 2be51e1..6b62795 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -59,6 +59,9 @@ def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
[FeatureCMOV]>;
def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
+def FeatureFastUAMem : SubtargetFeature<"fast-unaligned-mem",
+ "IsUAMemFast", "true",
+ "Fast unaligned memory access">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions">;
@@ -98,8 +101,10 @@ def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem]>;
+def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureFastUAMem]>;
// Sandy Bridge does not have FMA
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
@@ -160,10 +165,11 @@ def X86InstrInfo : InstrInfo {
"hasAdSizePrefix",
"Prefix",
"hasREX_WPrefix",
- "ImmTypeBits",
- "FPFormBits",
+ "ImmT.Value",
+ "FPForm.Value",
"hasLockPrefix",
"SegOvrBits",
+ "ExeDomain.Value",
"Opcode"];
let TSFlagsShifts = [0,
6,
@@ -174,6 +180,7 @@ def X86InstrInfo : InstrInfo {
16,
19,
20,
+ 22,
24];
}
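
Per the TSFlagsShifts list above, the new ExeDomain.Value field occupies bits
22-23 of TSFlags (the next field, Opcode, starts at bit 24). A hypothetical
accessor showing how the field would be decoded:

    static unsigned getExeDomain(uint64_t TSFlags) {
      return (TSFlags >> 22) & 3;  // two bits between SegOvrBits and Opcode
    }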
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 754a200..8e2928c3 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -10,10 +10,14 @@
#include "llvm/Target/TargetAsmBackend.h"
#include "X86.h"
#include "X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MachObjectWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
@@ -48,8 +52,135 @@ public:
for (unsigned i = 0; i != Size; ++i)
DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8));
}
+
+ bool MayNeedRelaxation(const MCInst &Inst,
+ const SmallVectorImpl<MCAsmFixup> &Fixups) const;
+
+ void RelaxInstruction(const MCInstFragment *IF, MCInst &Res) const;
+
+ bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const;
};
+static unsigned getRelaxedOpcode(unsigned Op) {
+ switch (Op) {
+ default:
+ return Op;
+
+ case X86::JAE_1: return X86::JAE_4;
+ case X86::JA_1: return X86::JA_4;
+ case X86::JBE_1: return X86::JBE_4;
+ case X86::JB_1: return X86::JB_4;
+ case X86::JE_1: return X86::JE_4;
+ case X86::JGE_1: return X86::JGE_4;
+ case X86::JG_1: return X86::JG_4;
+ case X86::JLE_1: return X86::JLE_4;
+ case X86::JL_1: return X86::JL_4;
+ case X86::JMP_1: return X86::JMP_4;
+ case X86::JNE_1: return X86::JNE_4;
+ case X86::JNO_1: return X86::JNO_4;
+ case X86::JNP_1: return X86::JNP_4;
+ case X86::JNS_1: return X86::JNS_4;
+ case X86::JO_1: return X86::JO_4;
+ case X86::JP_1: return X86::JP_4;
+ case X86::JS_1: return X86::JS_4;
+ }
+}
+
+bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst,
+ const SmallVectorImpl<MCAsmFixup> &Fixups) const {
+ // Check for a 1-byte pcrel fixup, and enforce that we would know how to
+ // relax this instruction.
+ for (unsigned i = 0, e = Fixups.size(); i != e; ++i) {
+ if (unsigned(Fixups[i].Kind) == X86::reloc_pcrel_1byte) {
+ assert(getRelaxedOpcode(Inst.getOpcode()) != Inst.getOpcode());
+ return true;
+ }
+ }
+
+ return false;
+}
+
+// FIXME: Can tblgen help at all here to verify there aren't other instructions
+// we can relax?
+void X86AsmBackend::RelaxInstruction(const MCInstFragment *IF,
+ MCInst &Res) const {
+ // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
+ unsigned RelaxedOp = getRelaxedOpcode(IF->getInst().getOpcode());
+
+ if (RelaxedOp == IF->getInst().getOpcode()) {
+ SmallString<256> Tmp;
+ raw_svector_ostream OS(Tmp);
+ IF->getInst().dump_pretty(OS);
+ llvm_report_error("unexpected instruction to relax: " + OS.str());
+ }
+
+ Res = IF->getInst();
+ Res.setOpcode(RelaxedOp);
+}
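For reference, the relaxation implemented here swaps the well-known x86 rel8 branch encoding for the rel32 one; the byte values below are the standard encodings, not data from this patch:

    // je rel8 is 2 bytes; the relaxed je rel32 is 6 (0x0f-prefixed long form).
    const unsigned char JE_1_enc[2] = {0x74, 0x00};
    const unsigned char JE_4_enc[6] = {0x0f, 0x84, 0x00, 0x00, 0x00, 0x00};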
+
+/// WriteNopData - Write optimal nops to the output file for the \arg Count
+/// bytes. This returns true once the nops have been written; a Count beyond
+/// the largest optimal sequence is finished off with single-byte nops.
+///
+/// FIXME: this is X86 32-bit specific and should move to a better place.
+bool X86AsmBackend::WriteNopData(uint64_t Count, MCObjectWriter *OW) const {
+ static const uint8_t Nops[16][16] = {
+ // nop
+ {0x90},
+ // xchg %ax,%ax
+ {0x66, 0x90},
+ // nopl (%[re]ax)
+ {0x0f, 0x1f, 0x00},
+ // nopl 0(%[re]ax)
+ {0x0f, 0x1f, 0x40, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw 0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw %cs:0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ // nopl 0L(%[re]ax)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
+ };
+
+ // Write an optimal sequence for the first 15 bytes.
+ uint64_t OptimalCount = (Count < 16) ? Count : 15;
+ for (uint64_t i = 0, e = OptimalCount; i != e; i++)
+ OW->Write8(Nops[OptimalCount - 1][i]);
+
+ // Finish with single byte nops.
+ for (uint64_t i = OptimalCount, e = Count; i != e; ++i)
+ OW->Write8(0x90);
+
+ return true;
+}
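A worked example of the split above: a 20-byte pad is emitted as one 15-byte optimal nop followed by five single-byte 0x90s. The arithmetic, extracted into a standalone sketch:

    #include <cstdint>
    #include <cstdio>

    int main() {
      uint64_t Count = 20;
      uint64_t OptimalCount = (Count < 16) ? Count : 15; // same clamp as above
      std::printf("%llu-byte optimal nop + %llu single-byte nops\n",
                  (unsigned long long)OptimalCount,
                  (unsigned long long)(Count - OptimalCount));
      return 0; // prints: 15-byte optimal nop + 5 single-byte nops
    }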
+
+/* *** */
+
class ELFX86AsmBackend : public X86AsmBackend {
public:
ELFX86AsmBackend(const Target &T)
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 5d3edbb..c69eeb3 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -383,7 +383,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
// They have to fit in the 32-bit signed displacement field though.
- if (isInt32(Disp)) {
+ if (isInt<32>(Disp)) {
AM.Disp = (uint32_t)Disp;
return X86SelectAddress(U->getOperand(0), AM);
}
@@ -427,7 +427,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
}
}
// Check for displacement overflow.
- if (!isInt32(Disp))
+ if (!isInt<32>(Disp))
break;
// Ok, the GEP indices were covered by constant-offset and scaled-index
// addressing. Update the address state and move on to examining the base.
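isInt<32> is the templated replacement for the old isInt32 helper; a behaviorally equivalent check (a sketch, not the MathExtras implementation) is:

    #include <cstdint>

    // True iff the 64-bit value fits the 32-bit signed displacement field.
    bool fitsSigned32(int64_t x) { return x >= INT32_MIN && x <= INT32_MAX; }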
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 704f9c6..b24d5a1 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -802,6 +802,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
if (!VT.is128BitVector()) {
continue;
}
+
setOperationAction(ISD::AND, SVT, Promote);
AddPromotedToType (ISD::AND, SVT, MVT::v2i64);
setOperationAction(ISD::OR, SVT, Promote);
@@ -1008,7 +1009,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// FIXME: These should be based on subtarget info. Plus, the values should
// be smaller when we are optimizing for size.
maxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
- maxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
+ maxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
maxStoresPerMemmove = 3; // For @llvm.memmove -> sequence of stores
setPrefLoopAlignment(16);
benefitFromCodePlacementOpt = true;
@@ -1066,23 +1067,37 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
}
/// getOptimalMemOpType - Returns the target specific optimal type for load
-/// and store operations as a result of memset, memcpy, and memmove
-/// lowering. It returns MVT::iAny if SelectionDAG should be responsible for
-/// determining it.
+/// and store operations as a result of memset, memcpy, and memmove lowering.
+/// If DstAlign is zero, that means the destination alignment can satisfy any
+/// constraint. Similarly, if SrcAlign is zero, there is no need to check it
+/// against an alignment requirement, probably because
+/// the source does not need to be loaded. It returns EVT::Other if
+/// SelectionDAG should be responsible for determining it.
EVT
-X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
+X86TargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool SafeToUseFP,
SelectionDAG &DAG) const {
// FIXME: This turns off use of xmm stores for memset/memcpy on targets like
// linux. This is because the stack realignment code can't handle certain
// cases like PR2962. This should be removed when PR2962 is fixed.
const Function *F = DAG.getMachineFunction().getFunction();
- bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat);
- if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) {
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16)
- return MVT::v4i32;
- if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16)
- return MVT::v4f32;
+ if (!F->hasFnAttr(Attribute::NoImplicitFloat)) {
+ if (Size >= 16 &&
+ (Subtarget->isUnalignedMemAccessFast() ||
+ ((DstAlign == 0 || DstAlign >= 16) &&
+ (SrcAlign == 0 || SrcAlign >= 16))) &&
+ Subtarget->getStackAlignment() >= 16) {
+ if (Subtarget->hasSSE2())
+ return MVT::v4i32;
+ if (SafeToUseFP && Subtarget->hasSSE1())
+ return MVT::v4f32;
+ } else if (SafeToUseFP &&
+ Size >= 8 &&
+ !Subtarget->is64Bit() &&
+ Subtarget->getStackAlignment() >= 8 &&
+ Subtarget->hasSSE2())
+ return MVT::f64;
}
if (Subtarget->is64Bit() && Size >= 8)
return MVT::i64;
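The new policy condenses to the following standalone restatement (a sketch: the booleans stand in for the Subtarget queries, and the real code additionally gates every FP/vector choice on the function not having the NoImplicitFloat attribute):

    #include <cstdint>

    enum class MemVT { v4i32, v4f32, f64, i64, i32 };

    MemVT pickMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                        bool SafeToUseFP, bool FastUAMem, bool HasSSE1,
                        bool HasSSE2, bool Is64Bit, unsigned StackAlign) {
      // 16+ byte ops use vector stores when unaligned access is fast or both
      // sides are 16-byte aligned (0 meaning "no constraint" per the comment).
      if (Size >= 16 && StackAlign >= 16 &&
          (FastUAMem || ((DstAlign == 0 || DstAlign >= 16) &&
                         (SrcAlign == 0 || SrcAlign >= 16)))) {
        if (HasSSE2) return MemVT::v4i32;
        if (SafeToUseFP && HasSSE1) return MemVT::v4f32;
      } else if (SafeToUseFP && Size >= 8 && !Is64Bit &&
                 StackAlign >= 8 && HasSSE2) {
        return MemVT::f64; // 8-byte FP stores on 32-bit SSE2 targets
      }
      if (Is64Bit && Size >= 8) return MemVT::i64;
      return MemVT::i32; // final fallback, assumed from the surrounding code
    }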
@@ -1108,8 +1123,8 @@ MCSymbol *
X86TargetLowering::getPICBaseSymbol(const MachineFunction *MF,
MCContext &Ctx) const {
const MCAsmInfo &MAI = *getTargetMachine().getMCAsmInfo();
- return Ctx.GetOrCreateTemporarySymbol(Twine(MAI.getPrivateGlobalPrefix())+
- Twine(MF->getFunctionNumber())+"$pb");
+ return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+ Twine(MF->getFunctionNumber())+"$pb");
}
@@ -2290,6 +2305,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
// If -tailcallopt is specified, make fastcc functions tail-callable.
+ const MachineFunction &MF = DAG.getMachineFunction();
const Function *CallerF = DAG.getMachineFunction().getFunction();
if (GuaranteedTailCallOpt) {
if (IsTailCallConvention(CalleeCC) &&
@@ -2301,8 +2317,14 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Look for obvious safe cases to perform tail call optimization that does not
// require ABI changes. This is what gcc calls sibcall.
- // Do not sibcall optimize vararg calls for now.
- if (isVarArg)
+ // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
+ // emit a special epilogue.
+ if (RegInfo->needsStackRealignment(MF))
+ return false;
+
+ // Do not sibcall optimize vararg calls unless the call site passes no
+ // arguments.
+ if (isVarArg && !Outs.empty())
return false;
// Also avoid sibcall optimization if either caller or callee uses struct
@@ -2417,7 +2439,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement) {
// Offset should fit into 32 bit immediate field.
- if (!isInt32(Offset))
+ if (!isInt<32>(Offset))
return false;
// If we don't have a symbolic displacement - we don't have any extra
@@ -3613,6 +3635,69 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
return SDValue();
}
+/// EltsFromConsecutiveLoads - Given the initializing elements 'Elts' of a
+/// vector of type 'VT', see if the elements can be replaced by a single large
+/// load which has the same value as a build_vector whose operands are 'elts'.
+///
+/// Example: <load i32 *a, load i32 *a+4, undef, undef> -> zextload a
+///
+/// FIXME: we'd also like to handle the case where the last elements are zero
+/// rather than undef via VZEXT_LOAD, but we do not detect that case today.
+/// There's even a handy isZeroNode for that purpose.
+static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts,
+ DebugLoc &dl, SelectionDAG &DAG) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElems = Elts.size();
+
+ LoadSDNode *LDBase = NULL;
+ unsigned LastLoadedElt = -1U;
+
+ // For each element in the initializer, see if we've found a load or an undef.
+ // If we don't find an initial load element, or later load elements are
+ // non-consecutive, bail out.
+ for (unsigned i = 0; i < NumElems; ++i) {
+ SDValue Elt = Elts[i];
+
+ if (!Elt.getNode() ||
+ (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
+ return SDValue();
+ if (!LDBase) {
+ if (Elt.getNode()->getOpcode() == ISD::UNDEF)
+ return SDValue();
+ LDBase = cast<LoadSDNode>(Elt.getNode());
+ LastLoadedElt = i;
+ continue;
+ }
+ if (Elt.getOpcode() == ISD::UNDEF)
+ continue;
+
+ LoadSDNode *LD = cast<LoadSDNode>(Elt);
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
+ return SDValue();
+ LastLoadedElt = i;
+ }
+
+ // If we have found an entire vector of loads and undefs, then return a large
+ // load of the entire vector width starting at the base pointer. If we found
+ // consecutive loads for the low half, generate a vzext_load node.
+ if (LastLoadedElt == NumElems - 1) {
+ if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16)
+ return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(), 0);
+ return DAG.getLoad(VT, dl, LDBase->getChain(), LDBase->getBasePtr(),
+ LDBase->getSrcValue(), LDBase->getSrcValueOffset(),
+ LDBase->isVolatile(), LDBase->isNonTemporal(),
+ LDBase->getAlignment());
+ } else if (NumElems == 4 && LastLoadedElt == 1) {
+ SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
+ SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
+ SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
+ }
+ return SDValue();
+}
+
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
@@ -3841,14 +3926,18 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
return DAG.getVectorShuffle(VT, dl, V[0], V[1], &MaskVec[0]);
}
- if (Values.size() > 2) {
- // If we have SSE 4.1, Expand into a number of inserts unless the number of
- // values to be inserted is equal to the number of elements, in which case
- // use the unpack code below in the hopes of matching the consecutive elts
- // load merge pattern for shuffles.
- // FIXME: We could probably just check that here directly.
- if (Values.size() < NumElems && VT.getSizeInBits() == 128 &&
- getSubtarget()->hasSSE41()) {
+ if (Values.size() > 1 && VT.getSizeInBits() == 128) {
+ // Check for a build vector of consecutive loads.
+ for (unsigned i = 0; i < NumElems; ++i)
+ V[i] = Op.getOperand(i);
+
+ // Check for elements which are consecutive loads.
+ SDValue LD = EltsFromConsecutiveLoads(VT, V, dl, DAG);
+ if (LD.getNode())
+ return LD;
+
+ // For SSE 4.1, use inserts into undef.
+ if (getSubtarget()->hasSSE41()) {
V[0] = DAG.getUNDEF(VT);
for (unsigned i = 0; i < NumElems; ++i)
if (Op.getOperand(i).getOpcode() != ISD::UNDEF)
@@ -3856,7 +3945,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
Op.getOperand(i), DAG.getIntPtrConstant(i));
return V[0];
}
- // Expand into a number of unpckl*.
+
+ // Otherwise, expand into a number of unpckl*
// e.g. for v4f32
// Step 1: unpcklps 0, 2 ==> X: <?, ?, 2, 0>
// : unpcklps 1, 3 ==> Y: <?, ?, 3, 1>
@@ -3871,7 +3961,6 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
return V[0];
}
-
return SDValue();
}
@@ -8797,83 +8886,24 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
- EVT EltVT, LoadSDNode *&LDBase,
- unsigned &LastLoadedElt,
- SelectionDAG &DAG, MachineFrameInfo *MFI,
- const TargetLowering &TLI) {
- LDBase = NULL;
- LastLoadedElt = -1U;
- for (unsigned i = 0; i < NumElems; ++i) {
- if (N->getMaskElt(i) < 0) {
- if (!LDBase)
- return false;
- continue;
- }
-
- SDValue Elt = DAG.getShuffleScalarElt(N, i);
- if (!Elt.getNode() ||
- (Elt.getOpcode() != ISD::UNDEF && !ISD::isNON_EXTLoad(Elt.getNode())))
- return false;
- if (!LDBase) {
- if (Elt.getNode()->getOpcode() == ISD::UNDEF)
- return false;
- LDBase = cast<LoadSDNode>(Elt.getNode());
- LastLoadedElt = i;
- continue;
- }
- if (Elt.getOpcode() == ISD::UNDEF)
- continue;
-
- LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
- return false;
- LastLoadedElt = i;
- }
- return true;
-}
-
/// PerformShuffleCombine - Combine a vector_shuffle that is equal to
/// build_vector load1, load2, load3, load4, <0, 1, 2, 3> into a 128-bit load
/// if the load addresses are consecutive, non-overlapping, and in the right
-/// order. In the case of v2i64, it will see if it can rewrite the
-/// shuffle to be an appropriate build vector so it can take advantage of
-// performBuildVectorCombine.
+/// order.
static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
DebugLoc dl = N->getDebugLoc();
EVT VT = N->getValueType(0);
- EVT EltVT = VT.getVectorElementType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
- unsigned NumElems = VT.getVectorNumElements();
if (VT.getSizeInBits() != 128)
return SDValue();
- // Try to combine a vector_shuffle into a 128-bit load.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- LoadSDNode *LD = NULL;
- unsigned LastLoadedElt;
- if (!EltsFromConsecutiveLoads(SVN, NumElems, EltVT, LD, LastLoadedElt, DAG,
- MFI, TLI))
- return SDValue();
-
- if (LastLoadedElt == NumElems - 1) {
- if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16)
- return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), LD->isNonTemporal(), 0);
- return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- } else if (NumElems == 4 && LastLoadedElt == 1) {
- SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other);
- SDValue Ops[] = { LD->getChain(), LD->getBasePtr() };
- SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode);
- }
- return SDValue();
+ SmallVector<SDValue, 16> Elts;
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
+ Elts.push_back(DAG.getShuffleScalarElt(SVN, i));
+
+ return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
/// PerformShuffleCombine - Detect vector gather/scatter index generation
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 0f15eba..4549cba 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -417,12 +417,15 @@ namespace llvm {
virtual unsigned getByValTypeAlignment(const Type *Ty) const;
/// getOptimalMemOpType - Returns the target specific optimal type for load
- /// and store operations as a result of memset, memcpy, and memmove
- /// lowering. It returns EVT::iAny if SelectionDAG should be responsible for
- /// determining it.
- virtual EVT getOptimalMemOpType(uint64_t Size, unsigned Align,
- bool isSrcConst, bool isSrcStr,
- SelectionDAG &DAG) const;
+ /// and store operations as a result of memset, memcpy, and memmove lowering.
+ /// If DstAlign is zero, that means the destination alignment can satisfy any
+ /// constraint. Similarly, if SrcAlign is zero, there is no need to check it
+ /// against an alignment requirement, probably because
+ /// the source does not need to be loaded. It returns EVT::Other if
+ /// SelectionDAG should be responsible for determining it.
+ virtual EVT getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool SafeToUseFP, SelectionDAG &DAG) const;
/// allowsUnalignedMemoryAccesses - Returns true if the target allows
/// unaligned memory accesses of the specified type.
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 8cbb756..eef2ca0 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -295,19 +295,17 @@ def BSWAP64r : RI<0xC8, AddRegFrm, (outs GR64:$dst), (ins GR64:$src),
let Defs = [EFLAGS] in {
def BSF64rr : RI<0xBC, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86bsf GR64:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsf GR64:$src))]>, TB;
def BSF64rm : RI<0xBC, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsf{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86bsf (loadi64 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsf (loadi64 addr:$src)))]>, TB;
def BSR64rr : RI<0xBD, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86bsr GR64:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsr GR64:$src))]>, TB;
def BSR64rm : RI<0xBD, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"bsr{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (X86bsr (loadi64 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS, (X86bsr (loadi64 addr:$src)))]>, TB;
} // Defs = [EFLAGS]
// Repeat string ops
@@ -508,8 +506,8 @@ let isCommutable = 1 in
def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86add_flag GR64:$src1, GR64:$src2))]>;
// These are alternate spellings for use by the disassembler, we mark them as
// code gen only to ensure they aren't matched by the assembler.
@@ -523,21 +521,21 @@ let isCodeGenOnly = 1 in {
def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, i64i8imm:$src2),
"add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86add_flag GR64:$src1, i64immSExt8:$src2))]>;
def ADD64ri32 : RIi32<0x81, MRM0r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86add_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isConvertibleToThreeAddress
// Register-Memory Addition
def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"add{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86add_flag GR64:$src1, (load addr:$src2)))]>;
} // isTwoAddress
@@ -604,8 +602,8 @@ let isTwoAddress = 1 in {
def SUB64rr : RI<0x29, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86sub_flag GR64:$src1, GR64:$src2))]>;
def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
@@ -615,20 +613,20 @@ def SUB64rr_REV : RI<0x2B, MRMSrcReg, (outs GR64:$dst),
def SUB64rm : RI<0x2B, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86sub_flag GR64:$src1, (load addr:$src2)))]>;
// Register-Integer Subtraction
def SUB64ri8 : RIi8<0x83, MRM5r, (outs GR64:$dst),
(ins GR64:$src1, i64i8imm:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86sub_flag GR64:$src1, i64immSExt8:$src2))]>;
def SUB64ri32 : RIi32<0x81, MRM5r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"sub{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (sub GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86sub_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
def SUB64i32 : RIi32<0x2D, RawFrm, (outs), (ins i32imm:$src),
@@ -716,15 +714,15 @@ let isCommutable = 1 in
def IMUL64rr : RI<0xAF, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, GR64:$src2))]>, TB;
// Register-Memory Signed Integer Multiplication
def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"imul{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, TB;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, (load addr:$src2)))]>, TB;
} // isTwoAddress
// Surprisingly enough, these are not two-address instructions!
@@ -733,27 +731,27 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
def IMUL64rri8 : RIi8<0x6B, MRMSrcReg, // GR64 = GR64*I8
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt8:$src2))]>;
def IMUL64rri32 : RIi32<0x69, MRMSrcReg, // GR64 = GR64*I32
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag GR64:$src1, i64immSExt32:$src2))]>;
// Memory-Integer Signed Integer Multiplication
def IMUL64rmi8 : RIi8<0x6B, MRMSrcMem, // GR64 = [mem64]*I8
(outs GR64:$dst), (ins i64mem:$src1, i64i8imm: $src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, (mul (load addr:$src1),
- i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt8:$src2))]>;
def IMUL64rmi32 : RIi32<0x69, MRMSrcMem, // GR64 = [mem64]*I32
(outs GR64:$dst), (ins i64mem:$src1, i64i32imm:$src2),
"imul{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR64:$dst, (mul (load addr:$src1),
- i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i64immSExt32:$src2))]>;
} // Defs = [EFLAGS]
// Unsigned division / remainder
@@ -787,16 +785,14 @@ def NEG64m : RI<0xF7, MRM3m, (outs), (ins i64mem:$dst), "neg{q}\t$dst",
let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
def INC64r : RI<0xFF, MRM0r, (outs GR64:$dst), (ins GR64:$src), "inc{q}\t$dst",
- [(set GR64:$dst, (add GR64:$src, 1)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS, (X86inc_flag GR64:$src))]>;
def INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst), "inc{q}\t$dst",
[(store (add (loadi64 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)]>;
let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in
def DEC64r : RI<0xFF, MRM1r, (outs GR64:$dst), (ins GR64:$src), "dec{q}\t$dst",
- [(set GR64:$dst, (add GR64:$src, -1)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS, (X86dec_flag GR64:$src))]>;
def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
[(store (add (loadi64 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)]>;
@@ -806,23 +802,19 @@ let isTwoAddress = 1, isConvertibleToThreeAddress = 1 in {
// Can transform into LEA.
def INC64_16r : I<0xFF, MRM0r, (outs GR16:$dst), (ins GR16:$src),
"inc{w}\t$dst",
- [(set GR16:$dst, (add GR16:$src, 1)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
OpSize, Requires<[In64BitMode]>;
def INC64_32r : I<0xFF, MRM0r, (outs GR32:$dst), (ins GR32:$src),
"inc{l}\t$dst",
- [(set GR32:$dst, (add GR32:$src, 1)),
- (implicit EFLAGS)]>,
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
Requires<[In64BitMode]>;
def DEC64_16r : I<0xFF, MRM1r, (outs GR16:$dst), (ins GR16:$src),
"dec{w}\t$dst",
- [(set GR16:$dst, (add GR16:$src, -1)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
OpSize, Requires<[In64BitMode]>;
def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src),
"dec{l}\t$dst",
- [(set GR32:$dst, (add GR32:$src, -1)),
- (implicit EFLAGS)]>,
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress
@@ -1092,26 +1084,26 @@ let isCommutable = 1 in
def AND64rr : RI<0x21, MRMDestReg,
(outs GR64:$dst), (ins GR64:$src1, GR64:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86and_flag GR64:$src1, GR64:$src2))]>;
def AND64rr_REV : RI<0x23, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}", []>;
def AND64rm : RI<0x23, MRMSrcMem,
(outs GR64:$dst), (ins GR64:$src1, i64mem:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86and_flag GR64:$src1, (load addr:$src2)))]>;
def AND64ri8 : RIi8<0x83, MRM4r,
(outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86and_flag GR64:$src1, i64immSExt8:$src2))]>;
def AND64ri32 : RIi32<0x81, MRM4r,
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"and{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (and GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86and_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
def AND64mr : RI<0x21, MRMDestMem,
@@ -1135,26 +1127,26 @@ let isCommutable = 1 in
def OR64rr : RI<0x09, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86or_flag GR64:$src1, GR64:$src2))]>;
def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}", []>;
def OR64rm : RI<0x0B, MRMSrcMem , (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86or_flag GR64:$src1, (load addr:$src2)))]>;
def OR64ri8 : RIi8<0x83, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i64i8imm:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86or_flag GR64:$src1, i64immSExt8:$src2))]>;
def OR64ri32 : RIi32<0x81, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, i64i32imm:$src2),
"or{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (or GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86or_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
def OR64mr : RI<0x09, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
@@ -1178,26 +1170,26 @@ let isCommutable = 1 in
def XOR64rr : RI<0x31, MRMDestReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, GR64:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86xor_flag GR64:$src1, GR64:$src2))]>;
def XOR64rr_REV : RI<0x33, MRMSrcReg, (outs GR64:$dst),
(ins GR64:$src1, GR64:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}", []>;
def XOR64rm : RI<0x33, MRMSrcMem, (outs GR64:$dst),
(ins GR64:$src1, i64mem:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86xor_flag GR64:$src1, (load addr:$src2)))]>;
def XOR64ri8 : RIi8<0x83, MRM6r, (outs GR64:$dst),
(ins GR64:$src1, i64i8imm:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, i64immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86xor_flag GR64:$src1, i64immSExt8:$src2))]>;
def XOR64ri32 : RIi32<0x81, MRM6r,
(outs GR64:$dst), (ins GR64:$src1, i64i32imm:$src2),
"xor{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (xor GR64:$src1, i64immSExt32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR64:$dst, EFLAGS,
+ (X86xor_flag GR64:$src1, i64immSExt32:$src2))]>;
} // isTwoAddress
def XOR64mr : RI<0x31, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
@@ -2181,14 +2173,11 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
let AddedComplexity = 5 in { // Try this before the selecting to OR
-def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR64:$src1, i64immSExt8:$src2),
(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR64:$src1, i64immSExt32:$src2),
(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR64:$src1, GR64:$src2),
(ADD64rr GR64:$src1, GR64:$src2)>;
} // AddedComplexity
@@ -2215,136 +2204,76 @@ def : Pat<(subc GR64:$src1, imm:$src2),
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
-// Register-Register Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+// addition
+def : Pat<(add GR64:$src1, GR64:$src2),
(ADD64rr GR64:$src1, GR64:$src2)>;
-
-// Register-Integer Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(add GR64:$src1, i64immSExt8:$src2),
(ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86add_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(add GR64:$src1, i64immSExt32:$src2),
(ADD64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Register-Memory Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(add GR64:$src1, (loadi64 addr:$src2)),
(ADD64rm GR64:$src1, addr:$src2)>;
-// Register-Register Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+// subtraction
+def : Pat<(sub GR64:$src1, GR64:$src2),
(SUB64rr GR64:$src1, GR64:$src2)>;
-
-// Register-Memory Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(sub GR64:$src1, (loadi64 addr:$src2)),
(SUB64rm GR64:$src1, addr:$src2)>;
-
-// Register-Integer Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR64:$src1, i64immSExt8:$src2),
(SUB64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR64:$src1, i64immSExt32:$src2),
(SUB64ri32 GR64:$src1, i64immSExt32:$src2)>;
-// Register-Register Signed Integer Multiplication with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+// Multiply
+def : Pat<(mul GR64:$src1, GR64:$src2),
(IMUL64rr GR64:$src1, GR64:$src2)>;
-
-// Register-Memory Signed Integer Multiplication with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(mul GR64:$src1, (loadi64 addr:$src2)),
(IMUL64rm GR64:$src1, addr:$src2)>;
-
-// Register-Integer Signed Integer Multiplication with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR64:$src1, i64immSExt8:$src2),
(IMUL64rri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86smul_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR64:$src1, i64immSExt32:$src2),
(IMUL64rri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Memory-Integer Signed Integer Multiplication with EFLAGS result
-def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt8:$src2),
(IMUL64rmi8 addr:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
(IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>;
-// INC and DEC with EFLAGS result. Note that these do not set CF.
-def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)),
- (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)),
- (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
-
-def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)),
- (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)),
- (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
-
-def : Pat<(parallel (X86inc_flag GR64:$src), (implicit EFLAGS)),
- (INC64r GR64:$src)>;
-def : Pat<(parallel (X86dec_flag GR64:$src), (implicit EFLAGS)),
- (DEC64r GR64:$src)>;
-
-// Register-Register Logical Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
- (OR64rr GR64:$src1, GR64:$src2)>;
+// inc/dec
+def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>;
+def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>;
+def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>;
-// Register-Integer Logical Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+// or
+def : Pat<(or GR64:$src1, GR64:$src2),
+ (OR64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(or GR64:$src1, i64immSExt8:$src2),
(OR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86or_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(or GR64:$src1, i64immSExt32:$src2),
(OR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Register-Memory Logical Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(or GR64:$src1, (loadi64 addr:$src2)),
(OR64rm GR64:$src1, addr:$src2)>;
-// Register-Register Logical XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+// xor
+def : Pat<(xor GR64:$src1, GR64:$src2),
(XOR64rr GR64:$src1, GR64:$src2)>;
-
-// Register-Integer Logical XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR64:$src1, i64immSExt8:$src2),
(XOR64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86xor_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR64:$src1, i64immSExt32:$src2),
(XOR64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Register-Memory Logical XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(xor GR64:$src1, (loadi64 addr:$src2)),
(XOR64rm GR64:$src1, addr:$src2)>;
-// Register-Register Logical And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2),
- (implicit EFLAGS)),
+// and
+def : Pat<(and GR64:$src1, GR64:$src2),
(AND64rr GR64:$src1, GR64:$src2)>;
-
-// Register-Integer Logical And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR64:$src1, i64immSExt8:$src2),
(AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
-def : Pat<(parallel (X86and_flag GR64:$src1, i64immSExt32:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR64:$src1, i64immSExt32:$src2),
(AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
-
-// Register-Memory Logical And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(and GR64:$src1, (loadi64 addr:$src2)),
(AND64rm GR64:$src1, addr:$src2)>;
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index bb81cbf..d25ec26 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -68,6 +68,16 @@ def CompareFP : FPFormat<5>;
def CondMovFP : FPFormat<6>;
def SpecialFP : FPFormat<7>;
+// Class specifying the SSE execution domain, used by the SSEDomainFix pass.
+// Keep in sync with tables in X86InstrInfo.cpp.
+class Domain<bits<2> val> {
+ bits<2> Value = val;
+}
+def GenericDomain : Domain<0>;
+def SSEPackedSingle : Domain<1>;
+def SSEPackedDouble : Domain<2>;
+def SSEPackedInt : Domain<3>;
+
// Prefix byte classes which are used to indicate to the ad-hoc machine code
// emitter that various prefix bytes are required.
class OpSize { bit hasOpSizePrefix = 1; }
@@ -93,7 +103,7 @@ class TA { bits<4> Prefix = 14; }
class TF { bits<4> Prefix = 15; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
- string AsmStr>
+ string AsmStr, Domain d = GenericDomain>
: Instruction {
let Namespace = "X86";
@@ -101,7 +111,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
Format Form = f;
bits<6> FormBits = Form.Value;
ImmType ImmT = i;
- bits<3> ImmTypeBits = ImmT.Value;
dag OutOperandList = outs;
dag InOperandList = ins;
@@ -115,20 +124,21 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bits<4> Prefix = 0; // Which prefix byte does this inst have?
bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
- FPFormat FPForm; // What flavor of FP instruction is this?
- bits<3> FPFormBits = 0;
+ FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
bits<2> SegOvrBits = 0; // Segment override prefix.
+ Domain ExeDomain = d;
}
-class I<bits<8> o, Format f, dag outs, dag ins, string asm, list<dag> pattern>
- : X86Inst<o, f, NoImm, outs, ins, asm> {
+class I<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, NoImm, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
- list<dag> pattern>
- : X86Inst<o, f, Imm8 , outs, ins, asm> {
+ list<dag> pattern, Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8, outs, ins, asm, d> {
let Pattern = pattern;
let CodeSize = 3;
}
@@ -166,7 +176,7 @@ class FPI<bits<8> o, Format F, dag outs, dag ins, string asm>
// FpI_ - Floating Point Pseudo Instruction template. Not Predicated.
class FpI_<dag outs, dag ins, FPFormat fp, list<dag> pattern>
: X86Inst<0, Pseudo, NoImm, outs, ins, ""> {
- let FPForm = fp; let FPFormBits = FPForm.Value;
+ let FPForm = fp;
let Pattern = pattern;
}
@@ -196,14 +206,16 @@ class Iseg32 <bits<8> o, Format f, dag outs, dag ins, string asm,
class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
: I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
-class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+class SSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
class PSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
class PSIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, Requires<[HasSSE1]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
+ Requires<[HasSSE1]>;
// SSE2 Instruction Templates:
//
@@ -222,10 +234,12 @@ class SSDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE2]>;
class PDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE2]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE2]>;
// SSE3 Instruction Templates:
//
@@ -235,12 +249,15 @@ class PDIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
class S3SI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedSingle>, XS,
+ Requires<[HasSSE3]>;
class S3DI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, XD,
+ Requires<[HasSSE3]>;
class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, OpSize, Requires<[HasSSE3]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
+ Requires<[HasSSE3]>;
// SSSE3 Instruction Templates:
@@ -254,10 +271,12 @@ class S3I<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
class SS38I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSSE3]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSSE3]>;
class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSSE3]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSSE3]>;
// SSE4.1 Instruction Templates:
//
@@ -266,17 +285,20 @@ class SS3AI<bits<8> o, Format F, dag outs, dag ins, string asm,
//
class SS48I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE41]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE41]>;
class SS4AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : Ii8<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE41]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE41]>;
// SSE4.2 Instruction Templates:
//
// SS428I - SSE 4.2 instructions with T8 prefix.
class SS428I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, T8, Requires<[HasSSE42]>;
+ : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
+ Requires<[HasSSE42]>;
// SS42FI - SSE 4.2 instructions with TF prefix.
class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -286,7 +308,8 @@ class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
// SS42AI - SSE 4.2 instructions with TA prefix
class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TA, Requires<[HasSSE42]>;
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ Requires<[HasSSE42]>;
// X86-64 Instruction templates...
//
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 139a905..c0c9d98 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -597,7 +597,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::PMULHUWrr, X86::PMULHUWrm, 16 },
{ X86::PMULHWrr, X86::PMULHWrm, 16 },
{ X86::PMULLDrr, X86::PMULLDrm, 16 },
- { X86::PMULLDrr_int, X86::PMULLDrm_int, 16 },
{ X86::PMULLWrr, X86::PMULLWrm, 16 },
{ X86::PMULUDQrr, X86::PMULUDQrm, 16 },
{ X86::PORrr, X86::PORrm, 16 },
@@ -992,8 +991,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
/// a few instructions in each direction, it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator E = MBB.end();
+
// It's always safe to clobber EFLAGS at the end of a block.
- if (I == MBB.end())
+ if (I == E)
return true;
// For compile time consideration, if we are not able to determine the
@@ -1017,20 +1018,28 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
// This instruction defines EFLAGS, no need to look any further.
return true;
++Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != E && Iter->isDebugValue())
+ ++Iter;
// If we make it to the end of the block, it's safe to clobber EFLAGS.
- if (Iter == MBB.end())
+ if (Iter == E)
return true;
}
+ MachineBasicBlock::iterator B = MBB.begin();
Iter = I;
for (unsigned i = 0; i < 4; ++i) {
// If we make it to the beginning of the block, it's safe to clobber
// EFLAGS iff EFLAGS is not live-in.
- if (Iter == MBB.begin())
+ if (Iter == B)
return !MBB.isLiveIn(X86::EFLAGS);
--Iter;
+ // Skip over DBG_VALUE.
+ while (Iter != B && Iter->isDebugValue())
+ --Iter;
+
bool SawKill = false;
for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
MachineOperand &MO = Iter->getOperand(j);
@@ -1677,6 +1686,8 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I = MBB.end();
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
// Working from the bottom, when we see a non-terminator instruction, we're
// done.
@@ -1773,6 +1784,8 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
while (I != MBB.begin()) {
--I;
+ if (I->isDebugValue())
+ continue;
if (I->getOpcode() != X86::JMP_4 &&
GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
break;
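A note on the DBG_VALUE skips added above and in the two branch-analysis routines: debug-only instructions must never influence codegen decisions, so the scans step over them; otherwise compiling with -g could change the emitted machine code.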
@@ -2505,7 +2518,9 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
Alignment = (*LoadMI->memoperands_begin())->getAlignment();
else
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
Alignment = 16;
break;
@@ -2535,11 +2550,13 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
SmallVector<MachineOperand,X86AddrNumOperands> MOs;
switch (LoadMI->getOpcode()) {
- case X86::V_SET0:
+ case X86::V_SET0PS:
+ case X86::V_SET0PD:
+ case X86::V_SET0PI:
case X86::V_SETALLONES:
case X86::FsFLD0SD:
case X86::FsFLD0SS: {
- // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
+ // Folding a V_SET0P? or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
// Medium and large mode can't fold loads this way.
@@ -3648,3 +3665,51 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
X86FI->setGlobalBaseReg(GlobalBaseReg);
return GlobalBaseReg;
}
+
+// These are the replaceable SSE instructions. Some of these have Int variants
+// that we don't include here. We don't want to replace instructions selected
+// by intrinsics.
+static const unsigned ReplaceableInstrs[][3] = {
+ //PackedSingle PackedDouble PackedInt
+ { X86::MOVAPSmr, X86::MOVAPDmr, X86::MOVDQAmr },
+ { X86::MOVAPSrm, X86::MOVAPDrm, X86::MOVDQArm },
+ { X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
+ { X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
+ { X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
+ { X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
+ { X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
+ { X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
+ { X86::ANDPSrm, X86::ANDPDrm, X86::PANDrm },
+ { X86::ANDPSrr, X86::ANDPDrr, X86::PANDrr },
+ { X86::ORPSrm, X86::ORPDrm, X86::PORrm },
+ { X86::ORPSrr, X86::ORPDrr, X86::PORrr },
+ { X86::V_SET0PS, X86::V_SET0PD, X86::V_SET0PI },
+ { X86::XORPSrm, X86::XORPDrm, X86::PXORrm },
+ { X86::XORPSrr, X86::XORPDrr, X86::PXORrr },
+};
+
+// FIXME: Some shuffle and unpack instructions have equivalents in different
+// domains, but they require a bit more work than just switching opcodes.
+
+static const unsigned *lookup(unsigned opcode, unsigned domain) {
+ for (unsigned i = 0, e = array_lengthof(ReplaceableInstrs); i != e; ++i)
+ if (ReplaceableInstrs[i][domain-1] == opcode)
+ return ReplaceableInstrs[i];
+ return 0;
+}
+
+std::pair<uint16_t, uint16_t>
+X86InstrInfo::GetSSEDomain(const MachineInstr *MI) const {
+ uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ return std::make_pair(domain,
+ domain && lookup(MI->getOpcode(), domain) ? 0xe : 0);
+}
+
+void X86InstrInfo::SetSSEDomain(MachineInstr *MI, unsigned Domain) const {
+ assert(Domain>0 && Domain<4 && "Invalid execution domain");
+ uint16_t dom = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
+ assert(dom && "Not an SSE instruction");
+ const unsigned *table = lookup(MI->getOpcode(), dom);
+ assert(table && "Cannot change domain");
+ MI->setDesc(get(table[Domain-1]));
+}
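A hedged usage sketch of the two hooks together; the caller is hypothetical, the fragment compiles only inside LLVM against the headers touched by this patch, and the constant mirrors Domain<3> from X86InstrFormats.td:

    #include "X86InstrInfo.h"
    #include <utility>

    // Flip a replaceable instruction into the PackedInt domain when legal;
    // bit D of GetSSEDomain's mask is set iff domain D may be passed to
    // SetSSEDomain.
    void switchToIntDomain(llvm::MachineInstr *MI,
                           const llvm::X86InstrInfo *TII) {
      const unsigned PackedInt = 3;
      std::pair<uint16_t, uint16_t> DomP = TII->GetSSEDomain(MI);
      if (DomP.second & (1u << PackedInt))
        TII->SetSSEDomain(MI, PackedInt); // e.g. XORPSrr becomes PXORrr
    }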
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 5111719..f0bdd06 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -398,7 +398,10 @@ namespace X86II {
FS = 1 << SegOvrShift,
GS = 2 << SegOvrShift,
- // Bits 22 -> 23 are unused
+ // Execution domain for SSE instructions in bits 22, 23.
+ // 0 in bits 22-23 means normal, non-SSE instruction.
+ SSEDomainShift = 22,
+
OpcodeShift = 24,
OpcodeMask = 0xFF << OpcodeShift
};
@@ -486,7 +489,7 @@ class X86InstrInfo : public TargetInstrInfoImpl {
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
DenseMap<unsigned*, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
-
+
public:
explicit X86InstrInfo(X86TargetMachine &tm);
@@ -716,6 +719,13 @@ public:
///
unsigned getGlobalBaseReg(MachineFunction *MF) const;
+ /// GetSSEDomain - Return the SSE execution domain of MI as the first element,
+ /// and a bitmask of possible arguments to SetSSEDomain as the second.
+ std::pair<uint16_t, uint16_t> GetSSEDomain(const MachineInstr *MI) const;
+
+ /// SetSSEDomain - Set the SSEDomain of MI.
+ void SetSSEDomain(MachineInstr *MI, unsigned Domain) const;
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index c80a18d..8fccc8a 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1,4 +1,4 @@
-
+//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -28,12 +28,13 @@ def SDTX86Cmov : SDTypeProfile<1, 4,
SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
// Unary and binary operator instructions that set EFLAGS as a side-effect.
-def SDTUnaryArithWithFlags : SDTypeProfile<1, 1,
- [SDTCisInt<0>]>;
-def SDTBinaryArithWithFlags : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>,
- SDTCisInt<0>]>;
+def SDTUnaryArithWithFlags : SDTypeProfile<2, 1,
+ [SDTCisInt<0>, SDTCisVT<1, i32>]>;
+
+def SDTBinaryArithWithFlags : SDTypeProfile<2, 2,
+ [SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisInt<0>, SDTCisVT<1, i32>]>;
def SDTX86BrCond : SDTypeProfile<0, 3,
[SDTCisVT<0, OtherVT>,
SDTCisVT<1, i8>, SDTCisVT<2, i32>]>;
@@ -77,8 +78,8 @@ def SDT_X86EHRET : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisVT<1, i32>]>;
-def X86bsf : SDNode<"X86ISD::BSF", SDTIntUnaryOp>;
-def X86bsr : SDNode<"X86ISD::BSR", SDTIntUnaryOp>;
+def X86bsf : SDNode<"X86ISD::BSF", SDTUnaryArithWithFlags>;
+def X86bsr : SDNode<"X86ISD::BSR", SDTUnaryArithWithFlags>;
def X86shld : SDNode<"X86ISD::SHLD", SDTIntShiftDOp>;
def X86shrd : SDNode<"X86ISD::SHRD", SDTIntShiftDOp>;
@@ -167,6 +168,7 @@ def X86smul_flag : SDNode<"X86ISD::SMUL", SDTBinaryArithWithFlags,
[SDNPCommutative]>;
def X86umul_flag : SDNode<"X86ISD::UMUL", SDTUnaryArithWithFlags,
[SDNPCommutative]>;
+
def X86inc_flag : SDNode<"X86ISD::INC", SDTUnaryArithWithFlags>;
def X86dec_flag : SDNode<"X86ISD::DEC", SDTUnaryArithWithFlags>;
def X86or_flag : SDNode<"X86ISD::OR", SDTBinaryArithWithFlags,
@@ -323,6 +325,7 @@ def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
"TM.getCodeModel() == CodeModel::Kernel">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
+def IsNotPIC : Predicate<"TM.getRelocationModel() != Reloc::PIC_">;
def OptForSize : Predicate<"OptForSize">;
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
@@ -473,15 +476,14 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{
def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
- else {
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
- APInt Mask = APInt::getAllOnesValue(BitWidth);
- APInt KnownZero0, KnownOne0;
- CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
- APInt KnownZero1, KnownOne1;
- CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
- return (~KnownZero0 & ~KnownZero1) == 0;
- }
+
+ unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ APInt Mask = APInt::getAllOnesValue(BitWidth);
+ APInt KnownZero0, KnownOne0;
+ CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+ APInt KnownZero1, KnownOne1;
+ CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+ return (~KnownZero0 & ~KnownZero1) == 0;
}]>;
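The fragment above allows (or a, b) to be selected as (add a, b) whenever
no bit position can be 1 in both operands, since the add can then never
carry. A standalone C++ restatement of the final mask test, as a sketch
only (the in-tree check works on APInt rather than a fixed-width integer):

// True if every bit is known zero in at least one operand, i.e. the
// operands share no set bits and OR computes the same value as ADD.
static bool orIsAdd(uint32_t KnownZero0, uint32_t KnownZero1) {
  return (~KnownZero0 & ~KnownZero1) == 0;
}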
// 'shld' and 'shrd' instruction patterns. Note that even though these have
@@ -585,7 +587,7 @@ let neverHasSideEffects = 1, isNotDuplicable = 1, Uses = [ESP] in
// Return instructions.
let isTerminator = 1, isReturn = 1, isBarrier = 1,
- hasCtrlDep = 1, FPForm = SpecialFP, FPFormBits = SpecialFP.Value in {
+ hasCtrlDep = 1, FPForm = SpecialFP in {
def RET : I <0xC3, RawFrm, (outs), (ins variable_ops),
"ret",
[(X86retflag 0)]>;
@@ -806,33 +808,29 @@ let isTwoAddress = 1 in // GR32 = bswap GR32
let Defs = [EFLAGS] in {
def BSF16rr : I<0xBC, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (X86bsf GR16:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsf GR16:$src))]>, TB;
def BSF16rm : I<0xBC, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsf{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (X86bsf (loadi16 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsf (loadi16 addr:$src)))]>, TB;
def BSF32rr : I<0xBC, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86bsf GR32:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsf GR32:$src))]>, TB;
def BSF32rm : I<0xBC, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsf{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86bsf (loadi32 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsf (loadi32 addr:$src)))]>, TB;
def BSR16rr : I<0xBD, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (X86bsr GR16:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsr GR16:$src))]>, TB;
def BSR16rm : I<0xBD, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
"bsr{w}\t{$src, $dst|$dst, $src}",
- [(set GR16:$dst, (X86bsr (loadi16 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR16:$dst, EFLAGS, (X86bsr (loadi16 addr:$src)))]>, TB;
def BSR32rr : I<0xBD, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86bsr GR32:$src)), (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsr GR32:$src))]>, TB;
def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
"bsr{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (X86bsr (loadi32 addr:$src))),
- (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS, (X86bsr (loadi32 addr:$src)))]>, TB;
} // Defs = [EFLAGS]
let neverHasSideEffects = 1 in
@@ -1697,18 +1695,17 @@ let isTwoAddress = 0 in {
let Defs = [EFLAGS] in {
let CodeSize = 2 in
def INC8r : I<0xFE, MRM0r, (outs GR8 :$dst), (ins GR8 :$src), "inc{b}\t$dst",
- [(set GR8:$dst, (add GR8:$src, 1)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86inc_flag GR8:$src))]>;
+
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def INC16r : I<0x40, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
"inc{w}\t$dst",
- [(set GR16:$dst, (add GR16:$src, 1)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86inc_flag GR16:$src))]>,
OpSize, Requires<[In32BitMode]>;
def INC32r : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
"inc{l}\t$dst",
- [(set GR32:$dst, (add GR32:$src, 1)),
- (implicit EFLAGS)]>, Requires<[In32BitMode]>;
+ [(set GR32:$dst, EFLAGS, (X86inc_flag GR32:$src))]>,
+ Requires<[In32BitMode]>;
}
let isTwoAddress = 0, CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
@@ -1726,18 +1723,16 @@ let isTwoAddress = 0, CodeSize = 2 in {
let CodeSize = 2 in
def DEC8r : I<0xFE, MRM1r, (outs GR8 :$dst), (ins GR8 :$src), "dec{b}\t$dst",
- [(set GR8:$dst, (add GR8:$src, -1)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86dec_flag GR8:$src))]>;
let isConvertibleToThreeAddress = 1, CodeSize = 1 in { // Can xform into LEA.
def DEC16r : I<0x48, AddRegFrm, (outs GR16:$dst), (ins GR16:$src),
"dec{w}\t$dst",
- [(set GR16:$dst, (add GR16:$src, -1)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86dec_flag GR16:$src))]>,
OpSize, Requires<[In32BitMode]>;
def DEC32r : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src),
"dec{l}\t$dst",
- [(set GR32:$dst, (add GR32:$src, -1)),
- (implicit EFLAGS)]>, Requires<[In32BitMode]>;
+ [(set GR32:$dst, EFLAGS, (X86dec_flag GR32:$src))]>,
+ Requires<[In32BitMode]>;
}
let isTwoAddress = 0, CodeSize = 2 in {
@@ -1758,21 +1753,20 @@ let isTwoAddress = 0, CodeSize = 2 in {
// Logical operators...
let Defs = [EFLAGS] in {
let isCommutable = 1 in { // X = AND Y, Z --> X = AND Z, Y
-def AND8rr : I<0x20, MRMDestReg,
- (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
- "and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (and GR8:$src1, GR8:$src2)),
- (implicit EFLAGS)]>;
-def AND16rr : I<0x21, MRMDestReg,
- (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (and GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, OpSize;
-def AND32rr : I<0x21, MRMDestReg,
- (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
- "and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>;
+def AND8rr : I<0x20, MRMDestReg,
+ (outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
+ "and{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1, GR8:$src2))]>;
+def AND16rr : I<0x21, MRMDestReg,
+ (outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "and{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
+ GR16:$src2))]>, OpSize;
+def AND32rr : I<0x21, MRMDestReg,
+ (outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
+ "and{l}\t{$src2, $dst|$dst, $src2}",
+ [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
+ GR32:$src2))]>;
}
// AND instructions with the destination register in REG and the source register
@@ -1789,45 +1783,46 @@ def AND32rr_REV : I<0x23, MRMSrcReg, (outs GR32:$dst),
def AND8rm : I<0x22, MRMSrcMem,
(outs GR8 :$dst), (ins GR8 :$src1, i8mem :$src2),
"and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (and GR8:$src1, (loadi8 addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
+ (loadi8 addr:$src2)))]>;
def AND16rm : I<0x23, MRMSrcMem,
(outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
"and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (and GR16:$src1, (loadi16 addr:$src2))),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
+ (loadi16 addr:$src2)))]>,
+ OpSize;
def AND32rm : I<0x23, MRMSrcMem,
(outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
"and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, (loadi32 addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
+ (loadi32 addr:$src2)))]>;
def AND8ri : Ii8<0x80, MRM4r,
(outs GR8 :$dst), (ins GR8 :$src1, i8imm :$src2),
"and{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (and GR8:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86and_flag GR8:$src1,
+ imm:$src2))]>;
def AND16ri : Ii16<0x81, MRM4r,
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (and GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
+ imm:$src2))]>, OpSize;
def AND32ri : Ii32<0x81, MRM4r,
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
+ imm:$src2))]>;
def AND16ri8 : Ii8<0x83, MRM4r,
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"and{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (and GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86and_flag GR16:$src1,
+ i16immSExt8:$src2))]>,
OpSize;
def AND32ri8 : Ii8<0x83, MRM4r,
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"and{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (and GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86and_flag GR32:$src1,
+ i32immSExt8:$src2))]>;
let isTwoAddress = 0 in {
def AND8mr : I<0x20, MRMDestMem,
@@ -1888,18 +1883,16 @@ let isCommutable = 1 in { // X = OR Y, Z --> X = OR Z, Y
def OR8rr : I<0x08, MRMDestReg, (outs GR8 :$dst),
(ins GR8 :$src1, GR8 :$src2),
"or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (or GR8:$src1, GR8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1, GR8:$src2))]>;
def OR16rr : I<0x09, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (or GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,GR16:$src2))]>,
+ OpSize;
def OR32rr : I<0x09, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,GR32:$src2))]>;
}
// OR instructions with the destination register in REG and the source register
@@ -1913,48 +1906,48 @@ def OR32rr_REV : I<0x0B, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}", []>;
-def OR8rm : I<0x0A, MRMSrcMem , (outs GR8 :$dst),
+def OR8rm : I<0x0A, MRMSrcMem, (outs GR8 :$dst),
(ins GR8 :$src1, i8mem :$src2),
"or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (or GR8:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
-def OR16rm : I<0x0B, MRMSrcMem , (outs GR16:$dst),
+ [(set GR8:$dst, EFLAGS, (X86or_flag GR8:$src1,
+ (load addr:$src2)))]>;
+def OR16rm : I<0x0B, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (or GR16:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, OpSize;
-def OR32rm : I<0x0B, MRMSrcMem , (outs GR32:$dst),
+ [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
+ (load addr:$src2)))]>,
+ OpSize;
+def OR32rm : I<0x0B, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
+ (load addr:$src2)))]>;
def OR8ri : Ii8 <0x80, MRM1r, (outs GR8 :$dst),
(ins GR8 :$src1, i8imm:$src2),
"or{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (or GR8:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst,EFLAGS, (X86or_flag GR8:$src1, imm:$src2))]>;
def OR16ri : Ii16<0x81, MRM1r, (outs GR16:$dst),
(ins GR16:$src1, i16imm:$src2),
"or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (or GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
+ imm:$src2))]>, OpSize;
def OR32ri : Ii32<0x81, MRM1r, (outs GR32:$dst),
(ins GR32:$src1, i32imm:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
+ imm:$src2))]>;
def OR16ri8 : Ii8<0x83, MRM1r, (outs GR16:$dst),
(ins GR16:$src1, i16i8imm:$src2),
"or{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (or GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86or_flag GR16:$src1,
+ i16immSExt8:$src2))]>, OpSize;
def OR32ri8 : Ii8<0x83, MRM1r, (outs GR32:$dst),
(ins GR32:$src1, i32i8imm:$src2),
"or{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (or GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86or_flag GR32:$src1,
+ i32immSExt8:$src2))]>;
let isTwoAddress = 0 in {
def OR8mr : I<0x08, MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src),
"or{b}\t{$src, $dst|$dst, $src}",
@@ -2004,18 +1997,18 @@ let isCommutable = 1 in { // X = XOR Y, Z --> X = XOR Z, Y
def XOR8rr : I<0x30, MRMDestReg,
(outs GR8 :$dst), (ins GR8 :$src1, GR8 :$src2),
"xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (xor GR8:$src1, GR8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
+ GR8:$src2))]>;
def XOR16rr : I<0x31, MRMDestReg,
(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
"xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
+ GR16:$src2))]>, OpSize;
def XOR32rr : I<0x31, MRMDestReg,
(outs GR32:$dst), (ins GR32:$src1, GR32:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
+ GR32:$src2))]>;
} // isCommutable = 1
// XOR instructions with the destination register in REG and the source register
@@ -2029,49 +2022,48 @@ def XOR32rr_REV : I<0x33, MRMSrcReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}", []>;
-def XOR8rm : I<0x32, MRMSrcMem ,
+def XOR8rm : I<0x32, MRMSrcMem,
(outs GR8 :$dst), (ins GR8:$src1, i8mem :$src2),
"xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (xor GR8:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
-def XOR16rm : I<0x33, MRMSrcMem ,
+ [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1,
+ (load addr:$src2)))]>;
+def XOR16rm : I<0x33, MRMSrcMem,
(outs GR16:$dst), (ins GR16:$src1, i16mem:$src2),
"xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
+ (load addr:$src2)))]>,
OpSize;
-def XOR32rm : I<0x33, MRMSrcMem ,
+def XOR32rm : I<0x33, MRMSrcMem,
(outs GR32:$dst), (ins GR32:$src1, i32mem:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
-
-def XOR8ri : Ii8<0x80, MRM6r,
- (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "xor{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (xor GR8:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
-def XOR16ri : Ii16<0x81, MRM6r,
- (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
+ (load addr:$src2)))]>;
+
+def XOR8ri : Ii8<0x80, MRM6r,
+ (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "xor{b}\t{$src2, $dst|$dst, $src2}",
+ [(set GR8:$dst, EFLAGS, (X86xor_flag GR8:$src1, imm:$src2))]>;
+def XOR16ri : Ii16<0x81, MRM6r,
+ (outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "xor{w}\t{$src2, $dst|$dst, $src2}",
+ [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
+ imm:$src2))]>, OpSize;
def XOR32ri : Ii32<0x81, MRM6r,
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
+ imm:$src2))]>;
def XOR16ri8 : Ii8<0x83, MRM6r,
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"xor{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (xor GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>,
+ [(set GR16:$dst, EFLAGS, (X86xor_flag GR16:$src1,
+ i16immSExt8:$src2))]>,
OpSize;
def XOR32ri8 : Ii8<0x83, MRM6r,
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"xor{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (xor GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86xor_flag GR32:$src1,
+ i32immSExt8:$src2))]>;
let isTwoAddress = 0 in {
def XOR8mr : I<0x30, MRMDestMem,
@@ -2118,12 +2110,12 @@ let isTwoAddress = 0 in {
[(store (xor (load addr:$dst), i32immSExt8:$src), addr:$dst),
(implicit EFLAGS)]>;
- def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src),
- "xor{b}\t{$src, %al|%al, $src}", []>;
- def XOR16i16 : Ii16 <0x35, RawFrm, (outs), (ins i16imm:$src),
- "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
- def XOR32i32 : Ii32 <0x35, RawFrm, (outs), (ins i32imm:$src),
- "xor{l}\t{$src, %eax|%eax, $src}", []>;
+ def XOR8i8 : Ii8 <0x34, RawFrm, (outs), (ins i8imm:$src),
+ "xor{b}\t{$src, %al|%al, $src}", []>;
+ def XOR16i16 : Ii16<0x35, RawFrm, (outs), (ins i16imm:$src),
+ "xor{w}\t{$src, %ax|%ax, $src}", []>, OpSize;
+ def XOR32i32 : Ii32<0x35, RawFrm, (outs), (ins i32imm:$src),
+ "xor{l}\t{$src, %eax|%eax, $src}", []>;
} // isTwoAddress = 0
} // Defs = [EFLAGS]
@@ -2690,21 +2682,20 @@ let isCommutable = 1 in { // X = ADD Y, Z --> X = ADD Z, Y
def ADD8rr : I<0x00, MRMDestReg, (outs GR8 :$dst),
(ins GR8 :$src1, GR8 :$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (add GR8:$src1, GR8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1, GR8:$src2))]>;
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
// Register-Register Addition
def ADD16rr : I<0x01, MRMDestReg, (outs GR16:$dst),
(ins GR16:$src1, GR16:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
+ GR16:$src2))]>, OpSize;
def ADD32rr : I<0x01, MRMDestReg, (outs GR32:$dst),
(ins GR32:$src1, GR32:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
+ GR32:$src2))]>;
} // end isConvertibleToThreeAddress
} // end isCommutable
@@ -2723,47 +2714,47 @@ let isCodeGenOnly = 1 in {
def ADD8rm : I<0x02, MRMSrcMem, (outs GR8 :$dst),
(ins GR8 :$src1, i8mem :$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (add GR8:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS, (X86add_flag GR8:$src1,
+ (load addr:$src2)))]>;
def ADD16rm : I<0x03, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS, (X86add_flag GR16:$src1,
+ (load addr:$src2)))]>, OpSize;
def ADD32rm : I<0x03, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS, (X86add_flag GR32:$src1,
+ (load addr:$src2)))]>;
// Register-Integer Addition
def ADD8ri : Ii8<0x80, MRM0r, (outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
"add{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (add GR8:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS,
+ (X86add_flag GR8:$src1, imm:$src2))]>;
let isConvertibleToThreeAddress = 1 in { // Can transform into LEA.
// Register-Integer Addition
def ADD16ri : Ii16<0x81, MRM0r, (outs GR16:$dst),
(ins GR16:$src1, i16imm:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86add_flag GR16:$src1, imm:$src2))]>, OpSize;
def ADD32ri : Ii32<0x81, MRM0r, (outs GR32:$dst),
(ins GR32:$src1, i32imm:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86add_flag GR32:$src1, imm:$src2))]>;
def ADD16ri8 : Ii8<0x83, MRM0r, (outs GR16:$dst),
(ins GR16:$src1, i16i8imm:$src2),
"add{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (add GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86add_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
def ADD32ri8 : Ii8<0x83, MRM0r, (outs GR32:$dst),
(ins GR32:$src1, i32i8imm:$src2),
"add{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (add GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86add_flag GR32:$src1, i32immSExt8:$src2))]>;
}
let isTwoAddress = 0 in {
@@ -2911,16 +2902,16 @@ let isTwoAddress = 0 in {
// Register-Register Subtraction
def SUB8rr : I<0x28, MRMDestReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sub GR8:$src1, GR8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS,
+ (X86sub_flag GR8:$src1, GR8:$src2))]>;
def SUB16rr : I<0x29, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86sub_flag GR16:$src1, GR16:$src2))]>, OpSize;
def SUB32rr : I<0x29, MRMDestReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86sub_flag GR32:$src1, GR32:$src2))]>;
def SUB8rr_REV : I<0x2A, MRMSrcReg, (outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}", []>;
@@ -2935,45 +2926,45 @@ def SUB32rr_REV : I<0x2B, MRMSrcReg, (outs GR32:$dst),
def SUB8rm : I<0x2A, MRMSrcMem, (outs GR8 :$dst),
(ins GR8 :$src1, i8mem :$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sub GR8:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS,
+ (X86sub_flag GR8:$src1, (load addr:$src2)))]>;
def SUB16rm : I<0x2B, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86sub_flag GR16:$src1, (load addr:$src2)))]>, OpSize;
def SUB32rm : I<0x2B, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86sub_flag GR32:$src1, (load addr:$src2)))]>;
// Register-Integer Subtraction
def SUB8ri : Ii8 <0x80, MRM5r, (outs GR8:$dst),
(ins GR8:$src1, i8imm:$src2),
"sub{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (sub GR8:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR8:$dst, EFLAGS,
+ (X86sub_flag GR8:$src1, imm:$src2))]>;
def SUB16ri : Ii16<0x81, MRM5r, (outs GR16:$dst),
(ins GR16:$src1, i16imm:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86sub_flag GR16:$src1, imm:$src2))]>, OpSize;
def SUB32ri : Ii32<0x81, MRM5r, (outs GR32:$dst),
(ins GR32:$src1, i32imm:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86sub_flag GR32:$src1, imm:$src2))]>;
def SUB16ri8 : Ii8<0x83, MRM5r, (outs GR16:$dst),
(ins GR16:$src1, i16i8imm:$src2),
"sub{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (sub GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86sub_flag GR16:$src1, i16immSExt8:$src2))]>, OpSize;
def SUB32ri8 : Ii8<0x83, MRM5r, (outs GR32:$dst),
(ins GR32:$src1, i32i8imm:$src2),
"sub{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (sub GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86sub_flag GR32:$src1, i32immSExt8:$src2))]>;
let isTwoAddress = 0 in {
// Memory-Register Subtraction
@@ -3122,25 +3113,26 @@ let isCommutable = 1 in { // X = IMUL Y, Z --> X = IMUL Z, Y
// Register-Register Signed Integer Multiply
def IMUL16rr : I<0xAF, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src1,GR16:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, GR16:$src2)),
- (implicit EFLAGS)]>, TB, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, GR16:$src2))]>, TB, OpSize;
def IMUL32rr : I<0xAF, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1,GR32:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, GR32:$src2)),
- (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, GR32:$src2))]>, TB;
}
// Register-Memory Signed Integer Multiply
def IMUL16rm : I<0xAF, MRMSrcMem, (outs GR16:$dst),
(ins GR16:$src1, i16mem:$src2),
"imul{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, TB, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, (load addr:$src2)))]>,
+ TB, OpSize;
def IMUL32rm : I<0xAF, MRMSrcMem, (outs GR32:$dst),
(ins GR32:$src1, i32mem:$src2),
"imul{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, (load addr:$src2))),
- (implicit EFLAGS)]>, TB;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, (load addr:$src2)))]>, TB;
} // Defs = [EFLAGS]
} // end Two Address instructions
@@ -3150,47 +3142,49 @@ let Defs = [EFLAGS] in {
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, imm:$src2))]>, OpSize;
def IMUL32rri : Ii32<0x69, MRMSrcReg, // GR32 = GR32*I32
(outs GR32:$dst), (ins GR32:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, imm:$src2))]>;
def IMUL16rri8 : Ii8<0x6B, MRMSrcReg, // GR16 = GR16*I8
(outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul GR16:$src1, i16immSExt8:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag GR16:$src1, i16immSExt8:$src2))]>,
+ OpSize;
def IMUL32rri8 : Ii8<0x6B, MRMSrcReg, // GR32 = GR32*I8
(outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul GR32:$src1, i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag GR32:$src1, i32immSExt8:$src2))]>;
// Memory-Integer Signed Integer Multiply
def IMUL16rmi : Ii16<0x69, MRMSrcMem, // GR16 = [mem16]*I16
(outs GR16:$dst), (ins i16mem:$src1, i16imm:$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul (load addr:$src1), imm:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>,
+ OpSize;
def IMUL32rmi : Ii32<0x69, MRMSrcMem, // GR32 = [mem32]*I32
(outs GR32:$dst), (ins i32mem:$src1, i32imm:$src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul (load addr:$src1), imm:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1), imm:$src2))]>;
def IMUL16rmi8 : Ii8<0x6B, MRMSrcMem, // GR16 = [mem16]*I8
(outs GR16:$dst), (ins i16mem:$src1, i16i8imm :$src2),
"imul{w}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR16:$dst, (mul (load addr:$src1),
- i16immSExt8:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set GR16:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i16immSExt8:$src2))]>, OpSize;
def IMUL32rmi8 : Ii8<0x6B, MRMSrcMem, // GR32 = [mem32]*I8
(outs GR32:$dst), (ins i32mem:$src1, i32i8imm: $src2),
"imul{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (mul (load addr:$src1),
- i32immSExt8:$src2)),
- (implicit EFLAGS)]>;
+ [(set GR32:$dst, EFLAGS,
+ (X86smul_flag (load addr:$src1),
+ i32immSExt8:$src2))]>;
} // Defs = [EFLAGS]
//===----------------------------------------------------------------------===//
@@ -4345,9 +4339,12 @@ def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
(TCRETURNri GR32_TC:$dst, imm:$off)>,
Requires<[In32BitMode]>;
+// FIXME: This is disabled for 32-bit PIC mode because the global base
+// register, which is part of the address mode, may be assigned a
+// callee-saved register.
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
- Requires<[In32BitMode]>;
+ Requires<[In32BitMode, IsNotPIC]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>,
@@ -4722,23 +4719,17 @@ def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
let AddedComplexity = 5 in { // Try this before selecting to OR
-def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR16:$src1, imm:$src2),
(ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (or_is_add GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR32:$src1, imm:$src2),
(ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR16:$src1, i16immSExt8:$src2),
(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR32:$src1, i32immSExt8:$src2),
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR16:$src1, GR16:$src2),
(ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
+def : Pat<(or_is_add GR32:$src1, GR32:$src2),
(ADD32rr GR32:$src1, GR32:$src2)>;
} // AddedComplexity
@@ -4746,270 +4737,173 @@ def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2),
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
-// Register-Register Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR8:$src1, GR8:$src2),
- (implicit EFLAGS)),
- (ADD8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(parallel (X86add_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
- (ADD16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86add_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
- (ADD32rr GR32:$src1, GR32:$src2)>;
+// add reg, reg
+def : Pat<(add GR8 :$src1, GR8 :$src2), (ADD8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(add GR16:$src1, GR16:$src2), (ADD16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(add GR32:$src1, GR32:$src2), (ADD32rr GR32:$src1, GR32:$src2)>;
-// Register-Memory Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR8:$src1, (loadi8 addr:$src2)),
- (implicit EFLAGS)),
+// add reg, mem
+def : Pat<(add GR8:$src1, (loadi8 addr:$src2)),
(ADD8rm GR8:$src1, addr:$src2)>;
-def : Pat<(parallel (X86add_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(add GR16:$src1, (loadi16 addr:$src2)),
(ADD16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86add_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(add GR32:$src1, (loadi32 addr:$src2)),
(ADD32rm GR32:$src1, addr:$src2)>;
-// Register-Integer Addition with EFLAGS result
-def : Pat<(parallel (X86add_flag GR8:$src1, imm:$src2),
- (implicit EFLAGS)),
- (ADD8ri GR8:$src1, imm:$src2)>;
-def : Pat<(parallel (X86add_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
- (ADD16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86add_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
- (ADD32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86add_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+// add reg, imm
+def : Pat<(add GR8 :$src1, imm:$src2), (ADD8ri GR8:$src1 , imm:$src2)>;
+def : Pat<(add GR16:$src1, imm:$src2), (ADD16ri GR16:$src1, imm:$src2)>;
+def : Pat<(add GR32:$src1, imm:$src2), (ADD32ri GR32:$src1, imm:$src2)>;
+def : Pat<(add GR16:$src1, i16immSExt8:$src2),
(ADD16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86add_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(add GR32:$src1, i32immSExt8:$src2),
(ADD32ri8 GR32:$src1, i32immSExt8:$src2)>;
-// Register-Register Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR8:$src1, GR8:$src2),
- (implicit EFLAGS)),
- (SUB8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(parallel (X86sub_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
- (SUB16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86sub_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
- (SUB32rr GR32:$src1, GR32:$src2)>;
+// sub reg, reg
+def : Pat<(sub GR8 :$src1, GR8 :$src2), (SUB8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(sub GR16:$src1, GR16:$src2), (SUB16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(sub GR32:$src1, GR32:$src2), (SUB32rr GR32:$src1, GR32:$src2)>;
-// Register-Memory Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR8:$src1, (loadi8 addr:$src2)),
- (implicit EFLAGS)),
+// sub reg, mem
+def : Pat<(sub GR8:$src1, (loadi8 addr:$src2)),
(SUB8rm GR8:$src1, addr:$src2)>;
-def : Pat<(parallel (X86sub_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(sub GR16:$src1, (loadi16 addr:$src2)),
(SUB16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86sub_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(sub GR32:$src1, (loadi32 addr:$src2)),
(SUB32rm GR32:$src1, addr:$src2)>;
-// Register-Integer Subtraction with EFLAGS result
-def : Pat<(parallel (X86sub_flag GR8:$src1, imm:$src2),
- (implicit EFLAGS)),
+// sub reg, imm
+def : Pat<(sub GR8:$src1, imm:$src2),
(SUB8ri GR8:$src1, imm:$src2)>;
-def : Pat<(parallel (X86sub_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR16:$src1, imm:$src2),
(SUB16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86sub_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR32:$src1, imm:$src2),
(SUB32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86sub_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR16:$src1, i16immSExt8:$src2),
(SUB16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86sub_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(sub GR32:$src1, i32immSExt8:$src2),
(SUB32ri8 GR32:$src1, i32immSExt8:$src2)>;
-// Register-Register Signed Integer Multiply with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
+// mul reg, reg
+def : Pat<(mul GR16:$src1, GR16:$src2),
(IMUL16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86smul_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR32:$src1, GR32:$src2),
(IMUL32rr GR32:$src1, GR32:$src2)>;
-// Register-Memory Signed Integer Multiply with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+// mul reg, mem
+def : Pat<(mul GR16:$src1, (loadi16 addr:$src2)),
(IMUL16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86smul_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(mul GR32:$src1, (loadi32 addr:$src2)),
(IMUL32rm GR32:$src1, addr:$src2)>;
-// Register-Integer Signed Integer Multiply with EFLAGS result
-def : Pat<(parallel (X86smul_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
+// mul reg, imm
+def : Pat<(mul GR16:$src1, imm:$src2),
(IMUL16rri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86smul_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR32:$src1, imm:$src2),
(IMUL32rri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86smul_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR16:$src1, i16immSExt8:$src2),
(IMUL16rri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86smul_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul GR32:$src1, i32immSExt8:$src2),
(IMUL32rri8 GR32:$src1, i32immSExt8:$src2)>;
-// Memory-Integer Signed Integer Multiply with EFLAGS result
-def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), imm:$src2),
- (implicit EFLAGS)),
+// reg = mul mem, imm
+def : Pat<(mul (loadi16 addr:$src1), imm:$src2),
(IMUL16rmi addr:$src1, imm:$src2)>;
-def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul (loadi32 addr:$src1), imm:$src2),
(IMUL32rmi addr:$src1, imm:$src2)>;
-def : Pat<(parallel (X86smul_flag (loadi16 addr:$src1), i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul (loadi16 addr:$src1), i16immSExt8:$src2),
(IMUL16rmi8 addr:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86smul_flag (loadi32 addr:$src1), i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(mul (loadi32 addr:$src1), i32immSExt8:$src2),
(IMUL32rmi8 addr:$src1, i32immSExt8:$src2)>;
// Optimize multiply by 2 with EFLAGS result.
let AddedComplexity = 2 in {
-def : Pat<(parallel (X86smul_flag GR16:$src1, 2),
- (implicit EFLAGS)),
- (ADD16rr GR16:$src1, GR16:$src1)>;
-
-def : Pat<(parallel (X86smul_flag GR32:$src1, 2),
- (implicit EFLAGS)),
- (ADD32rr GR32:$src1, GR32:$src1)>;
+def : Pat<(X86smul_flag GR16:$src1, 2), (ADD16rr GR16:$src1, GR16:$src1)>;
+def : Pat<(X86smul_flag GR32:$src1, 2), (ADD32rr GR32:$src1, GR32:$src1)>;
}
-// INC and DEC with EFLAGS result. Note that these do not set CF.
-def : Pat<(parallel (X86inc_flag GR8:$src), (implicit EFLAGS)),
- (INC8r GR8:$src)>;
-def : Pat<(parallel (X86dec_flag GR8:$src), (implicit EFLAGS)),
- (DEC8r GR8:$src)>;
-
-def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)),
- (INC16r GR16:$src)>, Requires<[In32BitMode]>;
-def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)),
- (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
-
-def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)),
- (INC32r GR32:$src)>, Requires<[In32BitMode]>;
-def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)),
- (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
-
-// Register-Register Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR8:$src1, GR8:$src2),
- (implicit EFLAGS)),
- (OR8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(parallel (X86or_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
- (OR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86or_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
- (OR32rr GR32:$src1, GR32:$src2)>;
-
-// Register-Memory Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR8:$src1, (loadi8 addr:$src2)),
- (implicit EFLAGS)),
+// Patterns for nodes that do not produce flags, for instructions that do.
+
+// Increment reg.
+def : Pat<(add GR8:$src , 1), (INC8r GR8:$src)>;
+def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[In32BitMode]>;
+
+// Decrement reg.
+def : Pat<(add GR8:$src , -1), (DEC8r GR8:$src)>;
+def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[In32BitMode]>;
+def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[In32BitMode]>;
+
+// or reg/reg.
+def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(or GR16:$src1, GR16:$src2), (OR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(or GR32:$src1, GR32:$src2), (OR32rr GR32:$src1, GR32:$src2)>;
+
+// or reg/mem
+def : Pat<(or GR8:$src1, (loadi8 addr:$src2)),
(OR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(parallel (X86or_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(or GR16:$src1, (loadi16 addr:$src2)),
(OR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86or_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(or GR32:$src1, (loadi32 addr:$src2)),
(OR32rm GR32:$src1, addr:$src2)>;
-// Register-Integer Or with EFLAGS result
-def : Pat<(parallel (X86or_flag GR8:$src1, imm:$src2),
- (implicit EFLAGS)),
- (OR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(parallel (X86or_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
- (OR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86or_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
- (OR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86or_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+// or reg/imm
+def : Pat<(or GR8:$src1 , imm:$src2), (OR8ri GR8 :$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, imm:$src2), (OR16ri GR16:$src1, imm:$src2)>;
+def : Pat<(or GR32:$src1, imm:$src2), (OR32ri GR32:$src1, imm:$src2)>;
+def : Pat<(or GR16:$src1, i16immSExt8:$src2),
(OR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86or_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(or GR32:$src1, i32immSExt8:$src2),
(OR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-// Register-Register XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR8:$src1, GR8:$src2),
- (implicit EFLAGS)),
- (XOR8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(parallel (X86xor_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
- (XOR16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86xor_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
- (XOR32rr GR32:$src1, GR32:$src2)>;
-
-// Register-Memory XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR8:$src1, (loadi8 addr:$src2)),
- (implicit EFLAGS)),
+// xor reg/reg
+def : Pat<(xor GR8 :$src1, GR8 :$src2), (XOR8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(xor GR16:$src1, GR16:$src2), (XOR16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(xor GR32:$src1, GR32:$src2), (XOR32rr GR32:$src1, GR32:$src2)>;
+
+// xor reg/mem
+def : Pat<(xor GR8:$src1, (loadi8 addr:$src2)),
(XOR8rm GR8:$src1, addr:$src2)>;
-def : Pat<(parallel (X86xor_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(xor GR16:$src1, (loadi16 addr:$src2)),
(XOR16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86xor_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(xor GR32:$src1, (loadi32 addr:$src2)),
(XOR32rm GR32:$src1, addr:$src2)>;
-// Register-Integer XOr with EFLAGS result
-def : Pat<(parallel (X86xor_flag GR8:$src1, imm:$src2),
- (implicit EFLAGS)),
+// xor reg/imm
+def : Pat<(xor GR8:$src1, imm:$src2),
(XOR8ri GR8:$src1, imm:$src2)>;
-def : Pat<(parallel (X86xor_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR16:$src1, imm:$src2),
(XOR16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86xor_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR32:$src1, imm:$src2),
(XOR32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86xor_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR16:$src1, i16immSExt8:$src2),
(XOR16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86xor_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(xor GR32:$src1, i32immSExt8:$src2),
(XOR32ri8 GR32:$src1, i32immSExt8:$src2)>;
-// Register-Register And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR8:$src1, GR8:$src2),
- (implicit EFLAGS)),
- (AND8rr GR8:$src1, GR8:$src2)>;
-def : Pat<(parallel (X86and_flag GR16:$src1, GR16:$src2),
- (implicit EFLAGS)),
- (AND16rr GR16:$src1, GR16:$src2)>;
-def : Pat<(parallel (X86and_flag GR32:$src1, GR32:$src2),
- (implicit EFLAGS)),
- (AND32rr GR32:$src1, GR32:$src2)>;
-
-// Register-Memory And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR8:$src1, (loadi8 addr:$src2)),
- (implicit EFLAGS)),
+// and reg/reg
+def : Pat<(and GR8 :$src1, GR8 :$src2), (AND8rr GR8 :$src1, GR8 :$src2)>;
+def : Pat<(and GR16:$src1, GR16:$src2), (AND16rr GR16:$src1, GR16:$src2)>;
+def : Pat<(and GR32:$src1, GR32:$src2), (AND32rr GR32:$src1, GR32:$src2)>;
+
+// and reg/mem
+def : Pat<(and GR8:$src1, (loadi8 addr:$src2)),
(AND8rm GR8:$src1, addr:$src2)>;
-def : Pat<(parallel (X86and_flag GR16:$src1, (loadi16 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(and GR16:$src1, (loadi16 addr:$src2)),
(AND16rm GR16:$src1, addr:$src2)>;
-def : Pat<(parallel (X86and_flag GR32:$src1, (loadi32 addr:$src2)),
- (implicit EFLAGS)),
+def : Pat<(and GR32:$src1, (loadi32 addr:$src2)),
(AND32rm GR32:$src1, addr:$src2)>;
-// Register-Integer And with EFLAGS result
-def : Pat<(parallel (X86and_flag GR8:$src1, imm:$src2),
- (implicit EFLAGS)),
+// and reg/imm
+def : Pat<(and GR8:$src1, imm:$src2),
(AND8ri GR8:$src1, imm:$src2)>;
-def : Pat<(parallel (X86and_flag GR16:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR16:$src1, imm:$src2),
(AND16ri GR16:$src1, imm:$src2)>;
-def : Pat<(parallel (X86and_flag GR32:$src1, imm:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR32:$src1, imm:$src2),
(AND32ri GR32:$src1, imm:$src2)>;
-def : Pat<(parallel (X86and_flag GR16:$src1, i16immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR16:$src1, i16immSExt8:$src2),
(AND16ri8 GR16:$src1, i16immSExt8:$src2)>;
-def : Pat<(parallel (X86and_flag GR32:$src1, i32immSExt8:$src2),
- (implicit EFLAGS)),
+def : Pat<(and GR32:$src1, i32immSExt8:$src2),
(AND32ri8 GR32:$src1, i32immSExt8:$src2)>;
// -disable-16bit support.
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index e1203e2..1c81c5e 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -605,22 +605,9 @@ let AddedComplexity = 10 in {
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
VR64:$src2)),
(MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
- VR64:$src2)),
- (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
- VR64:$src2)),
- (MMX_PANDNrr VR64:$src1, VR64:$src2)>;
-
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
(load addr:$src2))),
(MMX_PANDNrm VR64:$src1, addr:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v4i16 immAllOnesV_bc))),
- (load addr:$src2))),
- (MMX_PANDNrm VR64:$src1, addr:$src2)>;
-def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v8i8 immAllOnesV_bc))),
- (load addr:$src2))),
- (MMX_PANDNrm VR64:$src1, addr:$src2)>;
// Move MMX to lower 64-bit of XMM
def : Pat<(v2i64 (scalar_to_vector (i64 (bitconvert (v8i8 VR64:$src))))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 720b663..dadc2a6 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -69,8 +69,9 @@ def X86pcmpgtw : SDNode<"X86ISD::PCMPGTW", SDTIntBinOp>;
def X86pcmpgtd : SDNode<"X86ISD::PCMPGTD", SDTIntBinOp>;
def X86pcmpgtq : SDNode<"X86ISD::PCMPGTQ", SDTIntBinOp>;
-def SDTX86CmpPTest : SDTypeProfile<0, 2, [SDTCisVT<0, v4f32>,
- SDTCisVT<1, v4f32>]>;
+def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
+ SDTCisVT<1, v4f32>,
+ SDTCisVT<2, v4f32>]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
//===----------------------------------------------------------------------===//
@@ -1114,15 +1115,19 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
// load of an all-zeros value if folding it would be beneficial.
// FIXME: Change encoding to pseudo!
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in
-def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ isCodeGenOnly = 1 in {
+def V_SET0PS : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4f32 immAllZerosV))]>;
+def V_SET0PD : PDI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v2f64 immAllZerosV))]>;
+let ExeDomain = SSEPackedInt in
+def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
+}
-def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
-def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
-def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
-def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
-def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
+def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0PI)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
@@ -1935,6 +1940,7 @@ let Constraints = "$src1 = $dst" in {
//===---------------------------------------------------------------------===//
// SSE integer instructions
+let ExeDomain = SSEPackedInt in {
// Move Instructions
let neverHasSideEffects = 1 in
@@ -2043,6 +2049,7 @@ multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
} // Constraints = "$src1 = $dst"
+} // ExeDomain = SSEPackedInt
// 128-bit Integer Arithmetic
@@ -2105,7 +2112,8 @@ defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad",
int_x86_sse2_psra_d, int_x86_sse2_psrai_d>;
// 128-bit logical shifts.
-let Constraints = "$src1 = $dst", neverHasSideEffects = 1 in {
+let Constraints = "$src1 = $dst", neverHasSideEffects = 1,
+ ExeDomain = SSEPackedInt in {
def PSLLDQri : PDIi8<0x73, MRM7r,
(outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
"pslldq\t{$src2, $dst|$dst, $src2}", []>;
@@ -2139,7 +2147,7 @@ defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>;
defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or , 1>;
defm PXOR : PDI_binop_rm_v2i64<0xEF, "pxor", xor, 1>;
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", ExeDomain = SSEPackedInt in {
def PANDNrr : PDI<0xDF, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"pandn\t{$src2, $dst|$dst, $src2}",
@@ -2193,6 +2201,8 @@ defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>;
defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>;
defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>;
+let ExeDomain = SSEPackedInt in {
+
// Shuffle and unpack instructions
let AddedComplexity = 5 in {
def PSHUFDri : PDIi8<0x70, MRMSrcReg,
@@ -2369,10 +2379,13 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
+} // ExeDomain = SSEPackedInt
+
// Non-temporal stores
def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+let ExeDomain = SSEPackedInt in
def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
@@ -2386,6 +2399,7 @@ def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntpd\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+let ExeDomain = SSEPackedInt in
def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
[(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
@@ -2414,7 +2428,7 @@ def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
- isCodeGenOnly = 1 in
+ isCodeGenOnly = 1, ExeDomain = SSEPackedInt in
// FIXME: Change encoding to pseudo.
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
@@ -3016,14 +3030,14 @@ let Predicates = [HasSSE2] in {
let AddedComplexity = 15 in {
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+ (MOVSDrr (v2f64 (V_SET0PS)), FR64:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
+ (MOVSSrr (v4f32 (V_SET0PS)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVSSrr (v4f32 (V_SET0)),
+ (MOVSSrr (v4f32 (V_SET0PS)),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVSSrr (v4i32 (V_SET0)),
+ (MOVSSrr (v4i32 (V_SET0PI)),
(EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
}
@@ -3181,9 +3195,6 @@ def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
(SHUFFLE_get_shuf_imm VR128:$src3))>;
// Set lowest element and zero upper elements.
-let AddedComplexity = 15 in
-def : Pat<(v2f64 (movl immAllZerosV_bc, VR128:$src)),
- (MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
(MOVZPQILo2PQIrr VR128:$src)>, Requires<[HasSSE2]>;
@@ -3441,8 +3452,28 @@ let Constraints = "$src1 = $dst" in {
OpSize;
}
}
-defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
- int_x86_sse41_pmulld, 1>;
+
+/// SS48I_binop_rm - Simple SSE41 binary operator.
+let Constraints = "$src1 = $dst" in {
+multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ ValueType OpVT, bit Commutable = 0> {
+ def rr : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpVT (OpNode VR128:$src1, VR128:$src2)))]>,
+ OpSize {
+ let isCommutable = Commutable;
+ }
+ def rm : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ [(set VR128:$dst, (OpNode VR128:$src1,
+ (bc_v4i32 (memopv2i64 addr:$src2))))]>,
+ OpSize;
+}
+}
+
+defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, 1>;
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
let Constraints = "$src1 = $dst" in {
@@ -3772,12 +3803,12 @@ def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"ptest \t{$src2, $src1|$src1, $src2}",
- [(X86ptest VR128:$src1, VR128:$src2),
- (implicit EFLAGS)]>, OpSize;
+ [(set EFLAGS, (X86ptest VR128:$src1, VR128:$src2))]>,
+ OpSize;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2),
"ptest \t{$src2, $src1|$src1, $src2}",
- [(X86ptest VR128:$src1, (load addr:$src2)),
- (implicit EFLAGS)]>, OpSize;
+ [(set EFLAGS, (X86ptest VR128:$src1, (load addr:$src2)))]>,
+ OpSize;
}
def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
@@ -3817,6 +3848,53 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
(PCMPGTQrm VR128:$src1, addr:$src2)>;
+// TODO: These should be guarded by an AES subtarget feature.
+defm AESIMC : SS42I_binop_rm_int<0xDB, "aesimc",
+ int_x86_aesni_aesimc>;
+defm AESENC : SS42I_binop_rm_int<0xDC, "aesenc",
+ int_x86_aesni_aesenc>;
+defm AESENCLAST : SS42I_binop_rm_int<0xDD, "aesenclast",
+ int_x86_aesni_aesenclast>;
+defm AESDEC : SS42I_binop_rm_int<0xDE, "aesdec",
+ int_x86_aesni_aesdec>;
+defm AESDECLAST : SS42I_binop_rm_int<0xDF, "aesdeclast",
+ int_x86_aesni_aesdeclast>;
+
+def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)),
+ (AESIMCrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))),
+ (AESIMCrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
+ (AESENCrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
+ (AESENCrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
+ (AESENCLASTrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
+ (AESENCLASTrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
+ (AESDECrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
+ (AESDECrm VR128:$src1, addr:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
+ (AESDECLASTrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
+ (AESDECLASTrm VR128:$src1, addr:$src2)>;
+
+def AESKEYGENASSIST128rr : SS42AI<0xDF, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, i32i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
+ OpSize;
+def AESKEYGENASSIST128rm : SS42AI<0xDF, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src1, i32i8imm:$src2),
+ "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ [(set VR128:$dst,
+ (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
+ imm:$src2))]>,
+ OpSize;
+
// crc intrinsic instruction
// This set of instructions are only rm, the only difference is the size
// of r and m.
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index cd56816..8a0cde4 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -266,6 +266,9 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned Model = 0;
DetectFamilyModel(EAX, Family, Model);
IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+ // If it's Nehalem, unaligned memory access is fast.
+  if (Family == 6 && Model == 26)
+ IsUAMemFast = true;
GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
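DetectFamilyModel itself is outside this hunk; the conventional leaf-1 EAX
decoding such a helper performs -- a sketch under that assumption, not the
in-tree code -- looks like:

// CPUID leaf 1, EAX layout: stepping[3:0], model[7:4], family[11:8],
// extended model[19:16], extended family[27:20].
static void detectFamilyModel(unsigned EAX, unsigned &Family,
                              unsigned &Model) {
  Family = (EAX >> 8) & 0xf;
  Model  = (EAX >> 4) & 0xf;
  if (Family == 6 || Family == 0xf) {
    if (Family == 0xf)
      Family += (EAX >> 20) & 0xff;    // fold in extended family
    Model += ((EAX >> 16) & 0xf) << 4; // splice in extended model
  }
}

Under that decoding, Nehalem (leaf-1 EAX 0x000106Ax) reports family 6 and
model 26 (0x1A), which is what the check above matches.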
@@ -286,6 +289,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasFMA3(false)
, HasFMA4(false)
, IsBTMemSlow(false)
+ , IsUAMemFast(false)
, HasVectorUAMem(false)
, DarwinVers(0)
, stackAlignment(8)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 56220db..bf30154 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -78,6 +78,9 @@ protected:
/// IsBTMemSlow - True if BT (bit test) on memory operands is slow.
bool IsBTMemSlow;
+ /// IsUAMemFast - True if unaligned memory access is fast.
+ bool IsUAMemFast;
+
/// HasVectorUAMem - True if SIMD operations can have unaligned memory
/// operands. This may require setting a feature bit in the
/// processor.
@@ -148,6 +151,7 @@ public:
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; }
+ bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool isTargetDarwin() const { return TargetType == isDarwin; }
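
A consumer of the new accessor would typically use it as a tie-breaker when
alignment alone does not settle the question. A minimal sketch, with a
stand-in type so it compiles on its own (only isUnalignedMemAccessFast is
the real name introduced by this patch):

    // Stand-in for the relevant slice of X86Subtarget.
    struct SubtargetInfo {
      bool IsUAMemFast;
      bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
    };

    // Prefer one unaligned 16-byte access over a split pair when the
    // subtarget says unaligned accesses are cheap. Illustrative policy only.
    static bool shouldUseSingleUnalignedAccess(const SubtargetInfo &ST,
                                               unsigned Align) {
      return Align >= 16 || ST.isUnalignedMemAccessFast();
    }
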
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index f13e6f3..c608e56 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -17,6 +17,7 @@
#include "llvm/PassManager.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
@@ -169,6 +170,15 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM,
return true; // -print-machineinstr should print after this.
}
+bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (OptLevel != CodeGenOpt::None && Subtarget.hasSSE2()) {
+ PM.add(createSSEDomainFixPass());
+ return true;
+ }
+ return false;
+}
+
bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE) {
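
The hook follows the standard LLVMTargetMachine pattern: allocate the pass,
hand it to the pass manager, and return true so -print-machineinstrs knows
to dump after it. The same shape for a hypothetical target (MyTargetMachine
and createMyLatePass are illustrative names):

    // Wire a late machine pass into the pre-emit stage.
    bool MyTargetMachine::addPreEmitPass(PassManagerBase &PM,
                                         CodeGenOpt::Level OptLevel) {
      if (OptLevel == CodeGenOpt::None)
        return false;             // nothing added, nothing to print after
      PM.add(createMyLatePass()); // factory returns a new machine pass
      return true;
    }
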
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 2bb5454..ae7b5b2 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -66,6 +66,7 @@ public:
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPostRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addCodeEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel,
JITCodeEmitter &JCE);
};
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index e5f5a6d..54df33c 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -215,7 +215,15 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
bool AllowModify) const {
// If the block has no terminators, it just falls into the block after it.
MachineBasicBlock::iterator I = MBB.end();
- if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
+ if (I == MBB.begin())
+ return false;
+ --I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return false;
+ --I;
+ }
+ if (!isUnpredicatedTerminator(I))
return false;
// Get the last instruction in the block.
@@ -326,6 +334,11 @@ XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin()) return 0;
--I;
+ while (I->isDebugValue()) {
+ if (I == MBB.begin())
+ return 0;
+ --I;
+ }
if (!IsBRU(I->getOpcode()) && !IsCondBranch(I->getOpcode()))
return 0;
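
Both hunks are instances of the same reverse-scan idiom that many targets
grew in this update: walk backwards from the end of the block, skipping
DBG_VALUE instructions, so that debug info never changes branch analysis.
Factored into a helper it would look roughly like this (the helper is ours,
not an LLVM API):

    // Find the last non-debug instruction in MBB, or MBB.end() if the block
    // is empty or contains only DBG_VALUEs. MachineBasicBlock is LLVM's.
    static MachineBasicBlock::iterator
    lastRealInstruction(MachineBasicBlock &MBB) {
      MachineBasicBlock::iterator I = MBB.end();
      while (I != MBB.begin()) {
        --I;
        if (!I->isDebugValue())
          return I;
      }
      return MBB.end();
    }
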
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 2e9a1e5..dd3cbc1 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -32,7 +32,7 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
[SDNPHasChain, SDNPOptInFlag, SDNPOutFlag,
SDNPVariadic]>;
-def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone,
+def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTBrind,
[SDNPHasChain, SDNPOptInFlag]>;
def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 7cb1367..40a87e8 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -623,6 +623,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
SmallVector<Value*, 16> Args;
while (!F->use_empty()) {
CallSite CS = CallSite::get(F->use_back());
+ assert(CS.getCalledFunction() == F);
Instruction *Call = CS.getInstruction();
const AttrListPtr &CallPAL = CS.getAttributes();
@@ -660,7 +661,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Non-dead argument: insert GEPs and loads as appropriate.
ScalarizeTable &ArgIndices = ScalarizedElements[I];
// Store the Value* version of the indices in here, but declare it now
- // for reuse
+ // for reuse.
std::vector<Value*> Ops;
for (ScalarizeTable::iterator SI = ArgIndices.begin(),
E = ArgIndices.end(); SI != E; ++SI) {
@@ -677,16 +678,20 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Type::getInt32Ty(F->getContext()) :
Type::getInt64Ty(F->getContext()));
Ops.push_back(ConstantInt::get(IdxTy, *II));
- // Keep track of the type we're currently indexing
+ // Keep track of the type we're currently indexing.
ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(*II);
}
- // And create a GEP to extract those indices
+ // And create a GEP to extract those indices.
V = GetElementPtrInst::Create(V, Ops.begin(), Ops.end(),
V->getName()+".idx", Call);
Ops.clear();
AA.copyValue(OrigLoad->getOperand(0), V);
}
- Args.push_back(new LoadInst(V, V->getName()+".val", Call));
+ // Since we're replacing a load make sure we take the alignment
+ // of the previous load.
+ LoadInst *newLoad = new LoadInst(V, V->getName()+".val", Call);
+ newLoad->setAlignment(OrigLoad->getAlignment());
+ Args.push_back(newLoad);
AA.copyValue(OrigLoad, Args.back());
}
}
@@ -694,7 +699,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
if (ExtraArgHack)
Args.push_back(Constant::getNullValue(Type::getInt32Ty(F->getContext())));
- // Push any varargs arguments on the list
+ // Push any varargs arguments on the list.
for (; AI != CS.arg_end(); ++AI, ++ArgIndex) {
Args.push_back(*AI);
if (Attributes Attrs = CallPAL.getParamAttributes(ArgIndex))
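
The alignment fix above addresses a subtle miscompile risk: a LoadInst
created without an explicit alignment is taken to have the ABI alignment of
its type, which can overstate what the promoted pointer actually guarantees
(packed structs, custom allocators). The rule in isolation, as a sketch
(replaceLoad is an illustrative name):

    // Replace a load with one from a derived pointer, carrying over the
    // original alignment rather than letting it default to the ABI value.
    static LoadInst *replaceLoad(LoadInst *Orig, Value *NewPtr,
                                 Instruction *InsertBefore) {
      LoadInst *NL = new LoadInst(NewPtr, Orig->getName() + ".val",
                                  InsertBefore);
      NL->setAlignment(Orig->getAlignment()); // possibly weaker than ABI
      return NL;
    }
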
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index f386ed7..227602d 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -50,7 +50,7 @@ namespace {
/// argument. Used so that arguments and return values can be used
/// interchangeably.
struct RetOrArg {
- RetOrArg(const Function* F, unsigned Idx, bool IsArg) : F(F), Idx(Idx),
+ RetOrArg(const Function *F, unsigned Idx, bool IsArg) : F(F), Idx(Idx),
IsArg(IsArg) {}
const Function *F;
unsigned Idx;
@@ -72,7 +72,7 @@ namespace {
}
std::string getDescription() const {
- return std::string((IsArg ? "Argument #" : "Return value #"))
+ return std::string((IsArg ? "Argument #" : "Return value #"))
+ utostr(Idx) + " of function " + F->getNameStr();
}
};
@@ -129,11 +129,11 @@ namespace {
private:
Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses);
- Liveness SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
+ Liveness SurveyUse(Value::const_use_iterator U, UseVector &MaybeLiveUses,
unsigned RetValNum = 0);
- Liveness SurveyUses(Value *V, UseVector &MaybeLiveUses);
+ Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses);
- void SurveyFunction(Function &F);
+ void SurveyFunction(const Function &F);
void MarkValue(const RetOrArg &RA, Liveness L,
const UseVector &MaybeLiveUses);
void MarkLive(const RetOrArg &RA);
@@ -196,7 +196,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but doesn't have isVarArg set.
const FunctionType *FTy = Fn.getFunctionType();
-
+
std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end());
FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
Params, false);
@@ -225,7 +225,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
SmallVector<AttributeWithIndex, 8> AttributesVec;
for (unsigned i = 0; PAL.getSlot(i).Index <= NumArgs; ++i)
AttributesVec.push_back(PAL.getSlot(i));
- if (Attributes FnAttrs = PAL.getFnAttributes())
+ if (Attributes FnAttrs = PAL.getFnAttributes())
AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
PAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end());
}
@@ -280,7 +280,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
/// for void functions and 1 for functions not returning a struct. It returns
/// the number of struct elements for functions returning a struct.
static unsigned NumRetVals(const Function *F) {
- if (F->getReturnType() == Type::getVoidTy(F->getContext()))
+ if (F->getReturnType()->isVoidTy())
return 0;
else if (const StructType *STy = dyn_cast<StructType>(F->getReturnType()))
return STy->getNumElements();
@@ -305,15 +305,15 @@ DAE::Liveness DAE::MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses) {
/// SurveyUse - This looks at a single use of an argument or return value
/// and determines if it should be alive or not. Adds this use to MaybeLiveUses
-/// if it causes the used value to become MaybeAlive.
+/// if it causes the used value to become MaybeLive.
///
/// RetValNum is the return value number to use when this use is used in a
/// return instruction. This is used in the recursion; callers should always
/// leave it at 0.
-DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
- unsigned RetValNum) {
- Value *V = *U;
- if (ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
+DAE::Liveness DAE::SurveyUse(Value::const_use_iterator U,
+ UseVector &MaybeLiveUses, unsigned RetValNum) {
+ const User *V = *U;
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(V)) {
// The value is returned from a function. It's only live when the
// function's return value is live. We use RetValNum here, for the case
// that U is really a use of an insertvalue instruction that uses the
@@ -322,7 +322,7 @@ DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
// We might be live, depending on the liveness of Use.
return MarkIfNotLive(Use, MaybeLiveUses);
}
- if (InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
+ if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(V)) {
if (U.getOperandNo() != InsertValueInst::getAggregateOperandIndex()
&& IV->hasIndices())
// The use we are examining is inserted into an aggregate. Our liveness
@@ -334,7 +334,7 @@ DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
// we don't change RetValNum, but do survey all our uses.
Liveness Result = MaybeLive;
- for (Value::use_iterator I = IV->use_begin(),
+ for (Value::const_use_iterator I = IV->use_begin(),
E = V->use_end(); I != E; ++I) {
Result = SurveyUse(I, MaybeLiveUses, RetValNum);
if (Result == Live)
@@ -342,24 +342,24 @@ DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
}
return Result;
}
- CallSite CS = CallSite::get(V);
- if (CS.getInstruction()) {
- Function *F = CS.getCalledFunction();
+
+ if (ImmutableCallSite CS = V) {
+ const Function *F = CS.getCalledFunction();
if (F) {
// Used in a direct call.
-
+
// Find the argument number. We know for sure that this use is an
// argument, since if it were the callee operand this would be an
// indirect call, and then we know we can't be looking at a value of
// the label type (for the invoke instruction).
- unsigned ArgNo = CS.getArgumentNo(U.getOperandNo());
+ unsigned ArgNo = CS.getArgumentNo(U);
if (ArgNo >= F->getFunctionType()->getNumParams())
// The value is passed in through a vararg! Must be live.
return Live;
- assert(CS.getArgument(ArgNo)
- == CS.getInstruction()->getOperand(U.getOperandNo())
+ assert(CS.getArgument(ArgNo)
+ == CS->getOperand(U.getOperandNo())
&& "Argument is not where we expected it");
// Value passed to a normal call. It's only live when the corresponding
@@ -378,11 +378,11 @@ DAE::Liveness DAE::SurveyUse(Value::use_iterator U, UseVector &MaybeLiveUses,
/// Adds all uses that cause the result to be MaybeLive to MaybeLiveRetUses. If
/// the result is Live, MaybeLiveUses might be modified but its content should
/// be ignored (since it might not be complete).
-DAE::Liveness DAE::SurveyUses(Value *V, UseVector &MaybeLiveUses) {
+DAE::Liveness DAE::SurveyUses(const Value *V, UseVector &MaybeLiveUses) {
// Assume it's dead (which will only hold if there are no uses at all).
Liveness Result = MaybeLive;
// Check each use.
- for (Value::use_iterator I = V->use_begin(),
+ for (Value::const_use_iterator I = V->use_begin(),
E = V->use_end(); I != E; ++I) {
Result = SurveyUse(I, MaybeLiveUses);
if (Result == Live)
@@ -399,7 +399,7 @@ DAE::Liveness DAE::SurveyUses(Value *V, UseVector &MaybeLiveUses) {
// We consider arguments of non-internal functions to be intrinsically alive as
// well as arguments to functions which have their "address taken".
//
-void DAE::SurveyFunction(Function &F) {
+void DAE::SurveyFunction(const Function &F) {
unsigned RetCount = NumRetVals(&F);
// Assume all return values are dead
typedef SmallVector<Liveness, 5> RetVals;
@@ -411,8 +411,8 @@ void DAE::SurveyFunction(Function &F) {
// MaybeLive. Initialized to a list of RetCount empty lists.
RetUses MaybeLiveRetUses(RetCount);
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
+ for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
+ if (const ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
if (RI->getNumOperands() != 0 && RI->getOperand(0)->getType()
!= F.getFunctionType()->getReturnType()) {
// We don't support old style multiple return values.
@@ -431,17 +431,18 @@ void DAE::SurveyFunction(Function &F) {
unsigned NumLiveRetVals = 0;
const Type *STy = dyn_cast<StructType>(F.getReturnType());
// Loop all uses of the function.
- for (Value::use_iterator I = F.use_begin(), E = F.use_end(); I != E; ++I) {
+ for (Value::const_use_iterator I = F.use_begin(), E = F.use_end();
+ I != E; ++I) {
// If the function is PASSED IN as an argument, its address has been
// taken.
- CallSite CS = CallSite::get(*I);
- if (!CS.getInstruction() || !CS.isCallee(I)) {
+ ImmutableCallSite CS(*I);
+ if (!CS || !CS.isCallee(I)) {
MarkLive(F);
return;
}
// If this use is anything other than a call site, the function is alive.
- Instruction *TheCall = CS.getInstruction();
+ const Instruction *TheCall = CS.getInstruction();
if (!TheCall) { // Not a direct call site?
MarkLive(F);
return;
@@ -454,9 +455,9 @@ void DAE::SurveyFunction(Function &F) {
if (NumLiveRetVals != RetCount) {
if (STy) {
// Check all uses of the return value.
- for (Value::use_iterator I = TheCall->use_begin(),
+ for (Value::const_use_iterator I = TheCall->use_begin(),
E = TheCall->use_end(); I != E; ++I) {
- ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I);
+ const ExtractValueInst *Ext = dyn_cast<ExtractValueInst>(*I);
if (Ext && Ext->hasIndices()) {
// This use uses a part of our return value, survey the uses of
// that part and store the results for this index only.
@@ -493,7 +494,7 @@ void DAE::SurveyFunction(Function &F) {
// Now, check all of our arguments.
unsigned i = 0;
UseVector MaybeLiveArgUses;
- for (Function::arg_iterator AI = F.arg_begin(),
+ for (Function::const_arg_iterator AI = F.arg_begin(),
E = F.arg_end(); AI != E; ++AI, ++i) {
// See what the effect of this use is (recording any uses that cause
// MaybeLive in MaybeLiveArgUses).
@@ -599,12 +600,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
const Type *RetTy = FTy->getReturnType();
const Type *NRetTy = NULL;
unsigned RetCount = NumRetVals(F);
-
+
// -1 means unused, other numbers are the new index
SmallVector<int, 5> NewRetIdxs(RetCount, -1);
std::vector<const Type*> RetTypes;
- if (RetTy == Type::getVoidTy(F->getContext())) {
- NRetTy = Type::getVoidTy(F->getContext());
+ if (RetTy->isVoidTy()) {
+ NRetTy = RetTy;
} else {
const StructType *STy = dyn_cast<StructType>(RetTy);
if (STy)
@@ -653,10 +654,10 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// values. Otherwise, ensure that we don't have any conflicting attributes
// here. Currently, this should not be possible, but special handling might be
// required when new return value attributes are added.
- if (NRetTy == Type::getVoidTy(F->getContext()))
+ if (NRetTy->isVoidTy())
RAttrs &= ~Attribute::typeIncompatible(NRetTy);
else
- assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0
+ assert((RAttrs & Attribute::typeIncompatible(NRetTy)) == 0
&& "Return attributes no longer compatible?");
if (RAttrs)
@@ -686,11 +687,12 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
}
- if (FnAttrs != Attribute::None)
+ if (FnAttrs != Attribute::None)
AttributesVec.push_back(AttributeWithIndex::get(~0, FnAttrs));
// Reconstruct the AttributesList based on the vector we constructed.
- AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(), AttributesVec.end());
+ AttrListPtr NewPAL = AttrListPtr::get(AttributesVec.begin(),
+ AttributesVec.end());
// Work around LLVM bug PR56: the CWriter cannot emit varargs functions which
// have zero fixed arguments.
@@ -705,8 +707,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
}
// Create the new function type based on the recomputed parameters.
- FunctionType *NFTy = FunctionType::get(NRetTy, Params,
- FTy->isVarArg());
+ FunctionType *NFTy = FunctionType::get(NRetTy, Params, FTy->isVarArg());
// No change?
if (NFTy == FTy)
@@ -791,7 +792,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Return type not changed? Just replace users then.
Call->replaceAllUsesWith(New);
New->takeName(Call);
- } else if (New->getType() == Type::getVoidTy(F->getContext())) {
+ } else if (New->getType()->isVoidTy()) {
// Our return value has uses, but they will get removed later on.
// Replace by null for now.
Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
@@ -805,7 +806,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
while (isa<PHINode>(IP)) ++IP;
InsertPt = IP;
}
-
+
// We used to return a struct. Instead of doing smart stuff with all the
// uses of this struct, we will just rebuild it using
// extract/insertvalue chaining and let instcombine clean that up.
@@ -929,11 +930,11 @@ bool DAE::runOnModule(Module &M) {
DEBUG(dbgs() << "DAE - Determining liveness\n");
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
SurveyFunction(*I);
-
+
// Now, remove all dead arguments and return values from each function in
- // turn
+ // turn.
for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
- // Increment now, because the function will probably get removed (ie
+ // Increment now, because the function will probably get removed (ie.
// replaced by a new one).
Function *F = I++;
Changed |= RemoveDeadStuffFromFunction(F);
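
The recurring change in this file is const-correct use traversal:
ImmutableCallSite takes over from CallSite::get() for const users, and its
boolean conversion lets non-call users fall out of the loop naturally. The
pattern, condensed into a self-standing sketch (countDirectCalls is
illustrative; ImmutableCallSite and isCallee are the real APIs used above):

    // Count the direct calls to F; any other use, or a use of F as a call
    // argument rather than as the callee, counts as taking its address.
    static unsigned countDirectCalls(const Function &F, bool &AddressTaken) {
      unsigned N = 0;
      AddressTaken = false;
      for (Value::const_use_iterator I = F.use_begin(), E = F.use_end();
           I != E; ++I) {
        ImmutableCallSite CS(*I);
        if (!CS || !CS.isCallee(I)) {
          AddressTaken = true;
          continue;
        }
        ++N;
      }
      return N;
    }
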
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index d8e97a2..ddff5ef 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -119,7 +119,7 @@ struct GlobalStatus {
/// null/false. When the first accessing function is noticed, it is recorded.
/// When a second different accessing function is noticed,
/// HasMultipleAccessingFunctions is set to true.
- Function *AccessingFunction;
+ const Function *AccessingFunction;
bool HasMultipleAccessingFunctions;
/// HasNonInstructionUser - Set to true if this global has a user that is not
@@ -140,11 +140,11 @@ struct GlobalStatus {
// by constants itself. Note that constants cannot be cyclic, so this test is
// pretty easy to implement recursively.
//
-static bool SafeToDestroyConstant(Constant *C) {
+static bool SafeToDestroyConstant(const Constant *C) {
if (isa<GlobalValue>(C)) return false;
- for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
- if (Constant *CU = dyn_cast<Constant>(*UI)) {
+ for (Value::const_use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI)
+ if (const Constant *CU = dyn_cast<Constant>(*UI)) {
if (!SafeToDestroyConstant(CU)) return false;
} else
return false;
@@ -156,26 +156,26 @@ static bool SafeToDestroyConstant(Constant *C) {
/// structure. If the global has its address taken, return true to indicate we
/// can't do anything with it.
///
-static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
- SmallPtrSet<PHINode*, 16> &PHIUsers) {
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
+static bool AnalyzeGlobal(const Value *V, GlobalStatus &GS,
+ SmallPtrSet<const PHINode*, 16> &PHIUsers) {
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
+ if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
GS.HasNonInstructionUser = true;
if (AnalyzeGlobal(CE, GS, PHIUsers)) return true;
- } else if (Instruction *I = dyn_cast<Instruction>(*UI)) {
+ } else if (const Instruction *I = dyn_cast<Instruction>(*UI)) {
if (!GS.HasMultipleAccessingFunctions) {
- Function *F = I->getParent()->getParent();
+ const Function *F = I->getParent()->getParent();
if (GS.AccessingFunction == 0)
GS.AccessingFunction = F;
else if (GS.AccessingFunction != F)
GS.HasMultipleAccessingFunctions = true;
}
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
GS.isLoaded = true;
if (LI->isVolatile()) return true; // Don't hack on volatile loads.
- } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(I)) {
// Don't allow a store OF the address, only stores TO the address.
if (SI->getOperand(0) == V) return true;
@@ -185,14 +185,13 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
// value, not an aggregate), keep more specific information about
// stores.
if (GS.StoredType != GlobalStatus::isStored) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(SI->getOperand(1))){
Value *StoredVal = SI->getOperand(0);
if (StoredVal == GV->getInitializer()) {
if (GS.StoredType < GlobalStatus::isInitializerStored)
GS.StoredType = GlobalStatus::isInitializerStored;
} else if (isa<LoadInst>(StoredVal) &&
cast<LoadInst>(StoredVal)->getOperand(0) == GV) {
- // G = G
if (GS.StoredType < GlobalStatus::isInitializerStored)
GS.StoredType = GlobalStatus::isInitializerStored;
} else if (GS.StoredType < GlobalStatus::isStoredOnce) {
@@ -212,7 +211,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
} else if (isa<SelectInst>(I)) {
if (AnalyzeGlobal(I, GS, PHIUsers)) return true;
- } else if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ } else if (const PHINode *PN = dyn_cast<PHINode>(I)) {
// PHI nodes we can check just like select or GEP instructions, but we
// have to be careful about infinite recursion.
if (PHIUsers.insert(PN)) // Not already visited.
@@ -230,7 +229,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
} else {
return true; // Any other non-load instruction might take address!
}
- } else if (Constant *C = dyn_cast<Constant>(*UI)) {
+ } else if (const Constant *C = dyn_cast<Constant>(*UI)) {
GS.HasNonInstructionUser = true;
// We might have a dead and dangling constant hanging off of here.
if (!SafeToDestroyConstant(C))
@@ -1029,23 +1028,23 @@ static void ReplaceUsesOfMallocWithGlobal(Instruction *Alloc,
/// LoadUsesSimpleEnoughForHeapSRA - Verify that all uses of V (a load, or a phi
/// of a load) are simple enough to perform heap SRA on. This permits GEP's
/// that index through the array and struct field, icmps of null, and PHIs.
-static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,
- SmallPtrSet<PHINode*, 32> &LoadUsingPHIs,
- SmallPtrSet<PHINode*, 32> &LoadUsingPHIsPerLoad) {
+static bool LoadUsesSimpleEnoughForHeapSRA(const Value *V,
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIs,
+ SmallPtrSet<const PHINode*, 32> &LoadUsingPHIsPerLoad) {
// We permit two users of the load: setcc comparing against the null
// pointer, and a getelementptr of a specific form.
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
- Instruction *User = cast<Instruction>(*UI);
+ for (Value::const_use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
+ const Instruction *User = cast<Instruction>(*UI);
// Comparison against null is ok.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(User)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return false;
continue;
}
// getelementptr is also ok, but only a simple form.
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
+ if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
// Must index into the array and into the struct.
if (GEPI->getNumOperands() < 3)
return false;
@@ -1054,7 +1053,7 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,
continue;
}
- if (PHINode *PN = dyn_cast<PHINode>(User)) {
+ if (const PHINode *PN = dyn_cast<PHINode>(User)) {
if (!LoadUsingPHIsPerLoad.insert(PN))
// This means some phi nodes are dependent on each other.
// Avoid infinite looping!
@@ -1081,13 +1080,13 @@ static bool LoadUsesSimpleEnoughForHeapSRA(Value *V,
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA - If all users of values loaded from
/// GV are simple enough to perform HeapSRA, return true.
-static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
+static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(const GlobalVariable *GV,
Instruction *StoredVal) {
- SmallPtrSet<PHINode*, 32> LoadUsingPHIs;
- SmallPtrSet<PHINode*, 32> LoadUsingPHIsPerLoad;
- for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIs;
+ SmallPtrSet<const PHINode*, 32> LoadUsingPHIsPerLoad;
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end(); UI != E;
++UI)
- if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
if (!LoadUsesSimpleEnoughForHeapSRA(LI, LoadUsingPHIs,
LoadUsingPHIsPerLoad))
return false;
@@ -1099,16 +1098,16 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
// that all inputs to the PHI nodes are in the same equivalence sets.
// Check to verify that all operands of the PHIs are either PHIS that can be
// transformed, loads from GV, or MI itself.
- for (SmallPtrSet<PHINode*, 32>::iterator I = LoadUsingPHIs.begin(),
+ for (SmallPtrSet<const PHINode*, 32>::const_iterator I = LoadUsingPHIs.begin(),
E = LoadUsingPHIs.end(); I != E; ++I) {
- PHINode *PN = *I;
+ const PHINode *PN = *I;
for (unsigned op = 0, e = PN->getNumIncomingValues(); op != e; ++op) {
Value *InVal = PN->getIncomingValue(op);
// PHI of the stored value itself is ok.
if (InVal == StoredVal) continue;
- if (PHINode *InPN = dyn_cast<PHINode>(InVal)) {
+ if (const PHINode *InPN = dyn_cast<PHINode>(InVal)) {
// One of the PHIs in our set is (optimistically) ok.
if (LoadUsingPHIs.count(InPN))
continue;
@@ -1116,7 +1115,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
}
// Load from GV is ok.
- if (LoadInst *LI = dyn_cast<LoadInst>(InVal))
+ if (const LoadInst *LI = dyn_cast<LoadInst>(InVal))
if (LI->getOperand(0) == GV)
continue;
@@ -1664,7 +1663,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
/// it if possible. If we make a change, return true.
bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
Module::global_iterator &GVI) {
- SmallPtrSet<PHINode*, 16> PHIUsers;
+ SmallPtrSet<const PHINode*, 16> PHIUsers;
GlobalStatus GS;
GV->removeDeadConstantUsers();
@@ -1715,12 +1714,13 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GS.AccessingFunction->hasExternalLinkage() &&
GV->getType()->getAddressSpace() == 0) {
DEBUG(dbgs() << "LOCALIZING GLOBAL: " << *GV);
- Instruction* FirstI = GS.AccessingFunction->getEntryBlock().begin();
+ Instruction& FirstI = const_cast<Instruction&>(*GS.AccessingFunction
+ ->getEntryBlock().begin());
const Type* ElemTy = GV->getType()->getElementType();
// FIXME: Pass Global's alignment when globals have alignment
- AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), FirstI);
+ AllocaInst* Alloca = new AllocaInst(ElemTy, NULL, GV->getName(), &FirstI);
if (!isa<UndefValue>(GV->getInitializer()))
- new StoreInst(GV->getInitializer(), Alloca, FirstI);
+ new StoreInst(GV->getInitializer(), Alloca, &FirstI);
GV->replaceAllUsesWith(Alloca);
GV->eraseFromParent();
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 3ae771c..161246b 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -168,7 +168,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
if (II->doesNotThrow()) {
- SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end());
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
// Insert a call instruction before the invoke.
CallInst *Call = CallInst::Create(II->getCalledValue(),
Args.begin(), Args.end(), "", II);
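
The Args fix matters because, at this LLVM revision, an invoke's operand
list puts the call arguments first and ends with three non-argument
operands (the callee and the two destination blocks), so the arguments are
the prefix op_begin()..op_end()-3; the old op_begin()+3 form matched an
earlier layout. The conversion as a whole, as done here and again in
SimplifyCFGPass below (invokeToCall is an illustrative name):

    // Turn a nounwind invoke into a plain call, preserving its name.
    static CallInst *invokeToCall(InvokeInst *II) {
      SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
      CallInst *NewCall = CallInst::Create(II->getCalledValue(),
                                           Args.begin(), Args.end(), "", II);
      NewCall->takeName(II);
      return NewCall; // caller still rewires the CFG and deletes II
    }
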
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 65f2e15..76c815d 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -766,7 +766,7 @@ protected:
return SizeCI->getZExtValue() >=
GetStringLength(CI->getOperand(SizeArgOp));
if (ConstantInt *Arg = dyn_cast<ConstantInt>(CI->getOperand(SizeArgOp)))
- return SizeCI->getZExtValue() <= Arg->getZExtValue();
+ return SizeCI->getZExtValue() >= Arg->getZExtValue();
}
return false;
}
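
The one-character change flips an inverted safety test: folding a fortified
(_chk) call is legal when the known destination size is at least the
requested length, and the old '<=' accepted exactly the unsafe cases. As
plain arithmetic (values illustrative):

    #include <cstdint>

    // A checked copy of Len bytes into an object of DstSize bytes is
    // provably safe, and the _chk call foldable, only when it fits.
    static bool foldIsSafe(uint64_t DstSize, uint64_t Len) {
      return DstSize >= Len;
    }
    // foldIsSafe(64, 16) == true  : 16 bytes into a 64-byte object, fold.
    // foldIsSafe(16, 64) == false : would overflow, keep the runtime check.
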
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index ea8e5c3..6135992 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -27,6 +27,7 @@
#define DEBUG_TYPE "abcd"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Constants.h"
@@ -77,10 +78,10 @@ class ABCD : public FunctionPass {
class Bound {
public:
Bound(APInt v, bool upper) : value(v), upper_bound(upper) {}
- Bound(const Bound *b, int cnst)
- : value(b->value - cnst), upper_bound(b->upper_bound) {}
- Bound(const Bound *b, const APInt &cnst)
- : value(b->value - cnst), upper_bound(b->upper_bound) {}
+ Bound(const Bound &b, int cnst)
+ : value(b.value - cnst), upper_bound(b.upper_bound) {}
+ Bound(const Bound &b, const APInt &cnst)
+ : value(b.value - cnst), upper_bound(b.upper_bound) {}
/// Test if Bound is an upper bound
bool isUpperBound() const { return upper_bound; }
@@ -89,15 +90,15 @@ class ABCD : public FunctionPass {
int32_t getBitWidth() const { return value.getBitWidth(); }
/// Creates a Bound incrementing the one received
- static Bound *createIncrement(const Bound *b) {
- return new Bound(b->isUpperBound() ? b->value+1 : b->value-1,
- b->upper_bound);
+ static Bound createIncrement(const Bound &b) {
+ return Bound(b.isUpperBound() ? b.value+1 : b.value-1,
+ b.upper_bound);
}
/// Creates a Bound decrementing the one received
- static Bound *createDecrement(const Bound *b) {
- return new Bound(b->isUpperBound() ? b->value-1 : b->value+1,
- b->upper_bound);
+ static Bound createDecrement(const Bound &b) {
+ return Bound(b.isUpperBound() ? b.value-1 : b.value+1,
+ b.upper_bound);
}
/// Test if two bounds are equal
@@ -109,36 +110,31 @@ class ABCD : public FunctionPass {
}
/// Test if val is less than or equal to Bound b
- static bool leq(APInt val, const Bound *b) {
- if (!b) return false;
- return b->isUpperBound() ? val.sle(b->value) : val.sge(b->value);
+ static bool leq(APInt val, const Bound &b) {
+ return b.isUpperBound() ? val.sle(b.value) : val.sge(b.value);
}
/// Test if Bound a is less than or equal to Bound b
- static bool leq(const Bound *a, const Bound *b) {
- if (!a || !b) return false;
-
- assert(a->isUpperBound() == b->isUpperBound());
- return a->isUpperBound() ? a->value.sle(b->value) :
- a->value.sge(b->value);
+ static bool leq(const Bound &a, const Bound &b) {
+ assert(a.isUpperBound() == b.isUpperBound());
+ return a.isUpperBound() ? a.value.sle(b.value) :
+ a.value.sge(b.value);
}
/// Test if Bound a is less than Bound b
- static bool lt(const Bound *a, const Bound *b) {
- if (!a || !b) return false;
-
- assert(a->isUpperBound() == b->isUpperBound());
- return a->isUpperBound() ? a->value.slt(b->value) :
- a->value.sgt(b->value);
+ static bool lt(const Bound &a, const Bound &b) {
+ assert(a.isUpperBound() == b.isUpperBound());
+ return a.isUpperBound() ? a.value.slt(b.value) :
+ a.value.sgt(b.value);
}
/// Test if Bound b is greater than or equal to val
- static bool geq(const Bound *b, APInt val) {
+ static bool geq(const Bound &b, APInt val) {
return leq(val, b);
}
/// Test if Bound a is greater than or equal to Bound b
- static bool geq(const Bound *a, const Bound *b) {
+ static bool geq(const Bound &a, const Bound &b) {
return leq(b, a);
}
@@ -152,29 +148,36 @@ class ABCD : public FunctionPass {
/// minimum true and minimum reduced results are stored
class MemoizedResultChart {
public:
- MemoizedResultChart()
- : max_false(NULL), min_true(NULL), min_reduced(NULL) {}
+ MemoizedResultChart() {}
+ MemoizedResultChart(const MemoizedResultChart &other) {
+ if (other.max_false)
+ max_false.reset(new Bound(*other.max_false));
+ if (other.min_true)
+ min_true.reset(new Bound(*other.min_true));
+ if (other.min_reduced)
+ min_reduced.reset(new Bound(*other.min_reduced));
+ }
/// Returns the max false
- Bound *getFalse() const { return max_false; }
+ const Bound *getFalse() const { return max_false.get(); }
/// Returns the min true
- Bound *getTrue() const { return min_true; }
+ const Bound *getTrue() const { return min_true.get(); }
/// Returns the min reduced
- Bound *getReduced() const { return min_reduced; }
+ const Bound *getReduced() const { return min_reduced.get(); }
/// Return the stored result for this bound
- ProveResult getResult(const Bound *bound) const;
+ ProveResult getResult(const Bound &bound) const;
/// Stores a false found
- void addFalse(Bound *bound);
+ void addFalse(const Bound &bound);
/// Stores a true found
- void addTrue(Bound *bound);
+ void addTrue(const Bound &bound);
/// Stores a Reduced found
- void addReduced(Bound *bound);
+ void addReduced(const Bound &bound);
/// Clears redundant reduced
/// If a min_true is smaller than a min_reduced then the min_reduced
@@ -183,13 +186,13 @@ class ABCD : public FunctionPass {
void clearRedundantReduced();
void clear() {
- delete max_false;
- delete min_true;
- delete min_reduced;
+ max_false.reset();
+ min_true.reset();
+ min_reduced.reset();
}
private:
- Bound *max_false, *min_true, *min_reduced;
+ OwningPtr<Bound> max_false, min_true, min_reduced;
};
/// This class stores the result found for a node of the graph,
@@ -198,27 +201,27 @@ class ABCD : public FunctionPass {
public:
/// Test if there is true result stored from b to a
/// that is less then the bound
- bool hasTrue(Value *b, const Bound *bound) const {
- Bound *trueBound = map.lookup(b).getTrue();
- return trueBound && Bound::leq(trueBound, bound);
+ bool hasTrue(Value *b, const Bound &bound) const {
+ const Bound *trueBound = map.lookup(b).getTrue();
+ return trueBound && Bound::leq(*trueBound, bound);
}
/// Test if there is false result stored from b to a
/// that is less then the bound
- bool hasFalse(Value *b, const Bound *bound) const {
- Bound *falseBound = map.lookup(b).getFalse();
- return falseBound && Bound::leq(falseBound, bound);
+ bool hasFalse(Value *b, const Bound &bound) const {
+ const Bound *falseBound = map.lookup(b).getFalse();
+ return falseBound && Bound::leq(*falseBound, bound);
}
/// Test if there is reduced result stored from b to a
/// that is less then the bound
- bool hasReduced(Value *b, const Bound *bound) const {
- Bound *reducedBound = map.lookup(b).getReduced();
- return reducedBound && Bound::leq(reducedBound, bound);
+ bool hasReduced(Value *b, const Bound &bound) const {
+ const Bound *reducedBound = map.lookup(b).getReduced();
+ return reducedBound && Bound::leq(*reducedBound, bound);
}
/// Returns the stored bound for b
- ProveResult getBoundResult(Value *b, Bound *bound) {
+ ProveResult getBoundResult(Value *b, const Bound &bound) {
return map[b].getResult(bound);
}
@@ -233,7 +236,7 @@ class ABCD : public FunctionPass {
}
/// Stores the bound found
- void updateBound(Value *b, Bound *bound, const ProveResult res);
+ void updateBound(Value *b, const Bound &bound, const ProveResult res);
private:
// Maps a node in the graph to the results found for it.
@@ -274,7 +277,7 @@ class ABCD : public FunctionPass {
bool hasEdge(Value *V, bool upper) const;
/// Returns all edges pointed by vertex V
- SmallPtrSet<Edge *, 16> getEdges(Value *V) const {
+ SmallVector<Edge, 16> getEdges(Value *V) const {
return graph.lookup(V);
}
@@ -292,13 +295,7 @@ class ABCD : public FunctionPass {
}
private:
- DenseMap<Value *, SmallPtrSet<Edge *, 16> > graph;
-
- /// Adds a Node to the graph.
- DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator addNode(Value *V) {
- SmallPtrSet<Edge *, 16> p;
- return graph.insert(std::make_pair(V, p)).first;
- }
+ DenseMap<Value *, SmallVector<Edge, 16> > graph;
/// Prints the header of the dot file
void printHeader(raw_ostream &OS, Function &F) const;
@@ -315,7 +312,7 @@ class ABCD : public FunctionPass {
void printVertex(raw_ostream &OS, Value *source) const;
/// Prints the edge to the dot file
- void printEdge(raw_ostream &OS, Value *source, Edge *edge) const;
+ void printEdge(raw_ostream &OS, Value *source, const Edge &edge) const;
void printName(raw_ostream &OS, Value *info) const;
};
@@ -428,15 +425,15 @@ class ABCD : public FunctionPass {
bool demandProve(Value *a, Value *b, int c, bool upper_bound);
/// Prove that distance between b and a is <= bound
- ProveResult prove(Value *a, Value *b, Bound *bound, unsigned level);
+ ProveResult prove(Value *a, Value *b, const Bound &bound, unsigned level);
/// Updates the distance value for a and b
- void updateMemDistance(Value *a, Value *b, Bound *bound, unsigned level,
+ void updateMemDistance(Value *a, Value *b, const Bound &bound, unsigned level,
meet_function meet);
InequalityGraph inequality_graph;
MemoizedResult mem_result;
- DenseMap<Value*, Bound*> active;
+ DenseMap<Value*, const Bound*> active;
SmallPtrSet<Value*, 16> created;
SmallVector<PHINode *, 16> phis_to_remove;
};
@@ -857,7 +854,7 @@ PHINode *ABCD::findSigma(BasicBlock *BB, Instruction *I) {
/// This implementation works on any kind of inequality branch.
bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) {
int32_t width = cast<IntegerType>(a->getType())->getBitWidth();
- Bound *bound = new Bound(APInt(width, c), upper_bound);
+ Bound bound(APInt(width, c), upper_bound);
mem_result.clear();
active.clear();
@@ -867,7 +864,7 @@ bool ABCD::demandProve(Value *a, Value *b, int c, bool upper_bound) {
}
/// Prove that distance between b and a is <= bound
-ABCD::ProveResult ABCD::prove(Value *a, Value *b, Bound *bound,
+ABCD::ProveResult ABCD::prove(Value *a, Value *b, const Bound &bound,
unsigned level) {
// If C[b-a<=e] == True for some e <= bound, the same or a stronger
// difference was already proven.
@@ -885,22 +882,22 @@ ABCD::ProveResult ABCD::prove(Value *a, Value *b, Bound *bound,
return Reduced;
// traversal reached the source vertex
- if (a == b && Bound::geq(bound, APInt(bound->getBitWidth(), 0, true)))
+ if (a == b && Bound::geq(bound, APInt(bound.getBitWidth(), 0, true)))
return True;
// if b has no predecessor then fail
- if (!inequality_graph.hasEdge(b, bound->isUpperBound()))
+ if (!inequality_graph.hasEdge(b, bound.isUpperBound()))
return False;
// a cycle was encountered
if (active.count(b)) {
- if (Bound::leq(active.lookup(b), bound))
+ if (Bound::leq(*active.lookup(b), bound))
return Reduced; // a "harmless" cycle
return False; // an amplifying cycle
}
- active[b] = bound;
+ active[b] = &bound;
PHINode *PN = dyn_cast<PHINode>(b);
// Test if a Value is a Phi. If it is a PHINode with more than 1 incoming
@@ -917,23 +914,23 @@ ABCD::ProveResult ABCD::prove(Value *a, Value *b, Bound *bound,
}
/// Updates the distance value for a and b
-void ABCD::updateMemDistance(Value *a, Value *b, Bound *bound, unsigned level,
- meet_function meet) {
+void ABCD::updateMemDistance(Value *a, Value *b, const Bound &bound,
+ unsigned level, meet_function meet) {
ABCD::ProveResult res = (meet == max) ? False : True;
- SmallPtrSet<Edge *, 16> Edges = inequality_graph.getEdges(b);
- SmallPtrSet<Edge *, 16>::iterator begin = Edges.begin(), end = Edges.end();
+ SmallVector<Edge, 16> Edges = inequality_graph.getEdges(b);
+ SmallVector<Edge, 16>::iterator begin = Edges.begin(), end = Edges.end();
for (; begin != end ; ++begin) {
if (((res >= Reduced) && (meet == max)) ||
((res == False) && (meet == min))) {
break;
}
- Edge *in = *begin;
- if (in->isUpperBound() == bound->isUpperBound()) {
- Value *succ = in->getVertex();
- res = meet(res, prove(a, succ, new Bound(bound, in->getValue()),
- level+1));
+ const Edge &in = *begin;
+ if (in.isUpperBound() == bound.isUpperBound()) {
+ Value *succ = in.getVertex();
+ res = meet(res, prove(a, succ, Bound(bound, in.getValue()),
+ level+1));
}
}
@@ -941,53 +938,53 @@ void ABCD::updateMemDistance(Value *a, Value *b, Bound *bound, unsigned level,
}
/// Return the stored result for this bound
-ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound *bound)const{
- if (max_false && Bound::leq(bound, max_false))
+ABCD::ProveResult ABCD::MemoizedResultChart::getResult(const Bound &bound)const{
+ if (max_false && Bound::leq(bound, *max_false))
return False;
- if (min_true && Bound::leq(min_true, bound))
+ if (min_true && Bound::leq(*min_true, bound))
return True;
- if (min_reduced && Bound::leq(min_reduced, bound))
+ if (min_reduced && Bound::leq(*min_reduced, bound))
return Reduced;
return False;
}
/// Stores a false found
-void ABCD::MemoizedResultChart::addFalse(Bound *bound) {
- if (!max_false || Bound::leq(max_false, bound))
- max_false = bound;
-
- if (Bound::eq(max_false, min_reduced))
- min_reduced = Bound::createIncrement(min_reduced);
- if (Bound::eq(max_false, min_true))
- min_true = Bound::createIncrement(min_true);
- if (Bound::eq(min_reduced, min_true))
- min_reduced = NULL;
+void ABCD::MemoizedResultChart::addFalse(const Bound &bound) {
+ if (!max_false || Bound::leq(*max_false, bound))
+ max_false.reset(new Bound(bound));
+
+ if (Bound::eq(max_false.get(), min_reduced.get()))
+ min_reduced.reset(new Bound(Bound::createIncrement(*min_reduced)));
+ if (Bound::eq(max_false.get(), min_true.get()))
+ min_true.reset(new Bound(Bound::createIncrement(*min_true)));
+ if (Bound::eq(min_reduced.get(), min_true.get()))
+ min_reduced.reset();
clearRedundantReduced();
}
/// Stores a true found
-void ABCD::MemoizedResultChart::addTrue(Bound *bound) {
- if (!min_true || Bound::leq(bound, min_true))
- min_true = bound;
-
- if (Bound::eq(min_true, min_reduced))
- min_reduced = Bound::createDecrement(min_reduced);
- if (Bound::eq(min_true, max_false))
- max_false = Bound::createDecrement(max_false);
- if (Bound::eq(max_false, min_reduced))
- min_reduced = NULL;
+void ABCD::MemoizedResultChart::addTrue(const Bound &bound) {
+ if (!min_true || Bound::leq(bound, *min_true))
+ min_true.reset(new Bound(bound));
+
+ if (Bound::eq(min_true.get(), min_reduced.get()))
+ min_reduced.reset(new Bound(Bound::createDecrement(*min_reduced)));
+ if (Bound::eq(min_true.get(), max_false.get()))
+ max_false.reset(new Bound(Bound::createDecrement(*max_false)));
+ if (Bound::eq(max_false.get(), min_reduced.get()))
+ min_reduced.reset();
clearRedundantReduced();
}
/// Stores a Reduced found
-void ABCD::MemoizedResultChart::addReduced(Bound *bound) {
- if (!min_reduced || Bound::leq(bound, min_reduced))
- min_reduced = bound;
-
- if (Bound::eq(min_reduced, min_true))
- min_true = Bound::createIncrement(min_true);
- if (Bound::eq(min_reduced, max_false))
- max_false = Bound::createDecrement(max_false);
+void ABCD::MemoizedResultChart::addReduced(const Bound &bound) {
+ if (!min_reduced || Bound::leq(bound, *min_reduced))
+ min_reduced.reset(new Bound(bound));
+
+ if (Bound::eq(min_reduced.get(), min_true.get()))
+ min_true.reset(new Bound(Bound::createIncrement(*min_true)));
+ if (Bound::eq(min_reduced.get(), max_false.get()))
+ max_false.reset(new Bound(Bound::createDecrement(*max_false)));
}
/// Clears redundant reduced
@@ -995,14 +992,14 @@ void ABCD::MemoizedResultChart::addReduced(Bound *bound) {
/// is unnecessary and then removed. It also works for min_reduced
/// being smaller than max_false.
void ABCD::MemoizedResultChart::clearRedundantReduced() {
- if (min_true && min_reduced && Bound::lt(min_true, min_reduced))
- min_reduced = NULL;
- if (max_false && min_reduced && Bound::lt(min_reduced, max_false))
- min_reduced = NULL;
+ if (min_true && min_reduced && Bound::lt(*min_true, *min_reduced))
+ min_reduced.reset();
+ if (max_false && min_reduced && Bound::lt(*min_reduced, *max_false))
+ min_reduced.reset();
}
/// Stores the bound found
-void ABCD::MemoizedResult::updateBound(Value *b, Bound *bound,
+void ABCD::MemoizedResult::updateBound(Value *b, const Bound &bound,
const ProveResult res) {
if (res == False) {
map[b].addFalse(bound);
@@ -1020,19 +1017,17 @@ void ABCD::InequalityGraph::addEdge(Value *V_to, Value *V_from,
assert(cast<IntegerType>(V_from->getType())->getBitWidth() ==
value.getBitWidth());
- DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator from;
- from = addNode(V_from);
- from->second.insert(new Edge(V_to, value, upper));
+ graph[V_from].push_back(Edge(V_to, value, upper));
}
/// Test if there is any edge from V in the upper direction
bool ABCD::InequalityGraph::hasEdge(Value *V, bool upper) const {
- SmallPtrSet<Edge *, 16> it = graph.lookup(V);
+ SmallVector<Edge, 16> it = graph.lookup(V);
- SmallPtrSet<Edge *, 16>::iterator begin = it.begin();
- SmallPtrSet<Edge *, 16>::iterator end = it.end();
+ SmallVector<Edge, 16>::iterator begin = it.begin();
+ SmallVector<Edge, 16>::iterator end = it.end();
for (; begin != end; ++begin) {
- if ((*begin)->isUpperBound() == upper) {
+ if (begin->isUpperBound() == upper) {
return true;
}
}
@@ -1049,18 +1044,18 @@ void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const {
/// Prints the body of the dot file
void ABCD::InequalityGraph::printBody(raw_ostream &OS) const {
- DenseMap<Value *, SmallPtrSet<Edge *, 16> >::const_iterator begin =
+ DenseMap<Value *, SmallVector<Edge, 16> >::const_iterator begin =
graph.begin(), end = graph.end();
for (; begin != end ; ++begin) {
- SmallPtrSet<Edge *, 16>::iterator begin_par =
+ SmallVector<Edge, 16>::const_iterator begin_par =
begin->second.begin(), end_par = begin->second.end();
Value *source = begin->first;
printVertex(OS, source);
for (; begin_par != end_par ; ++begin_par) {
- Edge *edge = *begin_par;
+ const Edge &edge = *begin_par;
printEdge(OS, source, edge);
}
}
@@ -1079,10 +1074,10 @@ void ABCD::InequalityGraph::printVertex(raw_ostream &OS, Value *source) const {
/// Prints the edge to the dot file
void ABCD::InequalityGraph::printEdge(raw_ostream &OS, Value *source,
- Edge *edge) const {
- Value *dest = edge->getVertex();
- APInt value = edge->getValue();
- bool upper = edge->isUpperBound();
+ const Edge &edge) const {
+ Value *dest = edge.getVertex();
+ APInt value = edge.getValue();
+ bool upper = edge.isUpperBound();
OS << "\"";
printName(OS, source);
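
The through-line of this ABCD rewrite is ownership: Bound and Edge move
from leaked, heap-allocated pointers to plain values, with OwningPtr kept
only where "no result yet" must stay representable (the memo chart). The
resulting shape, trimmed to essentials (Bound and MemoCell here are cut
down for the sketch):

    #include "llvm/ADT/OwningPtr.h"

    struct Bound { int value; explicit Bound(int v) : value(v) {} };

    // OwningPtr::reset frees the previous bound before taking the copy,
    // which is exactly what the old raw-pointer assignment failed to do.
    struct MemoCell {
      llvm::OwningPtr<Bound> max_false, min_true;
      void addFalse(const Bound &b) {
        if (!max_false || max_false->value <= b.value)
          max_false.reset(new Bound(b));
      }
    };
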
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 50c9630..93e9bfb 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -174,7 +174,7 @@ bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
// don't mess around with them.
BasicBlock::const_iterator BBI = BB->begin();
while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
- for (Value::use_const_iterator UI = PN->use_begin(), E = PN->use_end();
+ for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
UI != E; ++UI) {
const Instruction *User = cast<Instruction>(*UI);
if (User->getParent() != DestBB || !isa<PHINode>(User))
@@ -714,8 +714,12 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
MemoryInst->replaceUsesOfWith(Addr, SunkAddr);
- if (Addr->use_empty())
+ if (Addr->use_empty()) {
RecursivelyDeleteTriviallyDeadInstructions(Addr);
+ // This address is now available for reassignment, so erase the table entry;
+ // we don't want to match some completely different instruction.
+ SunkAddrs[Addr] = 0;
+ }
return true;
}
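
This is a classic stale-cache hazard: SunkAddrs is keyed by the address
instruction's pointer, so once that instruction is deleted, a later
allocation can reuse the same address and spuriously hit the old entry (the
patch nulls the mapping rather than erasing it, which is equivalent for
lookups that test the value). The hazard in isolation, with stand-in types:

    #include <map>

    struct Inst { int id; };

    // Drop the cache entry before freeing the object: the pointer is the
    // key, and its storage may be recycled by the very next allocation.
    static void eraseAndFree(Inst *I, std::map<Inst*, Inst*> &Cache) {
      Cache.erase(I);
      delete I;
    }
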
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index fcb802a..642d59d 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -1004,18 +1004,18 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
// If the load and store are to the exact same address, they should have been
// a must alias. AA must have gotten confused.
- // FIXME: Study to see if/when this happens.
- if (LoadOffset == StoreOffset) {
+ // FIXME: Study to see if/when this happens. One case is forwarding a memset
+ // to a load from the base of the memset.
#if 0
+ if (LoadOffset == StoreOffset) {
dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
<< "Base = " << *StoreBase << "\n"
<< "Store Ptr = " << *WritePtr << "\n"
<< "Store Offs = " << StoreOffset << "\n"
<< "Load Ptr = " << *LoadPtr << "\n";
abort();
-#endif
- return -1;
}
+#endif
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
@@ -1031,11 +1031,11 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
bool isAAFailure = false;
- if (StoreOffset < LoadOffset) {
+ if (StoreOffset < LoadOffset)
isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
- } else {
+ else
isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
- }
+
if (isAAFailure) {
#if 0
dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
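
The tidied isAAFailure computation is interval arithmetic: the store covers
[StoreOffset, StoreOffset + StoreSize) and the load [LoadOffset,
LoadOffset + LoadSize); if the two ranges are disjoint, alias analysis
should have returned NoAlias in the first place. The same test, standalone:

    #include <cstdint>

    // True when [SO, SO+SS) and [LO, LO+LS) do not overlap; mirrors the
    // isAAFailure logic above.
    static bool rangesDisjoint(int64_t SO, uint64_t SS,
                               int64_t LO, uint64_t LS) {
      if (SO < LO)
        return SO + int64_t(SS) <= LO;
      return LO + int64_t(LS) <= SO;
    }
    // rangesDisjoint(0, 4, 4, 4) == true  : store [0,4), load [4,8).
    // rangesDisjoint(0, 8, 4, 4) == false : the load lies inside the store.
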
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index eb04d94..988a4cb 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -553,22 +553,26 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
// New instructions were inserted at the end of the preheader.
if (isa<PHINode>(I))
break;
+
// Don't move instructions which might have side effects, since the side
- // effects need to complete before instructions inside the loop. Also
- // don't move instructions which might read memory, since the loop may
- // modify memory. Note that it's okay if the instruction might have
- // undefined behavior: LoopSimplify guarantees that the preheader
- // dominates the exit block.
+ // effects need to complete before instructions inside the loop. Also don't
+ // move instructions which might read memory, since the loop may modify
+ // memory. Note that it's okay if the instruction might have undefined
+ // behavior: LoopSimplify guarantees that the preheader dominates the exit
+ // block.
if (I->mayHaveSideEffects() || I->mayReadFromMemory())
continue;
+
// Skip debug info intrinsics.
if (isa<DbgInfoIntrinsic>(I))
continue;
+
// Don't sink static AllocaInsts out of the entry block, which would
// turn them into dynamic allocas!
if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
if (AI->isStaticAlloca())
continue;
+
// Determine if there is a use in or before the loop (direct or
// otherwise).
bool UsedInLoop = false;
@@ -585,19 +589,29 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
break;
}
}
+
// If there is, the def must remain in the preheader.
if (UsedInLoop)
continue;
+
// Otherwise, sink it to the exit block.
Instruction *ToMove = I;
bool Done = false;
- if (I != Preheader->begin())
- --I;
- else
+
+ if (I != Preheader->begin()) {
+ // Skip debug info intrinsics.
+ do {
+ --I;
+ } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+
+ if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ Done = true;
+ } else {
Done = true;
+ }
+
ToMove->moveBefore(InsertPt);
- if (Done)
- break;
+ if (Done) break;
InsertPt = ToMove;
}
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index f920dca..625a75d 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1899,7 +1899,7 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
const Value *V = U->getValue();
if (const Instruction *Inst = dyn_cast<Instruction>(V))
if (L->contains(Inst)) continue;
- for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
const Instruction *UserInst = dyn_cast<Instruction>(*UI);
// Ignore non-instructions.
@@ -2827,6 +2827,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
IP = Tentative;
}
while (isa<PHINode>(IP)) ++IP;
+ while (isa<DbgInfoIntrinsic>(IP)) ++IP;
// Inform the Rewriter if we have a post-increment use, so that it can
// perform an advantageous expansion.
@@ -2864,8 +2865,10 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
// so that it is dominated by its operand. If the original insert point
// was already dominated by the increment, keep it, because there may
// be loop-variant operands that need to be respected also.
- if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP))
+ if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP)) {
IP = IVIncInsertPos;
+ while (isa<DbgInfoIntrinsic>(IP)) ++IP;
+ }
break;
}
Start = AR->getStart();
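
Both insertion-point tweaks preserve the same invariant: expanded IV code
must come after the block's PHIs (an IR rule) and now also after debug
intrinsics, so that compiling with -g cannot change the generated code. As
a helper (the function is ours; the isa<> tests are exactly the ones used
above):

    // Advance IP past leading PHI nodes and debug intrinsics to the first
    // position where ordinary instructions may be inserted.
    static BasicBlock::iterator skipPHIsAndDbg(BasicBlock::iterator IP) {
      while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP))
        ++IP;
      return IP;
    }
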
diff --git a/lib/Transforms/Scalar/Reg2Mem.cpp b/lib/Transforms/Scalar/Reg2Mem.cpp
index 99e1252..7a6eec3 100644
--- a/lib/Transforms/Scalar/Reg2Mem.cpp
+++ b/lib/Transforms/Scalar/Reg2Mem.cpp
@@ -45,7 +45,7 @@ namespace {
bool valueEscapes(const Instruction *Inst) const {
const BasicBlock *BB = Inst->getParent();
- for (Value::use_const_iterator UI = Inst->use_begin(),E = Inst->use_end();
+ for (Value::const_use_iterator UI = Inst->use_begin(),E = Inst->use_end();
UI != E; ++UI)
if (cast<Instruction>(*UI)->getParent() != BB ||
isa<PHINode>(*UI))
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 7e37938..546b7b6 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -1705,28 +1705,31 @@ ModulePass *llvm::createIPSCCPPass() {
}
-static bool AddressIsTaken(GlobalValue *GV) {
+static bool AddressIsTaken(const GlobalValue *GV) {
// Delete any dead constantexpr klingons.
GV->removeDeadConstantUsers();
- for (Value::use_iterator UI = GV->use_begin(), E = GV->use_end();
- UI != E; ++UI)
- if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
+ for (Value::const_use_iterator UI = GV->use_begin(), E = GV->use_end();
+ UI != E; ++UI) {
+ const User *U = *UI;
+ if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
if (SI->getOperand(0) == GV || SI->isVolatile())
return true; // Storing addr of GV.
- } else if (isa<InvokeInst>(*UI) || isa<CallInst>(*UI)) {
+ } else if (isa<InvokeInst>(U) || isa<CallInst>(U)) {
// Make sure we are calling the function, not passing the address.
- if (UI.getOperandNo() != 0)
+ ImmutableCallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(UI))
return true;
- } else if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
if (LI->isVolatile())
return true;
- } else if (isa<BlockAddress>(*UI)) {
+ } else if (isa<BlockAddress>(U)) {
// blockaddress doesn't take the address of the function, it takes addr
// of label.
} else {
return true;
}
+ }
return false;
}
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 738c5e8..b621e8d 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -79,7 +79,7 @@ static void ChangeToUnreachable(Instruction *I) {
/// ChangeToCall - Convert the specified invoke into a normal call.
static void ChangeToCall(InvokeInst *II) {
BasicBlock *BB = II->getParent();
- SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end());
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
Args.end(), "", II);
NewCall->takeName(II);
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 22f3628..5941ea6 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -350,10 +350,16 @@ struct StrNCmpOpt : public LibCallOptimization {
// 'strcpy' Optimizations
struct StrCpyOpt : public LibCallOptimization {
+ bool OptChkCall; // True if it's optimizing a __strcpy_chk libcall.
+
+ StrCpyOpt(bool c) : OptChkCall(c) {}
+
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Verify the "strcpy" function prototype.
+ unsigned NumParams = OptChkCall ? 3 : 2;
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) ||
+ if (FT->getNumParams() != NumParams ||
+ FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context))
return 0;
@@ -371,8 +377,13 @@ struct StrCpyOpt : public LibCallOptimization {
// We have enough information to now generate the memcpy call to do the
// copy for us. Make a memcpy to copy the nul byte with align = 1.
- EmitMemCpy(Dst, Src,
- ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
+ if (OptChkCall)
+ EmitMemCpyChk(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len),
+ CI->getOperand(3), B, TD);
+ else
+ EmitMemCpy(Dst, Src,
+ ConstantInt::get(TD->getIntPtrType(*Context), Len), 1, B, TD);
return Dst;
}
};
@@ -1162,7 +1173,8 @@ namespace {
StringMap<LibCallOptimization*> Optimizations;
// String and Memory LibCall Optimizations
StrCatOpt StrCat; StrNCatOpt StrNCat; StrChrOpt StrChr; StrCmpOpt StrCmp;
- StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrNCpyOpt StrNCpy; StrLenOpt StrLen;
+ StrNCmpOpt StrNCmp; StrCpyOpt StrCpy; StrCpyOpt StrCpyChk;
+ StrNCpyOpt StrNCpy; StrLenOpt StrLen;
StrToOpt StrTo; StrStrOpt StrStr;
MemCmpOpt MemCmp; MemCpyOpt MemCpy; MemMoveOpt MemMove; MemSetOpt MemSet;
// Math Library Optimizations
@@ -1177,8 +1189,7 @@ namespace {
bool Modified; // This is only used by doInitialization.
public:
static char ID; // Pass identification
- SimplifyLibCalls() : FunctionPass(&ID) {}
-
+ SimplifyLibCalls() : FunctionPass(&ID), StrCpy(false), StrCpyChk(true) {}
void InitOptimizations();
bool runOnFunction(Function &F);
@@ -1228,6 +1239,9 @@ void SimplifyLibCalls::InitOptimizations() {
Optimizations["memmove"] = &MemMove;
Optimizations["memset"] = &MemSet;
+ // _chk variants of String and Memory LibCall Optimizations.
+ Optimizations["__strcpy_chk"] = &StrCpyChk;
+
// Math Library Optimizations
Optimizations["powf"] = &Pow;
Optimizations["pow"] = &Pow;
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index be6b383..c70bab5 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -440,8 +440,9 @@ static bool FindAllMemoryUses(Instruction *I,
}
if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
- if (UI.getOperandNo() == 0) return true; // Storing addr, not into addr.
- MemoryUses.push_back(std::make_pair(SI, UI.getOperandNo()));
+ unsigned opNo = UI.getOperandNo();
+ if (opNo == 0) return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(SI, opNo));
continue;
}
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 3657390..8c25ad1 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -94,7 +94,7 @@ bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
if (TI->getNumSuccessors() == 1) return false;
const BasicBlock *Dest = TI->getSuccessor(SuccNum);
- pred_const_iterator I = pred_begin(Dest), E = pred_end(Dest);
+ const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
// If there is more than one predecessor, this is a critical edge...
assert(I != E && "No preds, but we have an edge to the block?");
diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp
index b44f019..0afccf4 100644
--- a/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -108,7 +108,7 @@ Value *llvm::EmitStrNCpy(Value *Dst, Value *Src, Value *Len,
/// EmitMemCpy - Emit a call to the memcpy function to the builder. This always
-/// expects that the size has type 'intptr_t' and Dst/Src are pointers.
+/// expects that Len has type 'intptr_t' and Dst/Src are pointers.
Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
unsigned Align, IRBuilder<> &B, const TargetData *TD) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
@@ -120,10 +120,34 @@ Value *llvm::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
ConstantInt::get(B.getInt32Ty(), Align));
}
+/// EmitMemCpyChk - Emit a call to the __memcpy_chk function to the builder.
+/// This expects that Len and ObjSize have type 'intptr_t' and Dst/Src
+/// are pointers.
+Value *llvm::EmitMemCpyChk(Value *Dst, Value *Src, Value *Len, Value *ObjSize,
+ IRBuilder<> &B, const TargetData *TD) {
+ Module *M = B.GetInsertBlock()->getParent()->getParent();
+ AttributeWithIndex AWI;
+ AWI = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
+ LLVMContext &Context = B.GetInsertBlock()->getContext();
+ Value *MemCpy = M->getOrInsertFunction("__memcpy_chk",
+ AttrListPtr::get(&AWI, 1),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ B.getInt8PtrTy(),
+ TD->getIntPtrType(Context),
+ TD->getIntPtrType(Context), NULL);
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
+ CallInst *CI = B.CreateCall4(MemCpy, Dst, Src, Len, ObjSize);
+ if (const Function *F = dyn_cast<Function>(MemCpy->stripPointerCasts()))
+ CI->setCallingConv(F->getCallingConv());
+ return CI;
+}
+
/// EmitMemMove - Emit a call to the memmove function to the builder. This
/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
Value *llvm::EmitMemMove(Value *Dst, Value *Src, Value *Len,
- unsigned Align, IRBuilder<> &B, const TargetData *TD) {
+ unsigned Align, IRBuilder<> &B, const TargetData *TD) {
Module *M = B.GetInsertBlock()->getParent()->getParent();
LLVMContext &Context = B.GetInsertBlock()->getContext();
const Type *Ty = TD->getIntPtrType(Context);
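
EmitMemCpyChk materializes a declaration of glibc's fortified memcpy, with both size parameters at the target's intptr_t width. For reference, the libc entry point being called:

    #include <stddef.h>

    // glibc prototype: copies len bytes and traps at run time if len
    // exceeds dstlen, the compiler-computed size of the destination.
    extern "C" void *__memcpy_chk(void *dst, const void *src,
                                  size_t len, size_t dstlen);
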
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 766c4d9..bbbcc1a 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -226,7 +226,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
bool Changed = false;
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) {
- std::vector<Value*> CallArgs(II->op_begin()+3, II->op_end());
+ std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3);
// Insert a normal call instruction...
CallInst *NewCall = CallInst::Create(II->getCalledValue(),
CallArgs.begin(), CallArgs.end(), "",II);
@@ -298,7 +298,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
CatchSwitch->addCase(InvokeNoC, II->getUnwindDest());
// Insert a normal call instruction.
- std::vector<Value*> CallArgs(II->op_begin()+3, II->op_end());
+ std::vector<Value*> CallArgs(II->op_begin(), II->op_end() - 3);
CallInst *NewCall = CallInst::Create(II->getCalledValue(),
CallArgs.begin(), CallArgs.end(), "",
II);
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 4f5a70b..f181f3a 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -68,7 +68,7 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
// assignments to subsections of the memory unit.
// Only allow direct and non-volatile loads and stores...
- for (Value::use_const_iterator UI = AI->use_begin(), UE = AI->use_end();
+ for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
UI != UE; ++UI) // Loop over all of the uses of the alloca
if (const LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
if (LI->isVolatile())
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index a31235a..292332e 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -14,31 +14,82 @@
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Instructions.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-typedef DenseMap<BasicBlock*, TrackingVH<Value> > AvailableValsTy;
-typedef std::vector<std::pair<BasicBlock*, TrackingVH<Value> > >
- IncomingPredInfoTy;
+/// BBInfo - Per-basic block information used internally by SSAUpdater.
+/// The predecessors of each block are cached here since pred_iterator is
+/// slow and we need to iterate over the blocks at least a few times.
+class SSAUpdater::BBInfo {
+public:
+ Value *AvailableVal; // Value to use in this block.
+ BasicBlock *DefBB; // Block that defines the available value.
+ unsigned NumPreds; // Number of predecessor blocks.
+ BasicBlock **Preds; // Array[NumPreds] of predecessor blocks.
+ unsigned Counter; // Marker to identify blocks already visited.
+ PHINode *PHITag; // Marker for existing PHIs that match.
+
+ BBInfo(BasicBlock *BB, Value *V, BumpPtrAllocator *Allocator);
+};
+typedef DenseMap<BasicBlock*, SSAUpdater::BBInfo*> BBMapTy;
+
+SSAUpdater::BBInfo::BBInfo(BasicBlock *BB, Value *V,
+ BumpPtrAllocator *Allocator)
+ : AvailableVal(V), DefBB(0), NumPreds(0), Preds(0), Counter(0), PHITag(0) {
+ // If this block has a known value, don't bother finding its predecessors.
+ if (V) {
+ DefBB = BB;
+ return;
+ }
+
+ // We can get our predecessor info by walking the pred_iterator list, but it
+ // is relatively slow. If we already have PHI nodes in this block, walk one
+ // of them to get the predecessor list instead.
+ if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
+ NumPreds = SomePhi->getNumIncomingValues();
+ Preds = static_cast<BasicBlock**>
+ (Allocator->Allocate(NumPreds * sizeof(BasicBlock*),
+ AlignOf<BasicBlock*>::Alignment));
+ for (unsigned pi = 0; pi != NumPreds; ++pi)
+ Preds[pi] = SomePhi->getIncomingBlock(pi);
+ return;
+ }
+
+ // Stash the predecessors in a temporary vector until we know how much space
+ // to allocate for them.
+ SmallVector<BasicBlock*, 10> TmpPreds;
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ TmpPreds.push_back(*PI);
+ ++NumPreds;
+ }
+ Preds = static_cast<BasicBlock**>
+ (Allocator->Allocate(NumPreds * sizeof(BasicBlock*),
+ AlignOf<BasicBlock*>::Alignment));
+ memcpy(Preds, TmpPreds.data(), NumPreds * sizeof(BasicBlock*));
+}
+typedef DenseMap<BasicBlock*, Value*> AvailableValsTy;
static AvailableValsTy &getAvailableVals(void *AV) {
return *static_cast<AvailableValsTy*>(AV);
}
-static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) {
- return *static_cast<IncomingPredInfoTy*>(IPI);
+static BBMapTy *getBBMap(void *BM) {
+ return static_cast<BBMapTy*>(BM);
}
+static BumpPtrAllocator *getAllocator(void *BPA) {
+ return static_cast<BumpPtrAllocator*>(BPA);
+}
SSAUpdater::SSAUpdater(SmallVectorImpl<PHINode*> *NewPHI)
- : AV(0), PrototypeValue(0), IPI(0), InsertedPHIs(NewPHI) {}
+ : AV(0), PrototypeValue(0), BM(0), BPA(0), InsertedPHIs(NewPHI) {}
SSAUpdater::~SSAUpdater() {
delete &getAvailableVals(AV);
- delete &getIncomingPredInfo(IPI);
}
/// Initialize - Reset this object to get ready for a new set of SSA
@@ -48,11 +99,6 @@ void SSAUpdater::Initialize(Value *ProtoValue) {
AV = new AvailableValsTy();
else
getAvailableVals(AV).clear();
-
- if (IPI == 0)
- IPI = new IncomingPredInfoTy();
- else
- getIncomingPredInfo(IPI).clear();
PrototypeValue = ProtoValue;
}
@@ -73,7 +119,7 @@ void SSAUpdater::AddAvailableValue(BasicBlock *BB, Value *V) {
/// IsEquivalentPHI - Check if PHI has the same incoming value as specified
/// in ValueMapping for each predecessor block.
-static bool IsEquivalentPHI(PHINode *PHI,
+static bool IsEquivalentPHI(PHINode *PHI,
DenseMap<BasicBlock*, Value*> &ValueMapping) {
unsigned PHINumValues = PHI->getNumIncomingValues();
if (PHINumValues != ValueMapping.size())
@@ -89,38 +135,12 @@ static bool IsEquivalentPHI(PHINode *PHI,
return true;
}
-/// GetExistingPHI - Check if BB already contains a phi node that is equivalent
-/// to the specified mapping from predecessor blocks to incoming values.
-static Value *GetExistingPHI(BasicBlock *BB,
- DenseMap<BasicBlock*, Value*> &ValueMapping) {
- PHINode *SomePHI;
- for (BasicBlock::iterator It = BB->begin();
- (SomePHI = dyn_cast<PHINode>(It)); ++It) {
- if (IsEquivalentPHI(SomePHI, ValueMapping))
- return SomePHI;
- }
- return 0;
-}
-
-/// GetExistingPHI - Check if BB already contains an equivalent phi node.
-/// The InputIt type must be an iterator over std::pair<BasicBlock*, Value*>
-/// objects that specify the mapping from predecessor blocks to incoming values.
-template<typename InputIt>
-static Value *GetExistingPHI(BasicBlock *BB, const InputIt &I,
- const InputIt &E) {
- // Avoid create the mapping if BB has no phi nodes at all.
- if (!isa<PHINode>(BB->begin()))
- return 0;
- DenseMap<BasicBlock*, Value*> ValueMapping(I, E);
- return GetExistingPHI(BB, ValueMapping);
-}
-
/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
/// live at the end of the specified block.
Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
- assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+ assert(BM == 0 && BPA == 0 && "Unexpected Internal State");
Value *Res = GetValueAtEndOfBlockInternal(BB);
- assert(getIncomingPredInfo(IPI).empty() && "Unexpected Internal State");
+ assert(BM == 0 && BPA == 0 && "Unexpected Internal State");
return Res;
}
@@ -146,7 +166,7 @@ Value *SSAUpdater::GetValueAtEndOfBlock(BasicBlock *BB) {
Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
// If there is no definition of the renamed variable in this block, just use
// GetValueAtEndOfBlock to do our work.
- if (!getAvailableVals(AV).count(BB))
+ if (!HasValueForBlock(BB))
return GetValueAtEndOfBlock(BB);
// Otherwise, we have the hard case. Get the live-in values for each
@@ -193,10 +213,18 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
if (SingularValue != 0)
return SingularValue;
- // Otherwise, we do need a PHI.
- if (Value *ExistingPHI = GetExistingPHI(BB, PredValues.begin(),
- PredValues.end()))
- return ExistingPHI;
+ // Otherwise, we do need a PHI: check to see if we already have one available
+ // in this block that produces the right value.
+ if (isa<PHINode>(BB->begin())) {
+ DenseMap<BasicBlock*, Value*> ValueMapping(PredValues.begin(),
+ PredValues.end());
+ PHINode *SomePHI;
+ for (BasicBlock::iterator It = BB->begin();
+ (SomePHI = dyn_cast<PHINode>(It)); ++It) {
+ if (IsEquivalentPHI(SomePHI, ValueMapping))
+ return SomePHI;
+ }
+ }
// Ok, we have no way out, insert a new one now.
PHINode *InsertedPHI = PHINode::Create(PrototypeValue->getType(),
@@ -226,7 +254,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {
/// which use their value in the corresponding predecessor.
void SSAUpdater::RewriteUse(Use &U) {
Instruction *User = cast<Instruction>(U.getUser());
-
+
Value *V;
if (PHINode *UserPN = dyn_cast<PHINode>(User))
V = GetValueAtEndOfBlock(UserPN->getIncomingBlock(U));
@@ -236,161 +264,264 @@ void SSAUpdater::RewriteUse(Use &U) {
U.set(V);
}
-
/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
/// for the specified BB and if so, return it. If not, construct SSA form by
-/// walking predecessors inserting PHI nodes as needed until we get to a block
-/// where the value is available.
-///
+/// first calculating the required placement of PHIs and then inserting new
+/// PHIs where needed.
Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ if (Value *V = AvailableVals[BB])
+ return V;
+
+ // Pool allocation used internally by GetValueAtEndOfBlock.
+ BumpPtrAllocator AllocatorObj;
+ BBMapTy BBMapObj;
+ BPA = &AllocatorObj;
+ BM = &BBMapObj;
+
+ BBInfo *Info = new (AllocatorObj) BBInfo(BB, 0, &AllocatorObj);
+ BBMapObj[BB] = Info;
+
+ bool Changed;
+ unsigned Counter = 1;
+ do {
+ Changed = false;
+ FindPHIPlacement(BB, Info, Changed, Counter);
+ ++Counter;
+ } while (Changed);
+
+ FindAvailableVal(BB, Info, Counter);
+
+ BPA = 0;
+ BM = 0;
+ return Info->AvailableVal;
+}
- // Query AvailableVals by doing an insertion of null.
- std::pair<AvailableValsTy::iterator, bool> InsertRes =
- AvailableVals.insert(std::make_pair(BB, TrackingVH<Value>()));
-
- // Handle the case when the insertion fails because we have already seen BB.
- if (!InsertRes.second) {
- // If the insertion failed, there are two cases. The first case is that the
- // value is already available for the specified block. If we get this, just
- // return the value.
- if (InsertRes.first->second != 0)
- return InsertRes.first->second;
-
- // Otherwise, if the value we find is null, then this is the value is not
- // known but it is being computed elsewhere in our recursion. This means
- // that we have a cycle. Handle this by inserting a PHI node and returning
- // it. When we get back to the first instance of the recursion we will fill
- // in the PHI node.
- return InsertRes.first->second =
- PHINode::Create(PrototypeValue->getType(), PrototypeValue->getName(),
- &BB->front());
+/// FindPHIPlacement - Recursively visit the predecessors of a block to find
+/// the reaching definition for each predecessor and then determine whether
+/// a PHI is needed in this block.
+void SSAUpdater::FindPHIPlacement(BasicBlock *BB, BBInfo *Info, bool &Changed,
+ unsigned Counter) {
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ BBMapTy *BBMap = getBBMap(BM);
+ BumpPtrAllocator *Allocator = getAllocator(BPA);
+ bool BBNeedsPHI = false;
+ BasicBlock *SamePredDefBB = 0;
+
+ // If there are no predecessors, then we must have found an unreachable
+ // block. Treat it as a definition with 'undef'.
+ if (Info->NumPreds == 0) {
+ Info->AvailableVal = UndefValue::get(PrototypeValue->getType());
+ Info->DefBB = BB;
+ return;
}
- // Okay, the value isn't in the map and we just inserted a null in the entry
- // to indicate that we're processing the block. Since we have no idea what
- // value is in this block, we have to recurse through our predecessors.
- //
- // While we're walking our predecessors, we keep track of them in a vector,
- // then insert a PHI node in the end if we actually need one. We could use a
- // smallvector here, but that would take a lot of stack space for every level
- // of the recursion, just use IncomingPredInfo as an explicit stack.
- IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
- unsigned FirstPredInfoEntry = IncomingPredInfo.size();
-
- // As we're walking the predecessors, keep track of whether they are all
- // producing the same value. If so, this value will capture it, if not, it
- // will get reset to null. We distinguish the no-predecessor case explicitly
- // below.
- TrackingVH<Value> ExistingValue;
-
- // We can get our predecessor info by walking the pred_iterator list, but it
- // is relatively slow. If we already have PHI nodes in this block, walk one
- // of them to get the predecessor list instead.
- if (PHINode *SomePhi = dyn_cast<PHINode>(BB->begin())) {
- for (unsigned i = 0, e = SomePhi->getNumIncomingValues(); i != e; ++i) {
- BasicBlock *PredBB = SomePhi->getIncomingBlock(i);
- Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
- IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
-
- // Set ExistingValue to singular value from all predecessors so far.
- if (i == 0)
- ExistingValue = PredVal;
- else if (PredVal != ExistingValue)
- ExistingValue = 0;
+ Info->Counter = Counter;
+ for (unsigned pi = 0; pi != Info->NumPreds; ++pi) {
+ BasicBlock *Pred = Info->Preds[pi];
+ BBMapTy::value_type &BBMapBucket = BBMap->FindAndConstruct(Pred);
+ if (!BBMapBucket.second) {
+ Value *PredVal = AvailableVals.lookup(Pred);
+ BBMapBucket.second = new (*Allocator) BBInfo(Pred, PredVal, Allocator);
}
- } else {
- bool isFirstPred = true;
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *PredBB = *PI;
- Value *PredVal = GetValueAtEndOfBlockInternal(PredBB);
- IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
-
- // Set ExistingValue to singular value from all predecessors so far.
- if (isFirstPred) {
- ExistingValue = PredVal;
- isFirstPred = false;
- } else if (PredVal != ExistingValue)
- ExistingValue = 0;
+ BBInfo *PredInfo = BBMapBucket.second;
+ BasicBlock *DefBB = 0;
+ if (!PredInfo->AvailableVal) {
+ if (PredInfo->Counter != Counter)
+ FindPHIPlacement(Pred, PredInfo, Changed, Counter);
+
+ // Ignore back edges where the value is not yet known.
+ if (!PredInfo->DefBB)
+ continue;
}
+ DefBB = PredInfo->DefBB;
+
+ if (!SamePredDefBB)
+ SamePredDefBB = DefBB;
+ else if (DefBB != SamePredDefBB)
+ BBNeedsPHI = true;
}
- // If there are no predecessors, then we must have found an unreachable block
- // just return 'undef'. Since there are no predecessors, InsertRes must not
- // be invalidated.
- if (IncomingPredInfo.size() == FirstPredInfoEntry)
- return InsertRes.first->second = UndefValue::get(PrototypeValue->getType());
-
- /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If
- /// this block is involved in a loop, a no-entry PHI node will have been
- /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted
- /// above.
- TrackingVH<Value> &InsertedVal = AvailableVals[BB];
-
- // If the predecessor values are not all the same, then check to see if there
- // is an existing PHI that can be used.
- if (!ExistingValue)
- ExistingValue = GetExistingPHI(BB,
- IncomingPredInfo.begin()+FirstPredInfoEntry,
- IncomingPredInfo.end());
-
- // If there is an existing value we can use, then we don't need to insert a
- // PHI. This is the simple and common case.
- if (ExistingValue) {
- // If a PHI node got inserted, replace it with the existing value and delete
- // it.
- if (InsertedVal) {
- PHINode *OldVal = cast<PHINode>(InsertedVal);
- // Be careful about dead loops. These RAUW's also update InsertedVal.
- if (InsertedVal != ExistingValue)
- OldVal->replaceAllUsesWith(ExistingValue);
- else
- OldVal->replaceAllUsesWith(UndefValue::get(InsertedVal->getType()));
- OldVal->eraseFromParent();
- } else {
- InsertedVal = ExistingValue;
- }
+ BasicBlock *NewDefBB = (BBNeedsPHI ? BB : SamePredDefBB);
+ if (Info->DefBB != NewDefBB) {
+ Changed = true;
+ Info->DefBB = NewDefBB;
+ }
+}
- // Either path through the 'if' should have set InsertedVal -> ExistingVal.
- assert((InsertedVal == ExistingValue || isa<UndefValue>(InsertedVal)) &&
- "RAUW didn't change InsertedVal to be ExistingValue");
+/// FindAvailableVal - If this block requires a PHI, first check if an existing
+/// PHI matches the PHI placement and reaching definitions computed earlier,
+/// and if not, create a new PHI. Visit all the block's predecessors to
+/// calculate the available value for each one and fill in the incoming values
+/// for a new PHI.
+void SSAUpdater::FindAvailableVal(BasicBlock *BB, BBInfo *Info,
+ unsigned Counter) {
+ if (Info->AvailableVal || Info->Counter == Counter)
+ return;
- // Drop the entries we added in IncomingPredInfo to restore the stack.
- IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
- IncomingPredInfo.end());
- return ExistingValue;
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ BBMapTy *BBMap = getBBMap(BM);
+
+ // Check if there needs to be a PHI in BB.
+ PHINode *NewPHI = 0;
+ if (Info->DefBB == BB) {
+ // Look for an existing PHI.
+ FindExistingPHI(BB);
+ if (!Info->AvailableVal) {
+ NewPHI = PHINode::Create(PrototypeValue->getType(),
+ PrototypeValue->getName(), &BB->front());
+ NewPHI->reserveOperandSpace(Info->NumPreds);
+ Info->AvailableVal = NewPHI;
+ AvailableVals[BB] = NewPHI;
+ }
}
- // Otherwise, we do need a PHI: insert one now if we don't already have one.
- if (InsertedVal == 0)
- InsertedVal = PHINode::Create(PrototypeValue->getType(),
- PrototypeValue->getName(), &BB->front());
+ // Iterate through the block's predecessors.
+ Info->Counter = Counter;
+ for (unsigned pi = 0; pi != Info->NumPreds; ++pi) {
+ BasicBlock *Pred = Info->Preds[pi];
+ BBInfo *PredInfo = (*BBMap)[Pred];
+ FindAvailableVal(Pred, PredInfo, Counter);
+ if (NewPHI) {
+ // Skip to the nearest preceding definition.
+ if (PredInfo->DefBB != Pred)
+ PredInfo = (*BBMap)[PredInfo->DefBB];
+ NewPHI->addIncoming(PredInfo->AvailableVal, Pred);
+ } else if (!Info->AvailableVal)
+ Info->AvailableVal = PredInfo->AvailableVal;
+ }
- PHINode *InsertedPHI = cast<PHINode>(InsertedVal);
- InsertedPHI->reserveOperandSpace(IncomingPredInfo.size()-FirstPredInfoEntry);
+ if (NewPHI) {
+ DEBUG(dbgs() << " Inserted PHI: " << *NewPHI << "\n");
- // Fill in all the predecessors of the PHI.
- for (IncomingPredInfoTy::iterator I =
- IncomingPredInfo.begin()+FirstPredInfoEntry,
- E = IncomingPredInfo.end(); I != E; ++I)
- InsertedPHI->addIncoming(I->second, I->first);
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(NewPHI);
+ }
+}
- // Drop the entries we added in IncomingPredInfo to restore the stack.
- IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
- IncomingPredInfo.end());
+/// FindExistingPHI - Look through the PHI nodes in a block to see if any of
+/// them match what is needed.
+void SSAUpdater::FindExistingPHI(BasicBlock *BB) {
+ PHINode *SomePHI;
+ for (BasicBlock::iterator It = BB->begin();
+ (SomePHI = dyn_cast<PHINode>(It)); ++It) {
+ if (CheckIfPHIMatches(SomePHI)) {
+ RecordMatchingPHI(SomePHI);
+ break;
+ }
+ ClearPHITags(SomePHI);
+ }
+}
- // See if the PHI node can be merged to a single value. This can happen in
- // loop cases when we get a PHI of itself and one other value.
- if (Value *ConstVal = InsertedPHI->hasConstantValue()) {
- InsertedPHI->replaceAllUsesWith(ConstVal);
- InsertedPHI->eraseFromParent();
- InsertedVal = ConstVal;
- } else {
- DEBUG(dbgs() << " Inserted PHI: " << *InsertedPHI << "\n");
+/// CheckIfPHIMatches - Check if a PHI node matches the placement and values
+/// in the BBMap.
+bool SSAUpdater::CheckIfPHIMatches(PHINode *PHI) {
+ BBMapTy *BBMap = getBBMap(BM);
+ SmallVector<PHINode*, 20> WorkList;
+ WorkList.push_back(PHI);
+
+ // Mark that the block containing this PHI has been visited.
+ (*BBMap)[PHI->getParent()]->PHITag = PHI;
+
+ while (!WorkList.empty()) {
+ PHI = WorkList.pop_back_val();
+
+ // Iterate through the PHI's incoming values.
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+ Value *IncomingVal = PHI->getIncomingValue(i);
+ BasicBlock *Pred = PHI->getIncomingBlock(i);
+ BBInfo *PredInfo = (*BBMap)[Pred];
+ // Skip to the nearest preceding definition.
+ if (PredInfo->DefBB != Pred) {
+ Pred = PredInfo->DefBB;
+ PredInfo = (*BBMap)[Pred];
+ }
+
+ // Check if it matches the expected value.
+ if (PredInfo->AvailableVal) {
+ if (IncomingVal == PredInfo->AvailableVal)
+ continue;
+ return false;
+ }
+
+ // Check if the value is a PHI in the correct block.
+ PHINode *IncomingPHIVal = dyn_cast<PHINode>(IncomingVal);
+ if (!IncomingPHIVal || IncomingPHIVal->getParent() != Pred)
+ return false;
+
+ // If this block has already been visited, check if this PHI matches.
+ if (PredInfo->PHITag) {
+ if (IncomingPHIVal == PredInfo->PHITag)
+ continue;
+ return false;
+ }
+ PredInfo->PHITag = IncomingPHIVal;
+
+ WorkList.push_back(IncomingPHIVal);
+ }
+ }
+ return true;
+}
- // If the client wants to know about all new instructions, tell it.
- if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+/// RecordMatchingPHI - For a PHI node that matches, record it and its input
+/// PHIs in both the BBMap and the AvailableVals mapping.
+void SSAUpdater::RecordMatchingPHI(PHINode *PHI) {
+ BBMapTy *BBMap = getBBMap(BM);
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ SmallVector<PHINode*, 20> WorkList;
+ WorkList.push_back(PHI);
+
+ // Record this PHI.
+ BasicBlock *BB = PHI->getParent();
+ AvailableVals[BB] = PHI;
+ (*BBMap)[BB]->AvailableVal = PHI;
+
+ while (!WorkList.empty()) {
+ PHI = WorkList.pop_back_val();
+
+ // Iterate through the PHI's incoming values.
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+ PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i));
+ if (!IncomingPHIVal) continue;
+ BB = IncomingPHIVal->getParent();
+ BBInfo *Info = (*BBMap)[BB];
+ if (!Info || Info->AvailableVal)
+ continue;
+
+ // Record the PHI and add it to the worklist.
+ AvailableVals[BB] = IncomingPHIVal;
+ Info->AvailableVal = IncomingPHIVal;
+ WorkList.push_back(IncomingPHIVal);
+ }
}
+}
- return InsertedVal;
+/// ClearPHITags - When one of the existing PHI nodes fails to match, clear
+/// the PHITag values that were stored in the BBMap when checking to see if
+/// it matched.
+void SSAUpdater::ClearPHITags(PHINode *PHI) {
+ BBMapTy *BBMap = getBBMap(BM);
+ SmallVector<PHINode*, 20> WorkList;
+ WorkList.push_back(PHI);
+
+ // Clear the tag for this PHI.
+ (*BBMap)[PHI->getParent()]->PHITag = 0;
+
+ while (!WorkList.empty()) {
+ PHI = WorkList.pop_back_val();
+
+ // Iterate through the PHI's incoming values.
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
+ PHINode *IncomingPHIVal = dyn_cast<PHINode>(PHI->getIncomingValue(i));
+ if (!IncomingPHIVal) continue;
+ BasicBlock *BB = IncomingPHIVal->getParent();
+ BBInfo *Info = (*BBMap)[BB];
+ if (!Info || Info->AvailableVal || !Info->PHITag)
+ continue;
+
+ // Clear the tag and add the PHI to the worklist.
+ Info->PHITag = 0;
+ WorkList.push_back(IncomingPHIVal);
+ }
+ }
}
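
This rewrite replaces the old recursive construction (which threaded a TrackingVH stack through the walk and sometimes inserted speculative PHIs only to delete them) with an explicit two-phase algorithm: FindPHIPlacement iterates to a fixed point, computing for every block which block's definition reaches it and marking a PHI wherever predecessors disagree; FindAvailableVal then either reuses an existing PHI that already matches that placement or creates and populates a new one. The client-facing API is unchanged; a minimal usage sketch (block and value names are placeholders):

    #include "llvm/Transforms/Utils/SSAUpdater.h"
    #include "llvm/Instructions.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Rewrite one use of a multiply-defined value into SSA form.
    static void rewriteToSSA(Value *Proto, BasicBlock *DefBB1, Value *V1,
                             BasicBlock *DefBB2, Value *V2, Use &U) {
      SmallVector<PHINode*, 8> NewPHIs;
      SSAUpdater SSA(&NewPHIs);        // Collects any PHIs it inserts.
      SSA.Initialize(Proto);           // Type/name prototype for new PHIs.
      SSA.AddAvailableValue(DefBB1, V1);
      SSA.AddAvailableValue(DefBB2, V2);
      SSA.RewriteUse(U);               // Plugs in the reaching definition.
    }
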
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 2ce5bdc..9f2209d 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -224,7 +224,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
if (BI->isUnconditional() && BI->getSuccessor(0) == BB) {
if (!AggressiveInsts) return false;
// Okay, it looks like the instruction IS in the "condition". Check to
- // see if its a cheap instruction to unconditionally compute, and if it
+ // see if it's a cheap instruction to unconditionally compute, and if it
// only uses stuff defined outside of the condition. If so, hoist it out.
if (!I->isSafeToSpeculativelyExecute())
return false;
@@ -1768,7 +1768,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
Pred->getInstList().remove(II); // Take out of symbol table
// Insert the call now.
- SmallVector<Value*,8> Args(II->op_begin()+3, II->op_end());
+ SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
CallInst *CI = CallInst::Create(II->getCalledValue(),
Args.begin(), Args.end(),
II->getName(), BI);
@@ -1970,13 +1970,13 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
II->removeFromParent(); // Take out of symbol table
// Insert the call now...
- SmallVector<Value*, 8> Args(II->op_begin()+3, II->op_end());
+ SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
CallInst *CI = CallInst::Create(II->getCalledValue(),
Args.begin(), Args.end(),
II->getName(), BI);
CI->setCallingConv(II->getCallingConv());
CI->setAttributes(II->getAttributes());
- // If the invoke produced a value, the Call does now instead.
+ // If the invoke produced a value, the call does now instead.
II->replaceAllUsesWith(CI);
delete II;
Changed = true;
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 0eb9f02..f6a6076 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -1681,7 +1681,7 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
// Output predecessors for the block...
Out.PadToColumn(50);
Out << ";";
- pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB);
+ const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
if (PI == PE) {
Out << " No predecessors!";
@@ -1875,6 +1875,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
if (PAL.getFnAttributes() != Attribute::None)
Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
+ Operand = II->getCalledValue();
const PointerType *PTy = cast<PointerType>(Operand->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
const Type *RetTy = FTy->getReturnType();
@@ -1912,10 +1913,10 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(Operand, true);
}
Out << '(';
- for (unsigned op = 3, Eop = I.getNumOperands(); op < Eop; ++op) {
- if (op > 3)
+ for (unsigned op = 0, Eop = I.getNumOperands() - 3; op < Eop; ++op) {
+ if (op)
Out << ", ";
- writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op-2));
+ writeParamOperand(I.getOperand(op), PAL.getParamAttributes(op + 1));
}
Out << ')';
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index 5e4c9fb..b9aa5c3 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -225,7 +225,12 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
// Calls to these intrinsics are transformed into ShuffleVector's.
NewFn = 0;
return true;
+ } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) {
+ // Calls to these intrinsics are transformed into vector multiplies.
+ NewFn = 0;
+ return true;
}
+
break;
}
@@ -355,6 +360,18 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Clean up the old call now that it has been completely upgraded.
CI->eraseFromParent();
+ } else if (F->getName() == "llvm.x86.sse41.pmulld") {
+ // Upgrade this set of intrinsics into vector multiplies.
+ Instruction *Mul = BinaryOperator::CreateMul(CI->getOperand(1),
+ CI->getOperand(2),
+ CI->getName(),
+ CI);
+ // Fix up all the uses with our new multiply.
+ if (!CI->use_empty())
+ CI->replaceAllUsesWith(Mul);
+
+ // Remove the old intrinsic call now that it has been upgraded.
+ CI->eraseFromParent();
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
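
pmulld is just a lane-wise 32-bit multiply, so the upgrader lowers calls to the retired llvm.x86.sse41.pmulld intrinsic into the generic mul instruction; note that in this revision a CallInst still keeps its callee at operand 0, so the two vector operands are getOperand(1) and getOperand(2). The same lowering via IRBuilder, as a sketch (the pass itself uses BinaryOperator::CreateMul directly, as shown):

    #include "llvm/Instructions.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    // Replace call <4 x i32> @llvm.x86.sse41.pmulld(a, b) with mul a, b.
    static void upgradePMulLD(CallInst *CI) {
      IRBuilder<> B(CI);  // Insert right before the old call.
      Value *Mul = B.CreateMul(CI->getOperand(1), CI->getOperand(2),
                               CI->getName());
      if (!CI->use_empty())
        CI->replaceAllUsesWith(Mul);
      CI->eraseFromParent();
    }
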
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 4b80e36..c64564b 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMCore
ConstantFold.cpp
Constants.cpp
Core.cpp
+ DebugLoc.cpp
Dominators.cpp
Function.cpp
GVMaterializer.cpp
@@ -16,6 +17,7 @@ add_llvm_library(LLVMCore
Instructions.cpp
IntrinsicInst.cpp
LLVMContext.cpp
+ LLVMContextImpl.cpp
LeakDetector.cpp
Metadata.cpp
Module.cpp
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 10f8879..1553bd5 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -59,6 +59,7 @@ Constant *Constant::getNullValue(const Type *Ty) {
case Type::PointerTyID:
return ConstantPointerNull::get(cast<PointerType>(Ty));
case Type::StructTyID:
+ case Type::UnionTyID:
case Type::ArrayTyID:
case Type::VectorTyID:
return ConstantAggregateZero::get(Ty);
@@ -160,7 +161,7 @@ bool Constant::canTrap() const {
/// isConstantUsed - Return true if the constant has users other than constant
/// exprs and other dangling things.
bool Constant::isConstantUsed() const {
- for (use_const_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
const Constant *UC = dyn_cast<Constant>(*UI);
if (UC == 0 || isa<GlobalValue>(UC))
return true;
@@ -944,7 +945,8 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
// Factory Function Implementation
ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
- assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
+ assert((Ty->isStructTy() || Ty->isUnionTy()
+ || Ty->isArrayTy() || Ty->isVectorTy()) &&
"Cannot create an aggregate zero of non-aggregate type!");
LLVMContextImpl *pImpl = Ty->getContext().pImpl;
@@ -1945,6 +1947,20 @@ const char *ConstantExpr::getOpcodeName() const {
return Instruction::getOpcodeName(getOpcode());
}
+
+
+GetElementPtrConstantExpr::
+GetElementPtrConstantExpr(Constant *C, const std::vector<Constant*> &IdxList,
+ const Type *DestTy)
+ : ConstantExpr(DestTy, Instruction::GetElementPtr,
+ OperandTraits<GetElementPtrConstantExpr>::op_end(this)
+ - (IdxList.size()+1), IdxList.size()+1) {
+ OperandList[0] = C;
+ for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
+ OperandList[i+1] = IdxList[i];
+}
+
+
//===----------------------------------------------------------------------===//
// replaceUsesOfWithOnConstant implementations
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index c798ba2..2f2fac5 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CONSTANTSCONTEXT_H
#define LLVM_CONSTANTSCONTEXT_H
+#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/Operator.h"
#include "llvm/Support/Debug.h"
@@ -327,6 +328,39 @@ struct ExprMapKeyType {
}
};
+struct InlineAsmKeyType {
+ InlineAsmKeyType(StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
+ bool isAlignStack)
+ : asm_string(AsmString), constraints(Constraints),
+ has_side_effects(hasSideEffects), is_align_stack(isAlignStack) {}
+ std::string asm_string;
+ std::string constraints;
+ bool has_side_effects;
+ bool is_align_stack;
+ bool operator==(const InlineAsmKeyType& that) const {
+ return this->asm_string == that.asm_string &&
+ this->constraints == that.constraints &&
+ this->has_side_effects == that.has_side_effects &&
+ this->is_align_stack == that.is_align_stack;
+ }
+ bool operator<(const InlineAsmKeyType& that) const {
+ if (this->asm_string != that.asm_string)
+ return this->asm_string < that.asm_string;
+ if (this->constraints != that.constraints)
+ return this->constraints < that.constraints;
+ if (this->has_side_effects != that.has_side_effects)
+ return this->has_side_effects < that.has_side_effects;
+ if (this->is_align_stack != that.is_align_stack)
+ return this->is_align_stack < that.is_align_stack;
+ return false;
+ }
+
+ bool operator!=(const InlineAsmKeyType& that) const {
+ return !(*this == that);
+ }
+};
+
// The number of operands for each ConstantCreator::create method is
// determined by the ConstantTraits template.
// ConstantCreator - A class that is used to create constants by
@@ -517,6 +551,23 @@ struct ConstantKeyData<UndefValue> {
}
};
+template<>
+struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType> {
+ static InlineAsm *create(const PointerType *Ty, const InlineAsmKeyType &Key) {
+ return new InlineAsm(Ty, Key.asm_string, Key.constraints,
+ Key.has_side_effects, Key.is_align_stack);
+ }
+};
+
+template<>
+struct ConstantKeyData<InlineAsm> {
+ typedef InlineAsmKeyType ValType;
+ static ValType getValType(InlineAsm *Asm) {
+ return InlineAsmKeyType(Asm->getAsmString(), Asm->getConstraintString(),
+ Asm->hasSideEffects(), Asm->isAlignStack());
+ }
+};
+
template<class ValType, class TypeClass, class ConstantClass,
bool HasLargeKey = false /*true for arrays and structs*/ >
class ConstantUniqueMap : public AbstractTypeUser {
@@ -549,8 +600,8 @@ public:
void freeConstants() {
for (typename MapTy::iterator I=Map.begin(), E=Map.end();
I != E; ++I) {
- if (I->second->use_empty())
- delete I->second;
+ // The destructor asserts that use_empty() holds.
+ delete I->second;
}
}
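
InlineAsmKeyType slots inline-asm values into the same ConstantUniqueMap machinery the context already uses for constants, so InlineAsm::get (rewritten in InlineAsm.cpp below) becomes a memoized lookup: identical asm strings, constraints, and flags now return the same object, and freeConstants can delete the survivors unconditionally. The uniquing idea in miniature, with ordinary standard-library types standing in for the LLVM ones:

    #include <map>
    #include <string>

    struct AsmKey {
      std::string Asm, Constraints;
      bool SideEffects, AlignStack;
      bool operator<(const AsmKey &R) const {  // Lexicographic, as above.
        if (Asm != R.Asm) return Asm < R.Asm;
        if (Constraints != R.Constraints) return Constraints < R.Constraints;
        if (SideEffects != R.SideEffects) return SideEffects < R.SideEffects;
        return AlignStack < R.AlignStack;
      }
    };

    struct AsmValue { AsmKey Key; };

    // Create each distinct value at most once per key.
    static AsmValue *getOrCreate(std::map<AsmKey, AsmValue*> &M,
                                 const AsmKey &K) {
      AsmValue *&Slot = M[K];
      if (!Slot) { Slot = new AsmValue; Slot->Key = K; }
      return Slot;
    }
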
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index f4f65c5..44d487a 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1651,7 +1651,7 @@ LLVMBasicBlockRef LLVMGetInsertBlock(LLVMBuilderRef Builder) {
}
void LLVMClearInsertionPosition(LLVMBuilderRef Builder) {
- unwrap(Builder)->ClearInsertionPoint ();
+ unwrap(Builder)->ClearInsertionPoint();
}
void LLVMInsertIntoBuilder(LLVMBuilderRef Builder, LLVMValueRef Instr) {
@@ -1670,11 +1670,13 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
/*--.. Metadata builders ...................................................--*/
void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
- unwrap(Builder)->SetCurrentDebugLocation(L? unwrap<MDNode>(L) : NULL);
+ MDNode *Loc = L ? unwrap<MDNode>(L) : NULL;
+ unwrap(Builder)->SetCurrentDebugLocation(NewDebugLoc::getFromDILocation(Loc));
}
LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) {
- return wrap(unwrap(Builder)->getCurrentDebugLocation());
+ return wrap(unwrap(Builder)->getCurrentDebugLocation()
+ .getAsMDNode(unwrap(Builder)->getContext()));
}
void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
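
The C API keeps its MDNode-based surface: setting a location converts the DILocation quad into the new compressed form with NewDebugLoc::getFromDILocation, and reading it back re-materializes an MDNode via getAsMDNode (the quad layout, {i32 line, i32 col, scope, inlined-at}, is defined by the new DebugLoc.cpp below). Usage from the C API is unchanged, as a sketch:

    #include "llvm-c/Core.h"

    // 'Loc' is a DILocation-style MDNode: {line, col, scope, inlined-at}.
    void tagInstruction(LLVMBuilderRef B, LLVMValueRef Loc,
                        LLVMValueRef Inst) {
      LLVMSetCurrentDebugLocation(B, Loc);  // Compressed internally now.
      LLVMSetInstDebugLocation(B, Inst);    // Stamp an existing instruction.
    }
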
diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp
new file mode 100644
index 0000000..f02ce57
--- /dev/null
+++ b/lib/VMCore/DebugLoc.cpp
@@ -0,0 +1,288 @@
+//===-- DebugLoc.cpp - Implement DebugLoc class ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/DebugLoc.h"
+#include "LLVMContextImpl.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// DebugLoc Implementation
+//===----------------------------------------------------------------------===//
+
+MDNode *NewDebugLoc::getScope(const LLVMContext &Ctx) const {
+ if (ScopeIdx == 0) return 0;
+
+ if (ScopeIdx > 0) {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified.
+ assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
+ "Invalid ScopeIdx!");
+ return Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
+ }
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
+}
+
+MDNode *NewDebugLoc::getInlinedAt(const LLVMContext &Ctx) const {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified. Zero is invalid.
+ if (ScopeIdx >= 0) return 0;
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ return Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
+}
+
+/// Return both the Scope and the InlinedAt values.
+void NewDebugLoc::getScopeAndInlinedAt(MDNode *&Scope, MDNode *&IA,
+ const LLVMContext &Ctx) const {
+ if (ScopeIdx == 0) {
+ Scope = IA = 0;
+ return;
+ }
+
+ if (ScopeIdx > 0) {
+ // Positive ScopeIdx is an index into ScopeRecords, which has no inlined-at
+ // position specified.
+ assert(unsigned(ScopeIdx) <= Ctx.pImpl->ScopeRecords.size() &&
+ "Invalid ScopeIdx!");
+ Scope = Ctx.pImpl->ScopeRecords[ScopeIdx-1].get();
+ IA = 0;
+ return;
+ }
+
+ // Otherwise, the index is in the ScopeInlinedAtRecords array.
+ assert(unsigned(-ScopeIdx) <= Ctx.pImpl->ScopeInlinedAtRecords.size() &&
+ "Invalid ScopeIdx");
+ Scope = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].first.get();
+ IA = Ctx.pImpl->ScopeInlinedAtRecords[-ScopeIdx-1].second.get();
+}
+
+
+NewDebugLoc NewDebugLoc::get(unsigned Line, unsigned Col,
+ MDNode *Scope, MDNode *InlinedAt) {
+ NewDebugLoc Result;
+
+ // If no scope is available, this is an unknown location.
+ if (Scope == 0) return Result;
+
+ // Saturate line and col to "unknown".
+ if (Col > 255) Col = 0;
+ if (Line >= (1 << 24)) Line = 0;
+ Result.LineCol = Line | (Col << 24);
+
+ LLVMContext &Ctx = Scope->getContext();
+
+ // If there is no inlined-at location, use the ScopeRecords array.
+ if (InlinedAt == 0)
+ Result.ScopeIdx = Ctx.pImpl->getOrAddScopeRecordIdxEntry(Scope, 0);
+ else
+ Result.ScopeIdx = Ctx.pImpl->getOrAddScopeInlinedAtIdxEntry(Scope,
+ InlinedAt, 0);
+
+ return Result;
+}
+
+/// getAsMDNode - This method converts the compressed DebugLoc node into a
+/// DILocation compatible MDNode.
+MDNode *NewDebugLoc::getAsMDNode(const LLVMContext &Ctx) const {
+ if (isUnknown()) return 0;
+
+ MDNode *Scope, *IA;
+ getScopeAndInlinedAt(Scope, IA, Ctx);
+ assert(Scope && "If scope is null, this should be isUnknown()");
+
+ LLVMContext &Ctx2 = Scope->getContext();
+ const Type *Int32 = Type::getInt32Ty(Ctx2);
+ Value *Elts[] = {
+ ConstantInt::get(Int32, getLine()), ConstantInt::get(Int32, getCol()),
+ Scope, IA
+ };
+ return MDNode::get(Ctx2, &Elts[0], 4);
+}
+
+/// getFromDILocation - Translate the DILocation quad into a NewDebugLoc.
+NewDebugLoc NewDebugLoc::getFromDILocation(MDNode *N) {
+ if (N == 0 || N->getNumOperands() != 4) return NewDebugLoc();
+
+ MDNode *Scope = dyn_cast_or_null<MDNode>(N->getOperand(2));
+ if (Scope == 0) return NewDebugLoc();
+
+ unsigned LineNo = 0, ColNo = 0;
+ if (ConstantInt *Line = dyn_cast_or_null<ConstantInt>(N->getOperand(0)))
+ LineNo = Line->getZExtValue();
+ if (ConstantInt *Col = dyn_cast_or_null<ConstantInt>(N->getOperand(1)))
+ ColNo = Col->getZExtValue();
+
+ return get(LineNo, ColNo, Scope, dyn_cast_or_null<MDNode>(N->getOperand(3)));
+}
+
+//===----------------------------------------------------------------------===//
+// LLVMContextImpl Implementation
+//===----------------------------------------------------------------------===//
+
+int LLVMContextImpl::getOrAddScopeRecordIdxEntry(MDNode *Scope,
+ int ExistingIdx) {
+ // If we already have an entry for this scope, return it.
+ int &Idx = ScopeRecordIdx[Scope];
+ if (Idx) return Idx;
+
+ // If we don't have an entry, but ExistingIdx is specified, use it.
+ if (ExistingIdx)
+ return Idx = ExistingIdx;
+
+ // Otherwise add a new entry.
+
+ // Give ScopeRecords a reasonable initial size to avoid excessive
+ // reallocation early on.
+ if (ScopeRecords.empty())
+ ScopeRecords.reserve(128);
+
+ // Index is biased by 1 so that 0 can mean 'no entry'.
+ Idx = ScopeRecords.size()+1;
+ ScopeRecords.push_back(DebugRecVH(Scope, this, Idx));
+ return Idx;
+}
+
+int LLVMContextImpl::getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,
+ int ExistingIdx) {
+ // If we already have an entry, return it.
+ int &Idx = ScopeInlinedAtIdx[std::make_pair(Scope, IA)];
+ if (Idx) return Idx;
+
+ // If we don't have an entry, but ExistingIdx is specified, use it.
+ if (ExistingIdx)
+ return Idx = ExistingIdx;
+
+ // Give ScopeInlinedAtRecords a reasonable initial size to avoid
+ // excessive reallocation early on.
+ if (ScopeInlinedAtRecords.empty())
+ ScopeInlinedAtRecords.reserve(128);
+
+ // Index is biased by 1 and negated.
+ Idx = -ScopeInlinedAtRecords.size()-1;
+ ScopeInlinedAtRecords.push_back(std::make_pair(DebugRecVH(Scope, this, Idx),
+ DebugRecVH(IA, this, Idx)));
+ return Idx;
+}
+
+
+//===----------------------------------------------------------------------===//
+// DebugRecVH Implementation
+//===----------------------------------------------------------------------===//
+
+/// deleted - The MDNode this is pointing to got deleted, so this pointer needs
+/// to drop to null, and we need to remove our entry from the DenseMap.
+void DebugRecVH::deleted() {
+ // If this is a non-canonical reference, just drop the value to null, we know
+ // it doesn't have a map entry.
+ if (Idx == 0) {
+ setValPtr(0);
+ return;
+ }
+
+ MDNode *Cur = get();
+
+ // If the index is positive, it is an entry in ScopeRecords.
+ if (Idx > 0) {
+ assert(Ctx->ScopeRecordIdx[Cur] == Idx && "Mapping out of date!");
+ Ctx->ScopeRecordIdx.erase(Cur);
+ // Reset this VH to null and we're done.
+ setValPtr(0);
+ Idx = 0;
+ return;
+ }
+
+ // Otherwise, it is an entry in ScopeInlinedAtRecords, we don't know if it
+ // is the scope or the inlined-at record entry.
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
+ std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
+ assert((this == &Entry.first || this == &Entry.second) &&
+ "Mapping out of date!");
+
+ MDNode *OldScope = Entry.first.get();
+ MDNode *OldInlinedAt = Entry.second.get();
+ assert(OldScope != 0 && OldInlinedAt != 0 &&
+ "Entry should be non-canonical if either val dropped to null");
+
+ // Otherwise, we do have an entry in it, nuke it and we're done.
+ assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx &&
+ "Mapping out of date");
+ Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
+
+ // Reset this VH to null. Drop both 'Idx' values to null to indicate that
+ // we're in non-canonical form now.
+ setValPtr(0);
+ Entry.first.Idx = Entry.second.Idx = 0;
+}
+
+void DebugRecVH::allUsesReplacedWith(Value *NewVa) {
+ // If being replaced with a non-mdnode value (e.g. undef) handle this as if
+ // the mdnode got deleted.
+ MDNode *NewVal = dyn_cast<MDNode>(NewVa);
+ if (NewVal == 0) return deleted();
+
+ // If this is a non-canonical reference, just change it, we know it already
+ // doesn't have a map entry.
+ if (Idx == 0) {
+ setValPtr(NewVa);
+ return;
+ }
+
+ MDNode *OldVal = get();
+ assert(OldVal != NewVa && "Node replaced with self?");
+
+ // If the index is positive, it is an entry in ScopeRecords.
+ if (Idx > 0) {
+ assert(Ctx->ScopeRecordIdx[OldVal] == Idx && "Mapping out of date!");
+ Ctx->ScopeRecordIdx.erase(OldVal);
+ setValPtr(NewVal);
+
+ int NewEntry = Ctx->getOrAddScopeRecordIdxEntry(NewVal, Idx);
+
+ // If NewVal already has an entry, this becomes a non-canonical reference,
+ // just drop Idx to 0 to signify this.
+ if (NewEntry != Idx)
+ Idx = 0;
+ return;
+ }
+
+ // Otherwise, it is an entry in ScopeInlinedAtRecords, we don't know if it
+ // is the scope or the inlined-at record entry.
+ assert(unsigned(-Idx-1) < Ctx->ScopeInlinedAtRecords.size());
+ std::pair<DebugRecVH, DebugRecVH> &Entry = Ctx->ScopeInlinedAtRecords[-Idx-1];
+ assert((this == &Entry.first || this == &Entry.second) &&
+ "Mapping out of date!");
+
+ MDNode *OldScope = Entry.first.get();
+ MDNode *OldInlinedAt = Entry.second.get();
+ assert(OldScope != 0 && OldInlinedAt != 0 &&
+ "Entry should be non-canonical if either val dropped to null");
+
+ // Otherwise, we do have an entry in it, nuke it and we're done.
+ assert(Ctx->ScopeInlinedAtIdx[std::make_pair(OldScope, OldInlinedAt)] == Idx &&
+ "Mapping out of date");
+ Ctx->ScopeInlinedAtIdx.erase(std::make_pair(OldScope, OldInlinedAt));
+
+ // Reset this VH to the new value.
+ setValPtr(NewVal);
+
+ int NewIdx = Ctx->getOrAddScopeInlinedAtIdxEntry(Entry.first.get(),
+ Entry.second.get(), Idx);
+ // If NewVal already has an entry, this becomes a non-canonical reference,
+ // just drop Idx to 0 to signify this.
+ if (NewIdx != Idx) {
+ std::pair<DebugRecVH, DebugRecVH> &Entry=Ctx->ScopeInlinedAtRecords[-Idx-1];
+ Entry.first.Idx = Entry.second.Idx = 0;
+ }
+}
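
The new representation packs a source location into eight bytes: LineCol holds 24 bits of line and 8 bits of column (anything that does not fit saturates to 0, meaning "unknown"), and the signed ScopeIdx distinguishes three cases: 0 means no location, positive values are 1-biased indices into ScopeRecords (scope only), and negative values are 1-biased indices into ScopeInlinedAtRecords (scope plus inlined-at). The bit packing in isolation, as a standalone sketch:

    #include <cassert>

    // Pack line/column the way NewDebugLoc::get does: line in the low
    // 24 bits, column in the high 8; out-of-range values become 0.
    static unsigned packLineCol(unsigned Line, unsigned Col) {
      if (Col > 255) Col = 0;
      if (Line >= (1u << 24)) Line = 0;
      return Line | (Col << 24);
    }

    static unsigned lineOf(unsigned LC) { return LC & 0xFFFFFF; }
    static unsigned colOf(unsigned LC)  { return LC >> 24; }

    int main() {
      unsigned LC = packLineCol(42, 7);
      assert(lineOf(LC) == 42 && colOf(LC) == 7);
      return 0;
    }
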
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index dbc283e..8f94efc 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -16,6 +16,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/StringPool.h"
@@ -400,13 +401,16 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, const Type **Tys,
#include "llvm/Intrinsics.gen"
#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
- /// hasAddressTaken - returns true if there are any uses of this function
- /// other than direct calls or invokes to it.
-bool Function::hasAddressTaken() const {
- for (Value::use_const_iterator I = use_begin(), E = use_end(); I != E; ++I) {
- if (I.getOperandNo() != 0 ||
- (!isa<CallInst>(*I) && !isa<InvokeInst>(*I)))
- return true;
+/// hasAddressTaken - returns true if there are any uses of this function
+/// other than direct calls or invokes to it.
+bool Function::hasAddressTaken(const User* *PutOffender) const {
+ for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+ const User *U = *I;
+ if (!isa<CallInst>(U) && !isa<InvokeInst>(U))
+ return PutOffender ? (*PutOffender = U, true) : true;
+ ImmutableCallSite CS(cast<Instruction>(U));
+ if (!CS.isCallee(I))
+ return PutOffender ? (*PutOffender = U, true) : true;
}
return false;
}
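
hasAddressTaken now takes an optional out-parameter so diagnostics can report which use escapes the function's address rather than only that one exists. Usage sketch:

    #include "llvm/Function.h"
    using namespace llvm;

    // Return the first user of F that is not a direct call or invoke of
    // F (stores it, compares it, passes it as data), or null if none.
    static const User *findAddressTaker(const Function *F) {
      const User *Offender = 0;
      if (F->hasAddressTaken(&Offender))
        return Offender;
      return 0;
    }
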
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index 489ec65..b758eb8 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -61,8 +61,8 @@ void GlobalValue::Dematerialize() {
/// that want to check to see if a global is unused, but don't want to deal
/// with potentially dead constants hanging off of the globals.
void GlobalValue::removeDeadConstantUsers() const {
- Value::use_const_iterator I = use_begin(), E = use_end();
- Value::use_const_iterator LastNonDeadUser = E;
+ Value::const_use_iterator I = use_begin(), E = use_end();
+ Value::const_use_iterator LastNonDeadUser = E;
while (I != E) {
if (const Constant *User = dyn_cast<Constant>(*I)) {
if (!removeDeadUsersOfConstant(User)) {
diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp
index 9f2786e..c1b783c 100644
--- a/lib/VMCore/IRBuilder.cpp
+++ b/lib/VMCore/IRBuilder.cpp
@@ -32,19 +32,6 @@ Value *IRBuilderBase::CreateGlobalString(const char *Str, const Twine &Name) {
return GV;
}
-/// SetCurrentDebugLocation - Set location information used by debugging
-/// information.
-void IRBuilderBase::SetCurrentDebugLocation(MDNode *L) {
- if (DbgMDKind == 0)
- DbgMDKind = Context.getMDKindID("dbg");
- CurDbgLocation = L;
-}
-
-void IRBuilderBase::SetInstDebugLocation(Instruction *I) const {
- if (CurDbgLocation)
- I->setMetadata(DbgMDKind, CurDbgLocation);
-}
-
const Type *IRBuilderBase::getCurrentFunctionReturnType() const {
assert(BB && BB->getParent() && "No current function!");
return BB->getParent()->getReturnType();
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index 6355834..0d2eca9 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/InlineAsm.h"
+#include "ConstantsContext.h"
+#include "LLVMContextImpl.h"
#include "llvm/DerivedTypes.h"
#include <algorithm>
#include <cctype>
@@ -23,28 +25,29 @@ InlineAsm::~InlineAsm() {
}
-// NOTE: when memoizing the function type, we have to be careful to handle the
-// case when the type gets refined.
-
InlineAsm *InlineAsm::get(const FunctionType *Ty, StringRef AsmString,
StringRef Constraints, bool hasSideEffects,
bool isAlignStack) {
- // FIXME: memoize!
- return new InlineAsm(Ty, AsmString, Constraints, hasSideEffects,
- isAlignStack);
+ InlineAsmKeyType Key(AsmString, Constraints, hasSideEffects, isAlignStack);
+ LLVMContextImpl *pImpl = Ty->getContext().pImpl;
+ return pImpl->InlineAsms.getOrCreate(PointerType::getUnqual(Ty), Key);
}
-InlineAsm::InlineAsm(const FunctionType *Ty, StringRef asmString,
- StringRef constraints, bool hasSideEffects,
+InlineAsm::InlineAsm(const PointerType *Ty, const std::string &asmString,
+ const std::string &constraints, bool hasSideEffects,
bool isAlignStack)
- : Value(PointerType::getUnqual(Ty),
- Value::InlineAsmVal),
+ : Value(Ty, Value::InlineAsmVal),
AsmString(asmString),
Constraints(constraints), HasSideEffects(hasSideEffects),
IsAlignStack(isAlignStack) {
// Do various checks on the constraint string and type.
- assert(Verify(Ty, constraints) && "Function type not legal for constraints!");
+ assert(Verify(getFunctionType(), constraints) &&
+ "Function type not legal for constraints!");
+}
+
+void InlineAsm::destroyConstant() {
+ delete this;
}
const FunctionType *InlineAsm::getFunctionType() const {
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 3fabfd0..a37fe07 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -283,7 +283,7 @@ bool Instruction::isSameOperationAs(const Instruction *I) const {
/// specified block. Note that PHI nodes are considered to evaluate their
/// operands in the corresponding predecessor block.
bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
- for (use_const_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
+ for (const_use_iterator UI = use_begin(), E = use_end(); UI != E; ++UI) {
// PHI nodes uses values in the corresponding predecessor block. For other
// instructions, just check to see whether the parent of the use matches up.
const PHINode *PN = dyn_cast<PHINode>(*UI);
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 8f4763f..4609a64 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -31,23 +31,18 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
#define CALLSITE_DELEGATE_GETTER(METHOD) \
- Instruction *II(getInstruction()); \
+ Instruction *II = getInstruction(); \
return isCall() \
? cast<CallInst>(II)->METHOD \
: cast<InvokeInst>(II)->METHOD
#define CALLSITE_DELEGATE_SETTER(METHOD) \
- Instruction *II(getInstruction()); \
+ Instruction *II = getInstruction(); \
if (isCall()) \
cast<CallInst>(II)->METHOD; \
else \
cast<InvokeInst>(II)->METHOD
-CallSite::CallSite(Instruction *C) {
- assert((isa<CallInst>(C) || isa<InvokeInst>(C)) && "Not a call!");
- I.setPointer(C);
- I.setInt(isa<CallInst>(C));
-}
CallingConv::ID CallSite::getCallingConv() const {
CALLSITE_DELEGATE_GETTER(getCallingConv());
}
@@ -66,6 +61,17 @@ bool CallSite::paramHasAttr(uint16_t i, Attributes attr) const {
uint16_t CallSite::getParamAlignment(uint16_t i) const {
CALLSITE_DELEGATE_GETTER(getParamAlignment(i));
}
+
+/// @brief Return true if the call should not be inlined.
+bool CallSite::isNoInline() const {
+ CALLSITE_DELEGATE_GETTER(isNoInline());
+}
+
+void CallSite::setIsNoInline(bool Value) {
+ CALLSITE_DELEGATE_GETTER(setIsNoInline(Value));
+}
+
+
bool CallSite::doesNotAccessMemory() const {
CALLSITE_DELEGATE_GETTER(doesNotAccessMemory());
}
@@ -98,6 +104,13 @@ bool CallSite::hasArgument(const Value *Arg) const {
return false;
}
+User::op_iterator CallSite::getCallee() const {
+ Instruction *II(getInstruction());
+ return isCall()
+ ? cast<CallInst>(II)->op_begin()
+ : cast<InvokeInst>(II)->op_end() - 3; // Skip BB, BB, Function
+}
+
#undef CALLSITE_DELEGATE_GETTER
#undef CALLSITE_DELEGATE_SETTER
@@ -611,24 +624,24 @@ Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
Value* const *Args, unsigned NumArgs) {
assert(NumOperands == 3+NumArgs && "NumOperands not set up?");
- Use *OL = OperandList;
- OL[0] = Fn;
- OL[1] = IfNormal;
- OL[2] = IfException;
+ Op<-3>() = Fn;
+ Op<-2>() = IfNormal;
+ Op<-1>() = IfException;
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
FTy = FTy; // silence warning.
assert(((NumArgs == FTy->getNumParams()) ||
(FTy->isVarArg() && NumArgs > FTy->getNumParams())) &&
- "Calling a function with bad signature");
+ "Invoking a function with bad signature");
+ Use *OL = OperandList;
for (unsigned i = 0, e = NumArgs; i != e; i++) {
assert((i >= FTy->getNumParams() ||
FTy->getParamType(i) == Args[i]->getType()) &&
"Invoking a function with a bad signature!");
- OL[i+3] = Args[i];
+ OL[i] = Args[i];
}
}
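
The init() hunk above moves the callee and the two destination blocks to the tail of the operand list (Op<-3>() through Op<-1>()), with the call arguments stored at the front; that is also why getCallee() for an invoke returns op_end() - 3. A toy sketch of the tail-anchored layout, using a plain vector in a hypothetical FakeInvoke type rather than the real Use machinery:

#include <cassert>
#include <vector>

struct FakeInvoke {
  std::vector<void*> Ops;                         // NumArgs + 3 slots
  void init(void *Fn, void *IfNormal, void *IfException,
            void *const *Args, unsigned NumArgs) {
    Ops.assign(NumArgs + 3, nullptr);
    for (unsigned i = 0; i != NumArgs; ++i)
      Ops[i] = Args[i];                           // OL[i] = Args[i]
    Ops[NumArgs]     = Fn;                        // Op<-3>()
    Ops[NumArgs + 1] = IfNormal;                  // Op<-2>()
    Ops[NumArgs + 2] = IfException;               // Op<-1>()
  }
  void **callee() { return &Ops[Ops.size() - 3]; } // op_end() - 3
};

int main() {
  int Fn, BB1, BB2, A0, A1;
  void *Args[] = {&A0, &A1};
  FakeInvoke I;
  I.init(&Fn, &BB1, &BB2, Args, 2);
  assert(*I.callee() == static_cast<void*>(&Fn));
}
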
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
index 5a8ea5c..2a870ec 100644
--- a/lib/VMCore/LLVMContext.cpp
+++ b/lib/VMCore/LLVMContext.cpp
@@ -26,19 +26,52 @@ LLVMContext& llvm::getGlobalContext() {
return *GlobalContext;
}
-LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { }
+LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
+ // Create the first metadata kind, which is always 'dbg'.
+ unsigned DbgID = getMDKindID("dbg");
+ assert(DbgID == MD_dbg && "dbg kind id drifted"); (void)DbgID;
+}
LLVMContext::~LLVMContext() { delete pImpl; }
-GetElementPtrConstantExpr::GetElementPtrConstantExpr
- (Constant *C,
- const std::vector<Constant*> &IdxList,
- const Type *DestTy)
- : ConstantExpr(DestTy, Instruction::GetElementPtr,
- OperandTraits<GetElementPtrConstantExpr>::op_end(this)
- - (IdxList.size()+1),
- IdxList.size()+1) {
- OperandList[0] = C;
- for (unsigned i = 0, E = IdxList.size(); i != E; ++i)
- OperandList[i+1] = IdxList[i];
+
+#ifndef NDEBUG
+/// isValidName - Return true if Name is a valid custom metadata handler name.
+static bool isValidName(StringRef MDName) {
+ if (MDName.empty())
+ return false;
+
+ if (!isalpha(MDName[0]))
+ return false;
+
+ for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E;
+ ++I) {
+ if (!isalnum(*I) && *I != '_' && *I != '-' && *I != '.')
+ return false;
+ }
+ return true;
+}
+#endif
+
+/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
+unsigned LLVMContext::getMDKindID(StringRef Name) const {
+ assert(isValidName(Name) && "Invalid MDNode name");
+
+ unsigned &Entry = pImpl->CustomMDKindNames[Name];
+
+ // If this is new, assign it its ID.
+ if (Entry == 0) Entry = pImpl->CustomMDKindNames.size();
+ return Entry;
}
+/// getMDKindNames - Populate the client-supplied SmallVector with the custom
+/// metadata kind names, indexed by kind ID.
+void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
+ Names.resize(pImpl->CustomMDKindNames.size()+1);
+ Names[0] = "";
+ for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(),
+ E = pImpl->CustomMDKindNames.end(); I != E; ++I)
+ // MD Handlers are numbered from 1.
+ Names[I->second] = I->first();
+}
+
+
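
getMDKindID() above assigns IDs lazily: looking up a new name default-inserts 0 into the map, and the ID then becomes the map's size after insertion, so kinds are numbered from 1 in registration order (matching the "MD Handlers are numbered from 1" comment). A minimal sketch with std::map standing in for StringMap and hypothetical kind names:

#include <cassert>
#include <map>
#include <string>

struct KindRegistry {
  std::map<std::string, unsigned> Names;   // stands in for CustomMDKindNames
  unsigned getID(const std::string &Name) {
    unsigned &Entry = Names[Name];         // default-inserts 0 for a new name
    if (Entry == 0)
      Entry = Names.size();                // first kind -> 1, second -> 2, ...
    return Entry;
  }
};

int main() {
  KindRegistry R;
  assert(R.getID("dbg") == 1);             // first registered kind
  assert(R.getID("tbaa") == 2);            // hypothetical second kind
  assert(R.getID("dbg") == 1);             // stable across lookups
}
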
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
new file mode 100644
index 0000000..b4553dd
--- /dev/null
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -0,0 +1,103 @@
+//===-- LLVMContextImpl.cpp - Implement LLVMContextImpl -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the opaque LLVMContextImpl.
+//
+//===----------------------------------------------------------------------===//
+
+#include "LLVMContextImpl.h"
+#include <algorithm>
+
+LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
+ : TheTrueVal(0), TheFalseVal(0),
+ VoidTy(C, Type::VoidTyID),
+ LabelTy(C, Type::LabelTyID),
+ FloatTy(C, Type::FloatTyID),
+ DoubleTy(C, Type::DoubleTyID),
+ MetadataTy(C, Type::MetadataTyID),
+ X86_FP80Ty(C, Type::X86_FP80TyID),
+ FP128Ty(C, Type::FP128TyID),
+ PPC_FP128Ty(C, Type::PPC_FP128TyID),
+ Int1Ty(C, 1),
+ Int8Ty(C, 8),
+ Int16Ty(C, 16),
+ Int32Ty(C, 32),
+ Int64Ty(C, 64),
+ AlwaysOpaqueTy(new OpaqueType(C)) {
+ // Make sure the AlwaysOpaqueTy stays alive as long as the Context.
+ AlwaysOpaqueTy->addRef();
+ OpaqueTypes.insert(AlwaysOpaqueTy);
+}
+
+namespace {
+struct DropReferences {
+ // Takes the value_type of a ConstantUniqueMap's internal map, whose 'second'
+ // is a Constant*.
+ template<typename PairT>
+ void operator()(const PairT &P) {
+ P.second->dropAllReferences();
+ }
+};
+}
+
+LLVMContextImpl::~LLVMContextImpl() {
+ std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
+ DropReferences());
+ std::for_each(ArrayConstants.map_begin(), ArrayConstants.map_end(),
+ DropReferences());
+ std::for_each(StructConstants.map_begin(), StructConstants.map_end(),
+ DropReferences());
+ std::for_each(UnionConstants.map_begin(), UnionConstants.map_end(),
+ DropReferences());
+ std::for_each(VectorConstants.map_begin(), VectorConstants.map_end(),
+ DropReferences());
+ ExprConstants.freeConstants();
+ ArrayConstants.freeConstants();
+ StructConstants.freeConstants();
+ UnionConstants.freeConstants();
+ VectorConstants.freeConstants();
+ AggZeroConstants.freeConstants();
+ NullPtrConstants.freeConstants();
+ UndefValueConstants.freeConstants();
+ InlineAsms.freeConstants();
+ for (IntMapTy::iterator I = IntConstants.begin(), E = IntConstants.end();
+ I != E; ++I) {
+ delete I->second;
+ }
+ for (FPMapTy::iterator I = FPConstants.begin(), E = FPConstants.end();
+ I != E; ++I) {
+ delete I->second;
+ }
+ AlwaysOpaqueTy->dropRef();
+ for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end();
+ I != E; ++I) {
+ (*I)->AbstractTypeUsers.clear();
+ delete *I;
+ }
+ // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
+ // and the NonUniquedMDNodes sets, so copy the values out first.
+ SmallVector<MDNode*, 8> MDNodes;
+ MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size());
+ for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
+ I != E; ++I) {
+ MDNodes.push_back(&*I);
+ }
+ MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
+ for (SmallVector<MDNode*, 8>::iterator I = MDNodes.begin(),
+ E = MDNodes.end(); I != E; ++I) {
+ (*I)->destroy();
+ }
+ assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
+ "Destroying all MDNodes didn't empty the Context's sets.");
+ // Destroy MDStrings.
+ for (StringMap<MDString*>::iterator I = MDStringCache.begin(),
+ E = MDStringCache.end(); I != E; ++I) {
+ delete I->second;
+ }
+}
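
Note the teardown order in the destructor above: every uniqued constant first has its operand references dropped (the DropReferences pass over each map) and only then is freed, so constants that reference one another never observe a half-destroyed peer. A sketch of that two-phase idiom with a hypothetical Node type:

#include <vector>

struct Node {
  std::vector<Node*> Refs;
  void dropAllReferences() { Refs.clear(); }
};

void destroyAll(std::vector<Node*> &Pool) {
  for (unsigned i = 0; i != Pool.size(); ++i)
    Pool[i]->dropAllReferences();   // phase 1: break any reference cycles
  for (unsigned i = 0; i != Pool.size(); ++i)
    delete Pool[i];                 // phase 2: free the storage
  Pool.clear();
}

int main() {
  std::vector<Node*> Pool;
  Pool.push_back(new Node);
  Pool.push_back(new Node);
  Pool[0]->Refs.push_back(Pool[1]); // a cycle: neither object could
  Pool[1]->Refs.push_back(Pool[0]); // safely be freed "last"
  destroyAll(Pool);
}
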
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 9978f40..d4ebf80 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -1,4 +1,4 @@
-//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class --------------===//
+//===-- LLVMContextImpl.h - The LLVMContextImpl opaque class ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,9 +19,9 @@
#include "LeaksContext.h"
#include "TypesContext.h"
#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Metadata.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/ADT/APFloat.h"
@@ -36,8 +36,6 @@ namespace llvm {
class ConstantInt;
class ConstantFP;
-class MDString;
-class MDNode;
class LLVMContext;
class Type;
class Value;
@@ -92,6 +90,29 @@ struct DenseMapAPFloatKeyInfo {
}
};
+/// DebugRecVH - This is a CallbackVH used to keep the Scope -> index maps
+/// up to date as MDNodes mutate. This class is implemented in DebugLoc.cpp.
+class DebugRecVH : public CallbackVH {
+ /// Ctx - This is the LLVM Context being referenced.
+ LLVMContextImpl *Ctx;
+
+ /// Idx - The index into either ScopeRecordIdx or ScopeInlinedAtRecords that
+ /// this reference lives in. If this is zero, then it represents a
+ /// non-canonical entry that has no DenseMap value. This can happen due to
+ /// RAUW.
+ int Idx;
+public:
+ DebugRecVH(MDNode *n, LLVMContextImpl *ctx, int idx)
+ : CallbackVH(n), Ctx(ctx), Idx(idx) {}
+
+ MDNode *get() const {
+ return cast_or_null<MDNode>(getValPtr());
+ }
+
+ virtual void deleted();
+ virtual void allUsesReplacedWith(Value *VNew);
+};
+
class LLVMContextImpl {
public:
typedef DenseMap<DenseMapAPIntKeyInfo::KeyTy, ConstantInt*,
@@ -130,11 +151,12 @@ public:
VectorConstantsTy VectorConstants;
ConstantUniqueMap<char, PointerType, ConstantPointerNull> NullPtrConstants;
-
ConstantUniqueMap<char, Type, UndefValue> UndefValueConstants;
DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
ConstantUniqueMap<ExprMapKeyType, Type, ConstantExpr> ExprConstants;
+
+ ConstantUniqueMap<InlineAsmKeyType, PointerType, InlineAsm> InlineAsms;
ConstantInt *TheTrueVal;
ConstantInt *TheFalseVal;
@@ -195,72 +217,29 @@ public:
/// context.
DenseMap<const Instruction *, MDMapTy> MetadataStore;
+ /// ScopeRecordIdx - This is the index in ScopeRecords for an MDNode scope
+ /// entry with no "inlined at" element.
+ DenseMap<MDNode*, int> ScopeRecordIdx;
- LLVMContextImpl(LLVMContext &C) : TheTrueVal(0), TheFalseVal(0),
- VoidTy(C, Type::VoidTyID),
- LabelTy(C, Type::LabelTyID),
- FloatTy(C, Type::FloatTyID),
- DoubleTy(C, Type::DoubleTyID),
- MetadataTy(C, Type::MetadataTyID),
- X86_FP80Ty(C, Type::X86_FP80TyID),
- FP128Ty(C, Type::FP128TyID),
- PPC_FP128Ty(C, Type::PPC_FP128TyID),
- Int1Ty(C, 1),
- Int8Ty(C, 8),
- Int16Ty(C, 16),
- Int32Ty(C, 32),
- Int64Ty(C, 64),
- AlwaysOpaqueTy(new OpaqueType(C)) {
- // Make sure the AlwaysOpaqueTy stays alive as long as the Context.
- AlwaysOpaqueTy->addRef();
- OpaqueTypes.insert(AlwaysOpaqueTy);
- }
-
- ~LLVMContextImpl() {
- ExprConstants.freeConstants();
- ArrayConstants.freeConstants();
- StructConstants.freeConstants();
- VectorConstants.freeConstants();
- AggZeroConstants.freeConstants();
- NullPtrConstants.freeConstants();
- UndefValueConstants.freeConstants();
- for (IntMapTy::iterator I = IntConstants.begin(), E = IntConstants.end();
- I != E; ++I) {
- if (I->second->use_empty())
- delete I->second;
- }
- for (FPMapTy::iterator I = FPConstants.begin(), E = FPConstants.end();
- I != E; ++I) {
- if (I->second->use_empty())
- delete I->second;
- }
- AlwaysOpaqueTy->dropRef();
- for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end();
- I != E; ++I) {
- (*I)->AbstractTypeUsers.clear();
- delete *I;
- }
- // Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
- // and the NonUniquedMDNodes sets, so copy the values out first.
- SmallVector<MDNode*, 8> MDNodes;
- MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size());
- for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
- I != E; ++I) {
- MDNodes.push_back(&*I);
- }
- MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
- for (SmallVector<MDNode*, 8>::iterator I = MDNodes.begin(),
- E = MDNodes.end(); I != E; ++I) {
- (*I)->destroy();
- }
- assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
- "Destroying all MDNodes didn't empty the Context's sets.");
- // Destroy MDStrings.
- for (StringMap<MDString*>::iterator I = MDStringCache.begin(),
- E = MDStringCache.end(); I != E; ++I) {
- delete I->second;
- }
- }
+ /// ScopeRecords - These are the actual mdnodes (in a value handle) for an
+ /// index. The ValueHandle ensures that ScopeRecordIdx stays up to date if
+ /// the MDNode is RAUW'd.
+ std::vector<DebugRecVH> ScopeRecords;
+
+ /// ScopeInlinedAtIdx - This is the index in ScopeInlinedAtRecords for a
+ /// scope/inlined-at pair.
+ DenseMap<std::pair<MDNode*, MDNode*>, int> ScopeInlinedAtIdx;
+
+ /// ScopeInlinedAtRecords - These are the actual mdnodes (in value handles)
+ /// for an index. The ValueHandle ensures that ScopeInlinedAtIdx stays up
+ /// to date.
+ std::vector<std::pair<DebugRecVH, DebugRecVH> > ScopeInlinedAtRecords;
+
+ int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx);
+ int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx);
+
+ LLVMContextImpl(LLVMContext &C);
+ ~LLVMContextImpl();
};
}
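
DebugRecVH above is a CallbackVH: when its MDNode is deleted or RAUW'd, the virtual hooks fire and the scope-index side tables can be repaired. A conceptual, much-simplified sketch of that pattern (hypothetical Node/RecHandle types; the real overrides live in DebugLoc.cpp, and remapping on RAUW is one plausible policy, not necessarily the exact one used):

#include <map>

struct Node {};

// Stand-in for DebugRecVH: it knows which map entry it backs. Idx == 0
// means a non-canonical entry with no map value, as the header says.
struct RecHandle {
  Node *N;
  std::map<Node*, int> *Map;   // plays the role of ScopeRecordIdx
  int Idx;

  void deleted() {                       // the node was destroyed
    if (Idx != 0) Map->erase(N);
    N = nullptr;
  }
  void allUsesReplacedWith(Node *New) {  // the node was RAUW'd
    if (Idx != 0) { Map->erase(N); (*Map)[New] = Idx; }
    N = New;
  }
};

int main() {
  std::map<Node*, int> Idx;
  Node A, B;
  Idx[&A] = 1;
  RecHandle H{&A, &Idx, 1};
  H.allUsesReplacedWith(&B);   // the map entry follows the replacement
  return Idx.count(&B) == 1 ? 0 : 1;
}
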
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 06d4fd4..73e6091 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -182,19 +182,6 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
unsigned NumVals, FunctionLocalness FL,
bool Insert) {
LLVMContextImpl *pImpl = Context.pImpl;
- FoldingSetNodeID ID;
- for (unsigned i = 0; i != NumVals; ++i)
- ID.AddPointer(Vals[i]);
-
- void *InsertPoint;
- MDNode *N = NULL;
-
- if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)))
- return N;
-
- if (!Insert)
- return NULL;
-
bool isFunctionLocal = false;
switch (FL) {
case FL_Unknown:
@@ -216,6 +203,20 @@ MDNode *MDNode::getMDNode(LLVMContext &Context, Value *const *Vals,
break;
}
+ FoldingSetNodeID ID;
+ for (unsigned i = 0; i != NumVals; ++i)
+ ID.AddPointer(Vals[i]);
+ ID.AddBoolean(isFunctionLocal);
+
+ void *InsertPoint;
+ MDNode *N = NULL;
+
+ if ((N = pImpl->MDNodeSet.FindNodeOrInsertPos(ID, InsertPoint)))
+ return N;
+
+ if (!Insert)
+ return NULL;
+
// Co-allocate space for the node and Operands together, then placement new.
void *Ptr = malloc(sizeof(MDNode)+NumVals*sizeof(MDNodeOperand));
N = new (Ptr) MDNode(Context, Vals, NumVals, isFunctionLocal);
@@ -248,6 +249,7 @@ Value *MDNode::getOperand(unsigned i) const {
void MDNode::Profile(FoldingSetNodeID &ID) const {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
ID.AddPointer(getOperand(i));
+ ID.AddBoolean(isFunctionLocal());
}
void MDNode::setIsNotUniqued() {
@@ -410,50 +412,6 @@ StringRef NamedMDNode::getName() const {
}
//===----------------------------------------------------------------------===//
-// LLVMContext MDKind naming implementation.
-//
-
-#ifndef NDEBUG
-/// isValidName - Return true if Name is a valid custom metadata handler name.
-static bool isValidName(StringRef MDName) {
- if (MDName.empty())
- return false;
-
- if (!isalpha(MDName[0]))
- return false;
-
- for (StringRef::iterator I = MDName.begin() + 1, E = MDName.end(); I != E;
- ++I) {
- if (!isalnum(*I) && *I != '_' && *I != '-' && *I != '.')
- return false;
- }
- return true;
-}
-#endif
-
-/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
-unsigned LLVMContext::getMDKindID(StringRef Name) const {
- assert(isValidName(Name) && "Invalid MDNode name");
-
- unsigned &Entry = pImpl->CustomMDKindNames[Name];
-
- // If this is new, assign it its ID.
- if (Entry == 0) Entry = pImpl->CustomMDKindNames.size();
- return Entry;
-}
-
-/// getHandlerNames - Populate client supplied smallvector using custome
-/// metadata name and ID.
-void LLVMContext::getMDKindNames(SmallVectorImpl<StringRef> &Names) const {
- Names.resize(pImpl->CustomMDKindNames.size()+1);
- Names[0] = "";
- for (StringMap<unsigned>::const_iterator I = pImpl->CustomMDKindNames.begin(),
- E = pImpl->CustomMDKindNames.end(); I != E; ++I)
- // MD Handlers are numbered from 1.
- Names[I->second] = I->first();
-}
-
-//===----------------------------------------------------------------------===//
// Instruction Metadata method implementations.
//
@@ -466,18 +424,29 @@ MDNode *Instruction::getMetadataImpl(const char *Kind) const {
return getMetadataImpl(getContext().getMDKindID(Kind));
}
+void Instruction::setDbgMetadata(MDNode *Node) {
+ DbgLoc = NewDebugLoc::getFromDILocation(Node);
+}
+
/// setMetadata - Set the metadata of the specified kind to the specified
/// node. This updates/replaces metadata if already present, or removes it if
/// Node is null.
void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
if (Node == 0 && !hasMetadata()) return;
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (KindID == LLVMContext::MD_dbg) {
+ DbgLoc = NewDebugLoc::getFromDILocation(Node);
+ return;
+ }
+
// Handle the case when we're adding/updating metadata on an instruction.
if (Node) {
LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
- assert(!Info.empty() == hasMetadata() && "HasMetadata bit is wonked");
+ assert(!Info.empty() == hasMetadataHashEntry() &&
+ "HasMetadata bit is wonked");
if (Info.empty()) {
- setHasMetadata(true);
+ setHasMetadataHashEntry(true);
} else {
// Handle replacement of an existing value.
for (unsigned i = 0, e = Info.size(); i != e; ++i)
@@ -493,18 +462,19 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
}
// Otherwise, we're removing metadata from an instruction.
- assert(hasMetadata() && getContext().pImpl->MetadataStore.count(this) &&
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
"HasMetadata bit out of date!");
LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
// Common case is removing the only entry.
if (Info.size() == 1 && Info[0].first == KindID) {
getContext().pImpl->MetadataStore.erase(this);
- setHasMetadata(false);
+ setHasMetadataHashEntry(false);
return;
}
- // Handle replacement of an existing value.
+ // Handle removal of an existing value.
for (unsigned i = 0, e = Info.size(); i != e; ++i)
if (Info[i].first == KindID) {
Info[i] = Info.back();
@@ -516,8 +486,14 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
}
MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (KindID == LLVMContext::MD_dbg)
+ return DbgLoc.getAsMDNode(getContext());
+
+ if (!hasMetadataHashEntry()) return 0;
+
LLVMContextImpl::MDMapTy &Info = getContext().pImpl->MetadataStore[this];
- assert(hasMetadata() && !Info.empty() && "Shouldn't have called this");
+ assert(!Info.empty() && "bit out of sync with hash table");
for (LLVMContextImpl::MDMapTy::iterator I = Info.begin(), E = Info.end();
I != E; ++I)
@@ -527,14 +503,23 @@ MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
}
void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,
- MDNode*> > &Result)const {
- assert(hasMetadata() && getContext().pImpl->MetadataStore.count(this) &&
+ MDNode*> > &Result) const {
+ Result.clear();
+
+ // Handle 'dbg' as a special case since it is not stored in the hash table.
+ if (!DbgLoc.isUnknown()) {
+ Result.push_back(std::make_pair((unsigned)LLVMContext::MD_dbg,
+ DbgLoc.getAsMDNode(getContext())));
+ if (!hasMetadataHashEntry()) return;
+ }
+
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
"Shouldn't have called this");
const LLVMContextImpl::MDMapTy &Info =
getContext().pImpl->MetadataStore.find(this)->second;
assert(!Info.empty() && "Shouldn't have called this");
- Result.clear();
Result.append(Info.begin(), Info.end());
// Sort the resulting array so it is stable.
@@ -542,10 +527,32 @@ void Instruction::getAllMetadataImpl(SmallVectorImpl<std::pair<unsigned,
array_pod_sort(Result.begin(), Result.end());
}
+void Instruction::
+getAllMetadataOtherThanDebugLocImpl(SmallVectorImpl<std::pair<unsigned,
+ MDNode*> > &Result) const {
+ Result.clear();
+ assert(hasMetadataHashEntry() &&
+ getContext().pImpl->MetadataStore.count(this) &&
+ "Shouldn't have called this");
+ const LLVMContextImpl::MDMapTy &Info =
+ getContext().pImpl->MetadataStore.find(this)->second;
+ assert(!Info.empty() && "Shouldn't have called this");
+
+ Result.append(Info.begin(), Info.end());
+
+ // Sort the resulting array so it is stable.
+ if (Result.size() > 1)
+ array_pod_sort(Result.begin(), Result.end());
+}
+
+
/// removeAllMetadata - Remove all metadata from this instruction.
void Instruction::removeAllMetadata() {
assert(hasMetadata() && "Caller should check");
- getContext().pImpl->MetadataStore.erase(this);
- setHasMetadata(false);
+ DbgLoc = NewDebugLoc();
+ if (hasMetadataHashEntry()) {
+ getContext().pImpl->MetadataStore.erase(this);
+ setHasMetadataHashEntry(false);
+ }
}
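
The Metadata.cpp hunks split instruction metadata into two tiers: the hot 'dbg' kind lives in a dedicated DbgLoc field on the instruction itself, while all other kinds stay in the context's side table, guarded by the hasMetadataHashEntry() bit. A self-contained sketch of that dispatch (hypothetical Inst/MDNodeStub types; kind 1 plays the role of LLVMContext::MD_dbg here):

#include <map>

struct MDNodeStub {};

struct Inst {
  static const unsigned MD_dbg = 1;
  MDNodeStub *DbgLoc = nullptr;            // fast inline slot for 'dbg'
  std::map<unsigned, MDNodeStub*> Store;   // side table for everything else

  void setMetadata(unsigned Kind, MDNodeStub *N) {
    if (Kind == MD_dbg) { DbgLoc = N; return; }  // special case, no table
    if (N) Store[Kind] = N;
    else   Store.erase(Kind);
  }
  MDNodeStub *getMetadata(unsigned Kind) const {
    if (Kind == MD_dbg) return DbgLoc;
    std::map<unsigned, MDNodeStub*>::const_iterator I = Store.find(Kind);
    return I == Store.end() ? nullptr : I->second;
  }
};

int main() {
  Inst I; MDNodeStub D;
  I.setMetadata(Inst::MD_dbg, &D);         // never touches the side table
  return I.getMetadata(Inst::MD_dbg) == &D && I.Store.empty() ? 0 : 1;
}
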
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index 001bb00..94840f0 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -82,7 +82,7 @@ Module::Endianness Module::getEndianness() const {
while (!temp.empty()) {
StringRef token = DataLayout;
- tie(token, temp) = getToken(DataLayout, "-");
+ tie(token, temp) = getToken(temp, "-");
if (token[0] == 'e') {
ret = LittleEndian;
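
The one-line Module.cpp fix matters: the old call re-tokenized the full DataLayout string on every iteration, so the loop never advanced past the first token and would spin forever on any multi-token layout string; splitting temp instead consumes the remainder each time. A standalone sketch of the corrected loop, using std::string in place of StringRef/getToken:

#include <iostream>
#include <string>

int main() {
  std::string temp = "e-p:32:32-i64:64";        // sample datalayout string
  while (!temp.empty()) {
    std::string::size_type Dash = temp.find('-');
    std::string token = temp.substr(0, Dash);   // getToken(temp, "-")
    temp = (Dash == std::string::npos) ? std::string() : temp.substr(Dash + 1);
    std::cout << token << '\n';                 // e, p:32:32, i64:64
  }
}
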
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index c4dfe14..6774cec 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -378,17 +378,19 @@ namespace {
static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex;
class TimingInfo {
- std::map<Pass*, Timer> TimingData;
+ DenseMap<Pass*, Timer*> TimingData;
TimerGroup TG;
-
public:
// Use 'create' member to get this.
TimingInfo() : TG("... Pass execution timing report ...") {}
// TimingDtor - Print out information about timing information
~TimingInfo() {
- // Delete all of the timers...
- TimingData.clear();
+ // Delete all of the timers, which accumulate their info into the
+ // TimerGroup.
+ for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(),
+ E = TimingData.end(); I != E; ++I)
+ delete I->second;
// TimerGroup is deleted next, printing the report.
}
@@ -397,18 +399,15 @@ public:
// null. It may be called multiple times.
static void createTheTimeInfo();
- /// passStarted - This method creates a timer for the given pass if it doesn't
- /// already have one, and starts the timer.
- Timer *passStarted(Pass *P) {
+ /// getPassTimer - Return the timer for the specified pass if it exists.
+ Timer *getPassTimer(Pass *P) {
if (P->getAsPMDataManager())
return 0;
sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
- std::map<Pass*, Timer>::iterator I = TimingData.find(P);
- if (I == TimingData.end())
- I=TimingData.insert(std::make_pair(P, Timer(P->getPassName(), TG))).first;
- Timer *T = &I->second;
- T->startTimer();
+ Timer *&T = TimingData[P];
+ if (T == 0)
+ T = new Timer(P->getPassName(), TG);
return T;
}
};
@@ -704,11 +703,8 @@ void PMDataManager::verifyPreservedAnalysis(Pass *P) {
E = PreservedSet.end(); I != E; ++I) {
AnalysisID AID = *I;
if (Pass *AP = findAnalysisPass(AID, true)) {
-
- Timer *T = 0;
- if (TheTimeInfo) T = TheTimeInfo->passStarted(AP);
+ TimeRegion PassTimer(getPassTimer(AP));
AP->verifyAnalysis();
- if (T) T->stopTimer();
}
}
}
@@ -792,10 +788,9 @@ void PMDataManager::freePass(Pass *P, StringRef Msg,
{
// If the pass crashes releasing memory, remember this.
PassManagerPrettyStackEntry X(P);
-
- Timer *T = StartPassTimer(P);
+ TimeRegion PassTimer(getPassTimer(P));
+
P->releaseMemory();
- StopPassTimer(P, T);
}
if (const PassInfo *PI = P->getPassInfo()) {
@@ -1128,10 +1123,9 @@ bool BBPassManager::runOnFunction(Function &F) {
{
// If the pass crashes, remember this.
PassManagerPrettyStackEntry X(BP, *I);
-
- Timer *T = StartPassTimer(BP);
+ TimeRegion PassTimer(getPassTimer(BP));
+
LocalChanged |= BP->runOnBasicBlock(*I);
- StopPassTimer(BP, T);
}
Changed |= LocalChanged;
@@ -1345,10 +1339,9 @@ bool FPPassManager::runOnFunction(Function &F) {
{
PassManagerPrettyStackEntry X(FP, F);
+ TimeRegion PassTimer(getPassTimer(FP));
- Timer *T = StartPassTimer(FP);
LocalChanged |= FP->runOnFunction(F);
- StopPassTimer(FP, T);
}
Changed |= LocalChanged;
@@ -1420,9 +1413,9 @@ MPPassManager::runOnModule(Module &M) {
{
PassManagerPrettyStackEntry X(MP, M);
- Timer *T = StartPassTimer(MP);
+ TimeRegion PassTimer(getPassTimer(MP));
+
LocalChanged |= MP->runOnModule(M);
- StopPassTimer(MP, T);
}
Changed |= LocalChanged;
@@ -1559,17 +1552,12 @@ void TimingInfo::createTheTimeInfo() {
}
/// If TimingInfo is enabled then start pass timer.
-Timer *llvm::StartPassTimer(Pass *P) {
+Timer *llvm::getPassTimer(Pass *P) {
if (TheTimeInfo)
- return TheTimeInfo->passStarted(P);
+ return TheTimeInfo->getPassTimer(P);
return 0;
}
-/// If TimingInfo is enabled then stop pass timer.
-void llvm::StopPassTimer(Pass *P, Timer *T) {
- if (T) T->stopTimer();
-}
-
//===----------------------------------------------------------------------===//
// PMStack implementation
//
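
The PassManager.cpp hunks replace paired StartPassTimer/StopPassTimer calls with a scoped TimeRegion, so the timer stops on every exit path, including early returns; and since getPassTimer() returns 0 when timing is disabled, the region must tolerate a null pointer. A minimal sketch of that RAII idiom with a hypothetical Timer:

#include <cstdio>

struct Timer {
  const char *Name;
  void startTimer() { std::printf("start %s\n", Name); }
  void stopTimer()  { std::printf("stop  %s\n", Name); }
};

class TimeRegion {
  Timer *T;
public:
  explicit TimeRegion(Timer *t) : T(t) { if (T) T->startTimer(); }
  ~TimeRegion() { if (T) T->stopTimer(); }
};

void runPass(Timer *PassTimer, bool Bail) {
  TimeRegion R(PassTimer);   // replaces a StartPassTimer/StopPassTimer pair
  if (Bail) return;          // the timer still stops on this early exit
  /* ... pass body ... */
}

int main() {
  Timer T{"example-pass"};
  runPass(&T, true);
  runPass(nullptr, false);   // null timer: timing disabled, still safe
}
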
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index 2a0cfa8..5f9c11f 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -57,6 +57,11 @@ void AbstractTypeUser::setType(Value *V, const Type *NewTy) {
/// need for a std::vector to be used in the Type class itself.
/// @brief Type destruction function
void Type::destroy() const {
+ // Nothing calls getForwardedType from here on.
+ if (ForwardType && ForwardType->isAbstract()) {
+ ForwardType->dropRef();
+ ForwardType = NULL;
+ }
// Structures and Functions allocate their contained types past the end of
// the type object itself. These need to be destroyed differently than the
@@ -87,11 +92,6 @@ void Type::destroy() const {
pImpl->OpaqueTypes.erase(opaque_this);
}
- if (ForwardType && ForwardType->isAbstract()) {
- ForwardType->dropRef();
- ForwardType = NULL;
- }
-
// For all the other type subclasses, there is either no contained types or
// just one (all Sequentials). For Sequentials, the PATypeHandle is not
// allocated past the type object, it's included directly in the SequentialType
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index a36d262..645dd5a 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -86,7 +86,7 @@ Value::~Value() {
/// hasNUses - Return true if this Value has exactly N users.
///
bool Value::hasNUses(unsigned N) const {
- use_const_iterator UI = use_begin(), E = use_end();
+ const_use_iterator UI = use_begin(), E = use_end();
for (; N; --N, ++UI)
if (UI == E) return false; // Too few.
@@ -97,7 +97,7 @@ bool Value::hasNUses(unsigned N) const {
/// logically equivalent to getNumUses() >= N.
///
bool Value::hasNUsesOrMore(unsigned N) const {
- use_const_iterator UI = use_begin(), E = use_end();
+ const_use_iterator UI = use_begin(), E = use_end();
for (; N; --N, ++UI)
if (UI == E) return false; // Too few.
@@ -108,7 +108,7 @@ bool Value::hasNUsesOrMore(unsigned N) const {
/// isUsedInBasicBlock - Return true if this value is used in the specified
/// basic block.
bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
- for (use_const_iterator I = use_begin(), E = use_end(); I != E; ++I) {
+ for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) {
const Instruction *User = dyn_cast<Instruction>(*I);
if (User && User->getParent() == BB)
return true;
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index d30a9d6..449d61a 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -55,9 +55,7 @@ void ValueSymbolTable::reinsertValue(Value* V) {
raw_svector_ostream(UniqueName) << ++LastUnique;
// Try to insert the vmap entry with this suffix.
- ValueName &NewName =
- vmap.GetOrCreateValue(StringRef(UniqueName.data(),
- UniqueName.size()));
+ ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
if (NewName.getValue() == 0) {
// Newly inserted name. Success!
NewName.setValue(V);
@@ -88,7 +86,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
}
// Otherwise, there is a naming conflict. Rename this value.
- SmallString<128> UniqueName(Name.begin(), Name.end());
+ SmallString<256> UniqueName(Name.begin(), Name.end());
while (1) {
// Trim any suffix off and append the next number.
@@ -96,9 +94,7 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
raw_svector_ostream(UniqueName) << ++LastUnique;
// Try to insert the vmap entry with this suffix.
- ValueName &NewName =
- vmap.GetOrCreateValue(StringRef(UniqueName.data(),
- UniqueName.size()));
+ ValueName &NewName = vmap.GetOrCreateValue(UniqueName);
if (NewName.getValue() == 0) {
// Newly inserted name. Success!
NewName.setValue(V);
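
Both ValueSymbolTable loops above implement the same rename-on-collision scheme: on a name clash, append an increasing counter and retry until the map insertion succeeds (the real code also trims any previous numeric suffix before appending, which this sketch omits). A simplified standalone version with std::set in place of the StringMap:

#include <cassert>
#include <set>
#include <string>

static unsigned LastUnique = 0;   // like the table's LastUnique member

std::string makeUnique(std::set<std::string> &Taken, const std::string &Name) {
  if (Taken.insert(Name).second)
    return Name;                                  // no conflict
  while (true) {
    std::string Candidate = Name + std::to_string(++LastUnique);
    if (Taken.insert(Candidate).second)
      return Candidate;                           // newly inserted: success
  }
}

int main() {
  std::set<std::string> T;
  assert(makeUnique(T, "x") == "x");
  assert(makeUnique(T, "x") == "x1");
  assert(makeUnique(T, "x") == "x2");
}
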
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index f141382..c18168d 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -676,17 +676,13 @@ void Verifier::visitFunction(Function &F) {
"blockaddress may not be used with the entry block!", Entry);
}
}
-
+
// If this function is actually an intrinsic, verify that it is only used in
// direct call/invokes, never having its "address taken".
if (F.getIntrinsicID()) {
- for (Value::use_iterator UI = F.use_begin(), E = F.use_end(); UI != E;++UI){
- User *U = cast<User>(UI);
- if ((isa<CallInst>(U) || isa<InvokeInst>(U)) && UI.getOperandNo() == 0)
- continue; // Direct calls/invokes are ok.
-
+ const User *U;
+ if (F.hasAddressTaken(&U))
Assert1(0, "Invalid user of intrinsic instruction!", U);
- }
}
}
@@ -1483,7 +1479,7 @@ void Verifier::visitInstruction(Instruction &I) {
"Instruction does not dominate all uses!", Op, &I);
}
} else if (isa<InlineAsm>(I.getOperand(i))) {
- Assert1(i == 0 && (isa<CallInst>(I) || isa<InvokeInst>(I)),
+ Assert1((i == 0 && isa<CallInst>(I)) || (i + 3 == e && isa<InvokeInst>(I)),
"Cannot take the address of an inline asm!", &I);
}
}
@@ -1683,13 +1679,11 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
/// parameters beginning with NumRets.
///
static std::string IntrinsicParam(unsigned ArgNo, unsigned NumRets) {
- if (ArgNo < NumRets) {
- if (NumRets == 1)
- return "Intrinsic result type";
- else
- return "Intrinsic result type #" + utostr(ArgNo);
- } else
+ if (ArgNo >= NumRets)
return "Intrinsic parameter #" + utostr(ArgNo - NumRets);
+ if (NumRets == 1)
+ return "Intrinsic result type";
+ return "Intrinsic result type #" + utostr(ArgNo);
}
bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
@@ -1706,9 +1700,13 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
const Type *RetTy = FTy->getReturnType();
const StructType *ST = dyn_cast<StructType>(RetTy);
- unsigned NumRets = 1;
- if (ST)
- NumRets = ST->getNumElements();
+ unsigned NumRetVals;
+ if (RetTy->isVoidTy())
+ NumRetVals = 0;
+ else if (ST)
+ NumRetVals = ST->getNumElements();
+ else
+ NumRetVals = 1;
if (VT < 0) {
int Match = ~VT;
@@ -1720,7 +1718,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
TruncatedElementVectorType)) != 0) {
const IntegerType *IEltTy = dyn_cast<IntegerType>(EltTy);
if (!VTy || !IEltTy) {
- CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not "
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not "
"an integral vector type.", F);
return false;
}
@@ -1728,7 +1726,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
// the type being matched against.
if ((Match & ExtendedElementVectorType) != 0) {
if ((IEltTy->getBitWidth() & 1) != 0) {
- CheckFailed(IntrinsicParam(ArgNo, NumRets) + " vector "
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " vector "
"element bit-width is odd.", F);
return false;
}
@@ -1738,25 +1736,25 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
Match &= ~(ExtendedElementVectorType | TruncatedElementVectorType);
}
- if (Match <= static_cast<int>(NumRets - 1)) {
+ if (Match <= static_cast<int>(NumRetVals - 1)) {
if (ST)
RetTy = ST->getElementType(Match);
if (Ty != RetTy) {
- CheckFailed(IntrinsicParam(ArgNo, NumRets) + " does not "
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " does not "
"match return type.", F);
return false;
}
} else {
- if (Ty != FTy->getParamType(Match - NumRets)) {
- CheckFailed(IntrinsicParam(ArgNo, NumRets) + " does not "
- "match parameter %" + utostr(Match - NumRets) + ".", F);
+ if (Ty != FTy->getParamType(Match - NumRetVals)) {
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " does not "
+ "match parameter %" + utostr(Match - NumRetVals) + ".", F);
return false;
}
}
} else if (VT == MVT::iAny) {
if (!EltTy->isIntegerTy()) {
- CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not "
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not "
"an integer type.", F);
return false;
}
@@ -1781,7 +1779,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
}
} else if (VT == MVT::fAny) {
if (!EltTy->isFloatingPointTy()) {
- CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not "
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not "
"a floating-point type.", F);
return false;
}
@@ -1794,13 +1792,14 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
Suffix += EVT::getEVT(EltTy).getEVTString();
} else if (VT == MVT::vAny) {
if (!VTy) {
- CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a vector type.", F);
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a vector type.",
+ F);
return false;
}
Suffix += ".v" + utostr(NumElts) + EVT::getEVT(EltTy).getEVTString();
} else if (VT == MVT::iPTR) {
if (!Ty->isPointerTy()) {
- CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a "
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a "
"pointer and a pointer is required.", F);
return false;
}
@@ -1812,7 +1811,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
Suffix += ".p" + utostr(PTyp->getAddressSpace()) +
EVT::getEVT(PTyp->getElementType()).getEVTString();
} else {
- CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a "
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is not a "
"pointer and a pointer is required.", F);
return false;
}
@@ -1832,10 +1831,10 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
}
} else if (EVT((MVT::SimpleValueType)VT).getTypeForEVT(Ty->getContext()) !=
EltTy) {
- CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is wrong!", F);
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is wrong!", F);
return false;
} else if (EltTy != Ty) {
- CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is a vector "
+ CheckFailed(IntrinsicParam(ArgNo, NumRetVals) + " is a vector "
"and a scalar is required.", F);
return false;
}
@@ -1847,10 +1846,10 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
/// Intrinsics.gen. This implements a little state machine that verifies the
/// prototype of intrinsics.
void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
- unsigned RetNum,
- unsigned ParamNum, ...) {
+ unsigned NumRetVals,
+ unsigned NumParams, ...) {
va_list VA;
- va_start(VA, ParamNum);
+ va_start(VA, NumParams);
const FunctionType *FTy = F->getFunctionType();
// For overloaded intrinsics, the Suffix of the function name must match the
@@ -1858,7 +1857,7 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
// suffix, to be checked at the end.
std::string Suffix;
- if (FTy->getNumParams() + FTy->isVarArg() != ParamNum) {
+ if (FTy->getNumParams() + FTy->isVarArg() != NumParams) {
CheckFailed("Intrinsic prototype has incorrect number of arguments!", F);
return;
}
@@ -1866,23 +1865,27 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
const Type *Ty = FTy->getReturnType();
const StructType *ST = dyn_cast<StructType>(Ty);
+ if (NumRetVals == 0 && !Ty->isVoidTy()) {
+ CheckFailed("Intrinsic should return void", F);
+ return;
+ }
+
// Verify the return types.
- if (ST && ST->getNumElements() != RetNum) {
+ if (ST && ST->getNumElements() != NumRetVals) {
CheckFailed("Intrinsic prototype has incorrect number of return types!", F);
return;
}
-
- for (unsigned ArgNo = 0; ArgNo < RetNum; ++ArgNo) {
+
+ for (unsigned ArgNo = 0; ArgNo != NumRetVals; ++ArgNo) {
int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative.
if (ST) Ty = ST->getElementType(ArgNo);
-
if (!PerformTypeCheck(ID, F, Ty, VT, ArgNo, Suffix))
break;
}
// Verify the parameter types.
- for (unsigned ArgNo = 0; ArgNo < ParamNum; ++ArgNo) {
+ for (unsigned ArgNo = 0; ArgNo != NumParams; ++ArgNo) {
int VT = va_arg(VA, int); // An MVT::SimpleValueType when non-negative.
if (VT == MVT::isVoid && ArgNo > 0) {
@@ -1891,8 +1894,8 @@ void Verifier::VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F,
break;
}
- if (!PerformTypeCheck(ID, F, FTy->getParamType(ArgNo), VT, ArgNo + RetNum,
- Suffix))
+ if (!PerformTypeCheck(ID, F, FTy->getParamType(ArgNo), VT,
+ ArgNo + NumRetVals, Suffix))
break;
}
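
For reference, the renamed IntrinsicParam helper above numbers positions below NumRetVals as return values and the rest as parameters, which is what lets a void return (NumRetVals == 0) label every position as a parameter. A standalone version of the restructured helper, with std::to_string in place of utostr:

#include <cassert>
#include <string>

static std::string IntrinsicParam(unsigned ArgNo, unsigned NumRets) {
  if (ArgNo >= NumRets)
    return "Intrinsic parameter #" + std::to_string(ArgNo - NumRets);
  if (NumRets == 1)
    return "Intrinsic result type";
  return "Intrinsic result type #" + std::to_string(ArgNo);
}

int main() {
  assert(IntrinsicParam(0, 0) == "Intrinsic parameter #0"); // void return
  assert(IntrinsicParam(0, 1) == "Intrinsic result type");
  assert(IntrinsicParam(1, 2) == "Intrinsic result type #1");
  assert(IntrinsicParam(3, 2) == "Intrinsic parameter #1");
}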