author    rdivacky <rdivacky@FreeBSD.org>    2010-07-15 17:06:11 +0000
committer rdivacky <rdivacky@FreeBSD.org>    2010-07-15 17:06:11 +0000
commit    c1c3262b63b1d5fbba6a7ad188f4e47d92c7840e (patch)
tree      5b6d391c72c9875f0065f0e772e872bc8544834b /lib
parent    9112829d76cbb8e0c8ef51bbc2d7d1be48cd7b74 (diff)
Update LLVM to r108428.
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 48
-rw-r--r--  lib/Analysis/ProfileInfo.cpp | 10
-rw-r--r--  lib/AsmParser/LLParser.cpp | 94
-rw-r--r--  lib/AsmParser/LLParser.h | 1
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 5
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 14
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 17
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 9
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 23
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 54
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 45
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 2
-rw-r--r--  lib/Support/APFloat.cpp | 9
-rw-r--r--  lib/Support/APInt.cpp | 6
-rw-r--r--  lib/Support/Regex.cpp | 4
-rw-r--r--  lib/Support/StringPool.cpp | 2
-rw-r--r--  lib/System/Unix/Program.inc | 5
-rw-r--r--  lib/Target/ARM/ARM.td | 4
-rw-r--r--  lib/Target/ARM/ARMAddressingModes.h | 5
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 466
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 21
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 41
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 145
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 37
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 4
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 14
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 6
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 2
-rw-r--r--  lib/Target/ARM/README.txt | 67
-rw-r--r--  lib/Target/Alpha/AlphaMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaMCAsmInfo.h | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinMCAsmInfo.h | 4
-rw-r--r--  lib/Target/CellSPU/SPUMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUMCAsmInfo.h | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeMCAsmInfo.h | 4
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 3
-rw-r--r--  lib/Target/MSIL/MSILWriter.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430MCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/MSP430/MSP430MCAsmInfo.h | 5
-rw-r--r--  lib/Target/Mips/MipsMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsMCAsmInfo.h | 4
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16MCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16MCAsmInfo.h | 2
-rw-r--r--  lib/Target/Sparc/SparcMCAsmInfo.cpp | 3
-rw-r--r--  lib/Target/Sparc/SparcMCAsmInfo.h | 5
-rw-r--r--  lib/Target/SystemZ/SystemZMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZMCAsmInfo.h | 2
-rw-r--r--  lib/Target/TargetMachine.cpp | 2
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 6
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 13
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 16
-rw-r--r--  lib/Target/X86/X86COFF.h | 95
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 7
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 74
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 1
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 3
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 8
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 116
-rw-r--r--  lib/Target/X86/X86MCCodeEmitter.cpp | 3
-rw-r--r--  lib/Target/XCore/XCoreMCAsmInfo.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreMCAsmInfo.h | 5
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 2
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 5
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 19
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 28
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 11
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 5
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 71
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 14
-rw-r--r--  lib/VMCore/Metadata.cpp | 3
75 files changed, 1070 insertions, 674 deletions
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index dbefc2d..24cd343 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -440,27 +440,47 @@ void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
const TargetData *TD) {
assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
- // FromHandle - This keeps a weakvh on the from value so that we can know if
- // it gets deleted out from under us in a recursive simplification.
+ // FromHandle/ToHandle - This keeps a WeakVH on the from/to values so that
+ // we can know if it gets deleted out from under us or replaced in a
+ // recursive simplification.
WeakVH FromHandle(From);
+ WeakVH ToHandle(To);
while (!From->use_empty()) {
// Update the instruction to use the new value.
- Use &U = From->use_begin().getUse();
- Instruction *User = cast<Instruction>(U.getUser());
- U = To;
+ Use &TheUse = From->use_begin().getUse();
+ Instruction *User = cast<Instruction>(TheUse.getUser());
+ TheUse = To;
+
+ // Check to see if the instruction can be folded due to the operand
+ // replacement. For example changing (or X, Y) into (or X, -1) can replace
+ // the 'or' with -1.
+ Value *SimplifiedVal;
+ {
+ // Sanity check to make sure 'User' doesn't dangle across
+ // SimplifyInstruction.
+ AssertingVH<> UserHandle(User);
- // See if we can simplify it.
- if (Value *V = SimplifyInstruction(User, TD)) {
- // Recursively simplify this.
- ReplaceAndSimplifyAllUses(User, V, TD);
-
- // If the recursive simplification ended up revisiting and deleting 'From'
- // then we're done.
- if (FromHandle == 0)
- return;
+ SimplifiedVal = SimplifyInstruction(User, TD);
+ if (SimplifiedVal == 0) continue;
}
+
+ // Recursively simplify this user to the new value.
+ ReplaceAndSimplifyAllUses(User, SimplifiedVal, TD);
+ From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
+ To = ToHandle;
+
+ assert(ToHandle && "To value deleted by recursive simplification?");
+
+ // If the recursive simplification ended up revisiting and deleting
+ // 'From' then we're done.
+ if (From == 0)
+ return;
}
+
+ // If 'From' has value handles referring to it, do a real RAUW to update them.
+ From->replaceAllUsesWith(To);
+
From->eraseFromParent();
}
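The rewritten loop leans entirely on LLVM's value handles: a WeakVH nulls itself when its value is deleted and follows replaceAllUsesWith, which is what lets the recursion detect that 'From' died or that 'To' was replaced underneath it. A minimal sketch of that guard pattern, assuming the LLVM 2.8-era headers (an illustration of the idiom, not a drop-in for the function above):

```cpp
// Sketch of the WeakVH guard pattern above; assumes LLVM 2.8-era headers.
#include "llvm/Instruction.h"
#include "llvm/Support/ValueHandle.h"
using namespace llvm;

void replaceGuarded(Instruction *From, Value *To) {
  WeakVH FromHandle(From); // nulled if From is deleted during recursion
  WeakVH ToHandle(To);     // follows To if it is RAUW'd during recursion
  // ... recursively simplify From's users here; either value may change ...
  From = dyn_cast_or_null<Instruction>((Value*)FromHandle);
  To = ToHandle;
  if (From == 0)
    return; // the recursion already revisited and deleted 'From'
  From->replaceAllUsesWith(To); // real RAUW: updates other value handles too
  From->eraseFromParent();
}
```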
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 38dcd25..8d2712f 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -71,22 +71,24 @@ ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
// Are there zero predecessors of this block?
if (PI == PE) {
- Edge e = getEdge(0,BB);
+ Edge e = getEdge(0, BB);
Count = getEdgeWeight(e);
} else {
// Otherwise, if there are predecessors, the execution count of this block is
// the sum of the edge frequencies from the incoming edges.
std::set<const BasicBlock*> ProcessedPreds;
Count = 0;
- for (; PI != PE; ++PI)
- if (ProcessedPreds.insert(*PI).second) {
- double w = getEdgeWeight(getEdge(*PI, BB));
+ for (; PI != PE; ++PI) {
+ const BasicBlock *P = *PI;
+ if (ProcessedPreds.insert(P).second) {
+ double w = getEdgeWeight(getEdge(P, BB));
if (w == MissingValue) {
Count = MissingValue;
break;
}
Count += w;
}
+ }
}
// If the predecessors did not suffice to get block weight, try successors.
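The point of the restructured loop body is that a block's count sums over distinct predecessors: ProcessedPreds.insert(P).second is true only the first time P is inserted, so repeated edges from the same predecessor are counted once. A standalone sketch of that idiom with plain stand-in types:

```cpp
#include <cassert>
#include <set>
#include <utility>
#include <vector>

// Sum incoming edge weights, counting each predecessor (first of the
// pair) only once; set::insert returns {iterator, inserted-now?}.
double sumIncoming(const std::vector<std::pair<int, double> > &PredEdges) {
  std::set<int> ProcessedPreds;
  double Count = 0;
  for (size_t i = 0, e = PredEdges.size(); i != e; ++i)
    if (ProcessedPreds.insert(PredEdges[i].first).second)
      Count += PredEdges[i].second;
  return Count;
}

int main() {
  std::vector<std::pair<int, double> > Edges;
  Edges.push_back(std::make_pair(1, 2.0));
  Edges.push_back(std::make_pair(1, 2.0)); // repeated predecessor: skipped
  Edges.push_back(std::make_pair(2, 3.0));
  assert(sumIncoming(Edges) == 5.0);
  return 0;
}
```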
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 6752181..221b994 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -544,20 +544,21 @@ bool LLParser::ParseNamedMetadata() {
return true;
SmallVector<MDNode *, 8> Elts;
- do {
- // Null is a special case since it is typeless.
- if (EatIfPresent(lltok::kw_null)) {
- Elts.push_back(0);
- continue;
- }
+ if (Lex.getKind() != lltok::rbrace)
+ do {
+ // Null is a special case since it is typeless.
+ if (EatIfPresent(lltok::kw_null)) {
+ Elts.push_back(0);
+ continue;
+ }
- if (ParseToken(lltok::exclaim, "Expected '!' here"))
- return true;
+ if (ParseToken(lltok::exclaim, "Expected '!' here"))
+ return true;
- MDNode *N = 0;
- if (ParseMDNodeID(N)) return true;
- Elts.push_back(N);
- } while (EatIfPresent(lltok::comma));
+ MDNode *N = 0;
+ if (ParseMDNodeID(N)) return true;
+ Elts.push_back(N);
+ } while (EatIfPresent(lltok::comma));
if (ParseToken(lltok::rbrace, "expected end of metadata node"))
return true;
@@ -2021,33 +2022,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ID.StrVal = Lex.getStrVal();
ID.Kind = ValID::t_LocalName;
break;
- case lltok::exclaim: // !{...} MDNode, !"foo" MDString
- Lex.Lex();
-
- if (EatIfPresent(lltok::lbrace)) {
- SmallVector<Value*, 16> Elts;
- if (ParseMDNodeVector(Elts, PFS) ||
- ParseToken(lltok::rbrace, "expected end of metadata node"))
- return true;
-
- ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
- ID.Kind = ValID::t_MDNode;
- return false;
- }
-
- // Standalone metadata reference
- // !{ ..., !42, ... }
- if (Lex.getKind() == lltok::APSInt) {
- if (ParseMDNodeID(ID.MDNodeVal)) return true;
- ID.Kind = ValID::t_MDNode;
- return false;
- }
-
- // MDString:
- // ::= '!' STRINGCONSTANT
- if (ParseMDString(ID.MDStringVal)) return true;
- ID.Kind = ValID::t_MDString;
- return false;
+ case lltok::exclaim: // !42, !{...}, or !"foo"
+ return ParseMetadataValue(ID, PFS);
case lltok::APSInt:
ID.APSIntVal = Lex.getAPSIntVal();
ID.Kind = ValID::t_APSInt;
@@ -2528,6 +2504,42 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
return false;
}
+/// ParseMetadataValue
+/// ::= !42
+/// ::= !{...}
+/// ::= !"string"
+bool LLParser::ParseMetadataValue(ValID &ID, PerFunctionState *PFS) {
+ assert(Lex.getKind() == lltok::exclaim);
+ Lex.Lex();
+
+ // MDNode:
+ // !{ ... }
+ if (EatIfPresent(lltok::lbrace)) {
+ SmallVector<Value*, 16> Elts;
+ if (ParseMDNodeVector(Elts, PFS) ||
+ ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ ID.MDNodeVal = MDNode::get(Context, Elts.data(), Elts.size());
+ ID.Kind = ValID::t_MDNode;
+ return false;
+ }
+
+ // Standalone metadata reference
+ // !42
+ if (Lex.getKind() == lltok::APSInt) {
+ if (ParseMDNodeID(ID.MDNodeVal)) return true;
+ ID.Kind = ValID::t_MDNode;
+ return false;
+ }
+
+ // MDString:
+ // ::= '!' STRINGCONSTANT
+ if (ParseMDString(ID.MDStringVal)) return true;
+ ID.Kind = ValID::t_MDString;
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Function Parsing.
@@ -3983,6 +3995,10 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= 'null' | TypeAndValue
bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
PerFunctionState *PFS) {
+ // Check for an empty list.
+ if (Lex.getKind() == lltok::rbrace)
+ return false;
+
do {
// Null is a special case since it is typeless.
if (EatIfPresent(lltok::kw_null)) {
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index c8f669f..f765a2a 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -308,6 +308,7 @@ namespace llvm {
bool ParseGlobalValue(const Type *Ty, Constant *&V);
bool ParseGlobalTypeAndValue(Constant *&V);
bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
+ bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
// Function Parsing.
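Both LLParser hunks fix the same gap: a comma-separated list parsed with a do/while could never be empty, so constructs like `!name = !{}` failed to parse. Peeking for the closing brace before entering the loop fixes that. A toy, self-contained sketch of the pattern (a hypothetical mini-parser, not LLParser itself):

```cpp
#include <cassert>
#include <sstream>
#include <string>
#include <vector>

// Toy illustration of the empty-list fix: peek for the closing brace
// *before* entering the comma-separated do/while, so "{}" parses to
// zero elements instead of erroring on a missing first item.
static std::vector<std::string> parseBraceList(std::istringstream &In) {
  std::vector<std::string> Elts;
  assert(In.get() == '{');
  if (In.peek() != '}')                       // the new empty-list check
    do {
      std::string Tok;
      while (In.peek() != ',' && In.peek() != '}')
        Tok += (char)In.get();
      Elts.push_back(Tok);
    } while (In.peek() == ',' && In.get());   // eat the comma, keep looping
  assert(In.get() == '}');
  return Elts;
}

int main() {
  std::istringstream Empty("{}"), Two("{a,b}");
  assert(parseBraceList(Empty).empty());
  assert(parseBraceList(Two).size() == 2);
  return 0;
}
```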
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 527ae49..b3f0776 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -820,7 +820,7 @@ bool BitcodeReader::ParseMetadata() {
IsFunctionLocal = true;
// fall-through
case bitc::METADATA_NODE: {
- if (Record.empty() || Record.size() % 2 == 1)
+ if (Record.size() % 2 == 1)
return Error("Invalid METADATA_NODE record");
unsigned Size = Record.size();
@@ -834,7 +834,8 @@ bool BitcodeReader::ParseMetadata() {
else
Elts.push_back(NULL);
}
- Value *V = MDNode::getWhenValsUnresolved(Context, &Elts[0], Elts.size(),
+ Value *V = MDNode::getWhenValsUnresolved(Context,
+ Elts.data(), Elts.size(),
IsFunctionLocal);
IsFunctionLocal = false;
MDValueList.AssignValue(V, NextMDValueNo++);
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d9387a8..db1b37a 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -178,7 +178,7 @@ bool AsmPrinter::doInitialization(Module &M) {
if (!M.getModuleInlineAsm().empty()) {
OutStreamer.AddComment("Start of file scope inline assembly");
OutStreamer.AddBlankLine();
- EmitInlineAsm(M.getModuleInlineAsm(), 0/*no loc cookie*/);
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n", 0/*no loc cookie*/);
OutStreamer.AddComment("End of file scope inline assembly");
OutStreamer.AddBlankLine();
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index f6f3bae..202d9b6 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -53,17 +53,6 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
}
SourceMgr SrcMgr;
-
- // Ensure the buffer is newline terminated.
- char *TmpString = 0;
- if (Str.back() != '\n') {
- TmpString = new char[Str.size() + 2];
- memcpy(TmpString, Str.data(), Str.size());
- TmpString[Str.size()] = '\n';
- TmpString[Str.size() + 1] = 0;
- isNullTerminated = true;
- Str = TmpString;
- }
// If the current LLVMContext has an inline asm handler, set it in SourceMgr.
LLVMContext &LLVMCtx = MMI->getModule()->getContext();
@@ -95,9 +84,6 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
/*NoFinalize*/ true);
if (Res && !HasDiagHandler)
report_fatal_error("Error parsing inline asm\n");
-
- if (TmpString)
- delete[] TmpString;
}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 21a9b7d..ad57284 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -119,6 +119,7 @@ bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const {
//
bool LiveInterval::overlapsFrom(const LiveInterval& other,
const_iterator StartPos) const {
+ assert(!empty() && "empty interval");
const_iterator i = begin();
const_iterator ie = end();
const_iterator j = StartPos;
@@ -161,16 +162,8 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other,
/// by [Start, End).
bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const {
assert(Start < End && "Invalid range");
- const_iterator I = begin();
- const_iterator E = end();
- const_iterator si = std::upper_bound(I, E, Start);
- const_iterator ei = std::upper_bound(I, E, End);
- if (si != ei)
- return true;
- if (si == I)
- return false;
- --si;
- return si->contains(Start);
+ const_iterator I = std::lower_bound(begin(), end(), End);
+ return I != begin() && (--I)->end > Start;
}
/// extendIntervalEndTo - This method is used when we want to extend the range
@@ -868,6 +861,10 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
OS << "?";
else
OS << vni->def;
+ if (vni->hasPHIKill())
+ OS << "-phikill";
+ if (vni->hasRedefByEC())
+ OS << "-ec";
}
}
}
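The new overlaps() collapses two upper_bound searches into one lower_bound plus a boundary test: find the first range starting at or after End; only the range just before that bound can begin before End, and it overlaps iff it extends past Start. A standalone sketch with plain types (assuming, as in LiveInterval, half-open ranges kept sorted by start and pairwise disjoint):

```cpp
#include <algorithm>
#include <cassert>
#include <vector>

// Half-open [start, end) ranges, sorted by start, pairwise disjoint.
struct Range { unsigned start, end; };

// Heterogeneous comparison so lower_bound can search by a bare index.
static bool operator<(const Range &R, unsigned V) { return R.start < V; }

// Does any range intersect the query [Start, End)?
static bool overlaps(const std::vector<Range> &Rs,
                     unsigned Start, unsigned End) {
  assert(Start < End && "Invalid range");
  // First range starting at or after End: it and everything after it
  // begin too late. Only the range just before the bound can start
  // before End, and it overlaps iff it extends past Start.
  std::vector<Range>::const_iterator I =
      std::lower_bound(Rs.begin(), Rs.end(), End);
  return I != Rs.begin() && (--I)->end > Start;
}

int main() {
  std::vector<Range> Rs;
  Range A = { 2, 5 }, B = { 8, 10 };
  Rs.push_back(A); Rs.push_back(B);
  assert(overlaps(Rs, 4, 6));  // clips the tail of [2,5)
  assert(!overlaps(Rs, 5, 8)); // falls exactly into the gap
  assert(overlaps(Rs, 0, 3));  // clips the head of [2,5)
  return 0;
}
```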
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 956d21c..4c054f5 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -497,11 +497,6 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
/// candidate for LICM. e.g. If the instruction is a call, then it's obviously
/// not safe to hoist it.
bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
- // It is not profitable to hoist implicitdefs. FIXME: Why not? what if they
- // are an argument to some other otherwise-hoistable instruction?
- if (I.isImplicitDef())
- return false;
-
// Check if it's safe to move the instruction.
bool DontMoveAcrossStore = true;
if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
@@ -717,7 +712,9 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
bool MachineLICM::EliminateCSE(MachineInstr *MI,
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
- if (CI == CSEMap.end())
+ // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
+ // the undef property onto uses.
+ if (CI == CSEMap.end() || MI->isImplicitDef())
return false;
if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 25284d6..15778b4 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -563,3 +563,26 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
return 0;
}
+namespace {
+ /// VariableDebugSorter - Comparison to sort the VariableDbgInfo map
+ /// by source location, to avoid depending on the arbitrary order that
+ /// instruction selection visits variables in.
+ struct VariableDebugSorter {
+ bool operator()(const MachineModuleInfo::VariableDbgInfoMapTy::value_type &A,
+ const MachineModuleInfo::VariableDbgInfoMapTy::value_type &B)
+ const {
+ if (A.second.second.getLine() != B.second.second.getLine())
+ return A.second.second.getLine() < B.second.second.getLine();
+ if (A.second.second.getCol() != B.second.second.getCol())
+ return A.second.second.getCol() < B.second.second.getCol();
+ return false;
+ }
+ };
+}
+
+MachineModuleInfo::VariableDbgInfoMapTy &
+MachineModuleInfo::getVariableDbgInfo() {
+ std::stable_sort(VariableDbgInfo.begin(), VariableDbgInfo.end(),
+ VariableDebugSorter());
+ return VariableDbgInfo;
+}
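The new getVariableDbgInfo() stable-sorts the records by source position before handing them out, so output order no longer depends on the arbitrary order instruction selection visited variables; stable_sort preserves the original order for ties. The same comparator idiom with plain stand-in types:

```cpp
#include <algorithm>
#include <vector>

// Hypothetical stand-in for a (variable, source location) record.
struct VarDbg {
  const char *Name;
  unsigned Line, Col;
};

// Order by (line, col); ties return false so stable_sort keeps the
// records' original relative order.
struct ByLocation {
  bool operator()(const VarDbg &A, const VarDbg &B) const {
    if (A.Line != B.Line) return A.Line < B.Line;
    if (A.Col != B.Col) return A.Col < B.Col;
    return false;
  }
};

void sortByLocation(std::vector<VarDbg> &Vars) {
  std::stable_sort(Vars.begin(), Vars.end(), ByLocation());
}
```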
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index ca4c477..2e31908 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -41,21 +41,51 @@ void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
- unsigned Reg, unsigned OpIdx,
- const TargetInstrInfo *tii_) {
+bool
+ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI,
+ unsigned Reg, unsigned OpIdx,
+ const TargetInstrInfo *tii_,
+ SmallSet<unsigned, 8> &ImpDefRegs) {
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg && DstSubReg == 0)
+ Reg == SrcReg &&
+ (DstSubReg == 0 || ImpDefRegs.count(DstReg)))
return true;
switch(OpIdx) {
- case 1: return MI->isCopy() && MI->getOperand(0).getSubReg() == 0;
- case 2: return MI->isSubregToReg() && MI->getOperand(0).getSubReg() == 0;
- default: return false;
+ case 1:
+ return MI->isCopy() && (MI->getOperand(0).getSubReg() == 0 ||
+ ImpDefRegs.count(MI->getOperand(0).getReg()));
+ case 2:
+ return MI->isSubregToReg() && (MI->getOperand(0).getSubReg() == 0 ||
+ ImpDefRegs.count(MI->getOperand(0).getReg()));
+ default: return false;
}
}
+static bool isUndefCopy(MachineInstr *MI, unsigned Reg,
+ const TargetInstrInfo *tii_,
+ SmallSet<unsigned, 8> &ImpDefRegs) {
+ if (MI->isCopy()) {
+ MachineOperand &MO0 = MI->getOperand(0);
+ MachineOperand &MO1 = MI->getOperand(1);
+ if (MO1.getReg() != Reg)
+ return false;
+ if (!MO0.getSubReg() || ImpDefRegs.count(MO0.getReg()))
+ return true;
+ return false;
+ }
+
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) {
+ if (Reg != SrcReg)
+ return false;
+ if (DstSubReg == 0 || ImpDefRegs.count(DstReg))
+ return true;
+ }
+ return false;
+}
+
/// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure
/// there is one implicit_def for each use. Add isUndef marker to
/// implicit_def defs and their uses.
@@ -104,7 +134,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
// Eliminate %reg1032:sub<def> = COPY undef.
if (MI->isCopy() && MI->getOperand(0).getSubReg()) {
MachineOperand &MO = MI->getOperand(1);
- if (ImpDefRegs.count(MO.getReg())) {
+ if (MO.isUndef() || ImpDefRegs.count(MO.getReg())) {
if (MO.isKill()) {
LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg());
vi.removeKill(MI);
@@ -126,7 +156,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
if (!ImpDefRegs.count(Reg))
continue;
// Use is a copy, just turn it into an implicit_def.
- if (CanTurnIntoImplicitDef(MI, Reg, i, tii_)) {
+ if (CanTurnIntoImplicitDef(MI, Reg, i, tii_, ImpDefRegs)) {
bool isKill = MO.isKill();
MI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j)
@@ -223,11 +253,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MachineInstr *RMI = RUses[i];
// Turn a copy use into an implicit_def.
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if ((RMI->isCopy() && RMI->getOperand(1).getReg() == Reg &&
- RMI->getOperand(0).getSubReg() == 0) ||
- (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
- Reg == SrcReg && DstSubReg == 0)) {
+ if (isUndefCopy(RMI, Reg, tii_, ImpDefRegs)) {
RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF));
bool isKill = false;
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 3f7e4a5..decaa76 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -135,7 +135,7 @@ unsigned FastISel::getRegForValue(const Value *V) {
!FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V))))
return FuncInfo.InitializeRegForValue(V);
- MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+ SavePoint SaveInsertPt = enterLocalValueArea();
// Materialize the value in a register. Emit any instructions in the
// local value area.
@@ -286,18 +286,22 @@ void FastISel::recomputeInsertPt() {
++FuncInfo.InsertPt;
}
-MachineBasicBlock::iterator FastISel::enterLocalValueArea() {
+FastISel::SavePoint FastISel::enterLocalValueArea() {
MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt;
+ DebugLoc OldDL = DL;
recomputeInsertPt();
- return OldInsertPt;
+ DL = DebugLoc();
+ SavePoint SP = { OldInsertPt, OldDL };
+ return SP;
}
-void FastISel::leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt) {
+void FastISel::leaveLocalValueArea(SavePoint OldInsertPt) {
if (FuncInfo.InsertPt != FuncInfo.MBB->begin())
LastLocalValue = llvm::prior(FuncInfo.InsertPt);
// Restore the previous insert position.
- FuncInfo.InsertPt = OldInsertPt;
+ FuncInfo.InsertPt = OldInsertPt.InsertPt;
+ DL = OldInsertPt.DL;
}
/// SelectBinaryOp - Select and emit code for a binary operator instruction,
@@ -779,39 +783,8 @@ FastISel::SelectFNeg(const User *I) {
}
bool
-FastISel::SelectLoad(const User *I) {
- LoadInst *LI = const_cast<LoadInst *>(cast<LoadInst>(I));
-
- // For a load from an alloca, make a limited effort to find the value
- // already available in a register, avoiding redundant loads.
- if (!LI->isVolatile() && isa<AllocaInst>(LI->getPointerOperand())) {
- BasicBlock::iterator ScanFrom = LI;
- if (const Value *V = FindAvailableLoadedValue(LI->getPointerOperand(),
- LI->getParent(), ScanFrom)) {
- if (!V->use_empty() &&
- (!isa<Instruction>(V) ||
- cast<Instruction>(V)->getParent() == LI->getParent() ||
- (isa<AllocaInst>(V) &&
- FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(V)))) &&
- (!isa<Argument>(V) ||
- LI->getParent() == &LI->getParent()->getParent()->getEntryBlock())) {
- unsigned ResultReg = getRegForValue(V);
- if (ResultReg != 0) {
- UpdateValueMap(I, ResultReg);
- return true;
- }
- }
- }
- }
-
- return false;
-}
-
-bool
FastISel::SelectOperator(const User *I, unsigned Opcode) {
switch (Opcode) {
- case Instruction::Load:
- return SelectLoad(I);
case Instruction::Add:
return SelectBinaryOp(I, ISD::ADD);
case Instruction::FAdd:
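enterLocalValueArea/leaveLocalValueArea now save and restore an (insert position, debug location) pair rather than the bare iterator, so instructions emitted in the local-value area don't inherit the current statement's DebugLoc. A generic sketch of the save-point idiom with hypothetical stand-in types (the real SavePoint members are inferred from the .InsertPt/.DL accesses above):

```cpp
#include <list>
#include <string>

// Hypothetical stand-ins for the insertion cursor and debug location.
typedef std::list<std::string>::iterator InsertPos;

struct Emitter {
  std::list<std::string> Stream; // the "instruction" stream
  InsertPos InsertPt;            // where new entries are inserted
  std::string DL;                // current debug location

  // Bundle everything that must survive the detour, as the FastISel
  // change does with { InsertPt, DL }.
  struct SavePoint {
    InsertPos InsertPt;
    std::string DL;
  };

  SavePoint enterLocalValueArea() {
    SavePoint SP = { InsertPt, DL };
    InsertPt = Stream.begin(); // move to the local-value area
    DL.clear();                // local values carry no debug location
    return SP;
  }

  void leaveLocalValueArea(const SavePoint &SP) {
    InsertPt = SP.InsertPt;    // restore both pieces of state
    DL = SP.DL;
  }
};
```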
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index d323c16..458e865 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -820,7 +820,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL);
+ return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
}
// Otherwise create a new SDValue and remember it.
@@ -3955,7 +3955,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
AliasAnalysis::NoAlias) {
DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- false, I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+ false, I.getArgOperand(0), 0,
+ I.getArgOperand(1), 0));
return 0;
}
@@ -5522,10 +5523,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
break;
}
- if (OpInfo.ConstraintType == TargetLowering::C_Other) {
- assert(!OpInfo.isIndirect &&
- "Don't know how to handle indirect other inputs yet!");
+ // Treat indirect 'X' constraint as memory.
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
+ OpInfo.ConstraintType = TargetLowering::C_Memory;
+ if (OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0],
Ops, DAG);
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 793f3c7..e0949bd 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -712,7 +712,7 @@ bool AsmParser::ParseStatement() {
return HadError;
}
-bool AsmParser::ParseAssignment(const StringRef &Name) {
+bool AsmParser::ParseAssignment(StringRef Name) {
// FIXME: Use better location, we should use proper tokens.
SMLoc EqualLoc = Lexer.getLoc();
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 485bf4d..2e78557 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -761,7 +761,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics,
makeNaN();
}
-APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text)
+APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text)
{
assertArithmeticOK(ourSemantics);
initialize(&ourSemantics);
@@ -2185,8 +2185,7 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
}
APFloat::opStatus
-APFloat::convertFromHexadecimalString(const StringRef &s,
- roundingMode rounding_mode)
+APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
{
lostFraction lost_fraction = lfExactlyZero;
integerPart *significand;
@@ -2361,7 +2360,7 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
}
APFloat::opStatus
-APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mode)
+APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
{
decimalInfo D;
opStatus fs;
@@ -2471,7 +2470,7 @@ APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mo
}
APFloat::opStatus
-APFloat::convertFromString(const StringRef &str, roundingMode rounding_mode)
+APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
{
assertArithmeticOK(*semantics);
assert(!str.empty() && "Invalid string length");
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 1341d21..262fa42 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -102,7 +102,7 @@ APInt::APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[])
clearUnusedBits();
}
-APInt::APInt(unsigned numbits, const StringRef& Str, uint8_t radix)
+APInt::APInt(unsigned numbits, StringRef Str, uint8_t radix)
: BitWidth(numbits), VAL(0) {
assert(BitWidth && "Bitwidth too small");
fromString(numbits, Str, radix);
@@ -613,7 +613,7 @@ APInt& APInt::flip(unsigned bitPosition) {
return *this;
}
-unsigned APInt::getBitsNeeded(const StringRef& str, uint8_t radix) {
+unsigned APInt::getBitsNeeded(StringRef str, uint8_t radix) {
assert(!str.empty() && "Invalid string length");
assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
"Radix should be 2, 8, 10, or 16!");
@@ -2046,7 +2046,7 @@ void APInt::udivrem(const APInt &LHS, const APInt &RHS,
divide(LHS, lhsWords, RHS, rhsWords, &Quotient, &Remainder);
}
-void APInt::fromString(unsigned numbits, const StringRef& str, uint8_t radix) {
+void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
// Check our assumptions here
assert(!str.empty() && "Invalid string length");
assert((radix == 10 || radix == 8 || radix == 16 || radix == 2) &&
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index a7631de..309ffb0 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -19,7 +19,7 @@
#include <string>
using namespace llvm;
-Regex::Regex(const StringRef &regex, unsigned Flags) {
+Regex::Regex(StringRef regex, unsigned Flags) {
unsigned flags = 0;
preg = new llvm_regex();
preg->re_endp = regex.end();
@@ -52,7 +52,7 @@ unsigned Regex::getNumMatches() const {
return preg->re_nsub;
}
-bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
+bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
// pmatch needs to have at least one element.
diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp
index 1ee917f..ff607cf 100644
--- a/lib/Support/StringPool.cpp
+++ b/lib/Support/StringPool.cpp
@@ -22,7 +22,7 @@ StringPool::~StringPool() {
assert(InternTable.empty() && "PooledStringPtr leaked!");
}
-PooledStringPtr StringPool::intern(const StringRef &Key) {
+PooledStringPtr StringPool::intern(StringRef Key) {
table_t::iterator I = InternTable.find(Key);
if (I != InternTable.end())
return PooledStringPtr(&*I);
diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc
index 67018de..0209f5a 100644
--- a/lib/System/Unix/Program.inc
+++ b/lib/System/Unix/Program.inc
@@ -310,12 +310,9 @@ Program::Wait(unsigned secondsToWait,
// fact of having a handler at all causes the wait below to return with EINTR,
// unlike if we used SIG_IGN.
if (secondsToWait) {
-#if !defined(__HAIKU__) && !defined(__minix)
- Act.sa_sigaction = 0;
-#endif
+ memset(&Act, 0, sizeof(Act));
Act.sa_handler = TimeOutHandler;
sigemptyset(&Act.sa_mask);
- Act.sa_flags = 0;
sigaction(SIGALRM, &Act, &Old);
alarm(secondsToWait);
}
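Zeroing the whole struct sigaction with one memset clears every field, including ones the removed #if shows are missing on Haiku and Minix (sa_sigaction), so both the platform conditional and the separate sa_flags = 0 disappear. A minimal sketch of the portable pattern, with a hypothetical handler:

```cpp
#include <signal.h>
#include <string.h>
#include <unistd.h>

// Hypothetical handler: it exists only so a pending wait() returns with
// EINTR when SIGALRM fires, as the surrounding code's comment explains.
static void TimeOutHandler(int) {}

// Arm a SIGALRM timeout. memset zeroes every field of struct sigaction,
// including platform-specific ones, so no #ifdef is needed.
static void armTimeout(unsigned Seconds, struct sigaction &Old) {
  struct sigaction Act;
  memset(&Act, 0, sizeof(Act));
  Act.sa_handler = TimeOutHandler;
  sigemptyset(&Act.sa_mask);
  sigaction(SIGALRM, &Act, &Old);
  alarm(Seconds);
}
```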
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index f1e6a9f..fa64d6c 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -48,6 +48,8 @@ def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true",
"Enable divide instructions">;
def FeatureT2ExtractPack: SubtargetFeature<"t2xtpk", "HasT2ExtractPack", "true",
"Enable Thumb2 extract and pack instructions">;
+def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true",
+ "FP compare + branch is slow">;
// Some processors have multiply-accumulate instructions that don't
// play nicely with other VFP instructions, and it's generally better
@@ -129,7 +131,7 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries,
// V7 Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureHasSlowVMLx,
- FeatureNEONForFP, FeatureT2ExtractPack]>;
+ FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2ExtractPack]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureT2ExtractPack]>;
def : ProcNoItin<"cortex-m3", [ArchV7M, FeatureThumb2, FeatureHWDiv]>;
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index d316b13..92a13f1 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -519,9 +519,8 @@ namespace ARM_AM {
//
// This is stored in two operands [regaddr, align]. The first is the
// address register. The second operand is the value of the alignment
- // specifier to use or zero if no explicit alignment.
- // Valid alignments are: 0, 8, 16, and 32 bytes, depending on the specific
- // instruction.
+ // specifier in bytes or zero if no explicit alignment.
+ // Valid alignments depend on the specific instruction.
//===--------------------------------------------------------------------===//
// NEON Modified Immediates
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 98d8b85..0091df7 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -565,6 +565,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::CMPZ: return "ARMISD::CMPZ";
case ARMISD::CMPFP: return "ARMISD::CMPFP";
case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+ case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
case ARMISD::CMOV: return "ARMISD::CMOV";
case ARMISD::CNEG: return "ARMISD::CNEG";
@@ -623,6 +624,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
+ case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
+ case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
case ARMISD::VDUP: return "ARMISD::VDUP";
case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
case ARMISD::VEXT: return "ARMISD::VEXT";
@@ -2216,7 +2219,7 @@ static bool isFloatingPointZero(SDValue Op) {
/// the given operands.
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG,
+ SDValue &ARMcc, SelectionDAG &DAG,
DebugLoc dl) const {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
@@ -2268,48 +2271,14 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
CompareType = ARMISD::CMPZ;
break;
}
- ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
}
-static bool canBitcastToInt(SDNode *Op) {
- return Op->hasOneUse() &&
- ISD::isNormalLoad(Op) &&
- Op->getValueType(0) == MVT::f32;
-}
-
-static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
- if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
- return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
- Ld->getChain(), Ld->getBasePtr(),
- Ld->getSrcValue(), Ld->getSrcValueOffset(),
- Ld->isVolatile(), Ld->isNonTemporal(),
- Ld->getAlignment());
-
- llvm_unreachable("Unknown VFP cmp argument!");
-}
-
/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
SDValue
-ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG,
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
DebugLoc dl) const {
- if (UnsafeFPMath && FiniteOnlyFPMath() &&
- (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
- CC == ISD::SETNE || CC == ISD::SETUNE) &&
- canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
- // If unsafe fp math optimization is enabled and there are no othter uses of
- // the CMP operands, and the condition code is EQ oe NE, we can optimize it
- // to an integer comparison.
- if (CC == ISD::SETOEQ)
- CC = ISD::SETEQ;
- else if (CC == ISD::SETUNE)
- CC = ISD::SETNE;
- LHS = bitcastToInt(LHS, DAG);
- RHS = bitcastToInt(RHS, DAG);
- return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
- }
-
SDValue Cmp;
if (!isFloatingPointZero(RHS))
Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
@@ -2328,59 +2297,184 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
DebugLoc dl = Op.getDebugLoc();
if (LHS.getValueType() == MVT::i32) {
- SDValue ARMCC;
+ SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
- ARMCC, CCR, Cmp);
+ ARMcc, CCR, Cmp);
if (CondCode2 != ARMCC::AL) {
- SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
// FIXME: Needs another CMP because flag can have but one use.
- SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
+ SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
Result = DAG.getNode(ARMISD::CMOV, dl, VT,
- Result, TrueVal, ARMCC2, CCR, Cmp2);
+ Result, TrueVal, ARMcc2, CCR, Cmp2);
}
return Result;
}
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+ const ARMSubtarget *Subtarget) {
+ SDNode *N = Op.getNode();
+ if (!N->hasOneUse())
+ // Otherwise it requires moving the value from fp to integer registers.
+ return false;
+ if (!N->getNumValues())
+ return false;
+ EVT VT = Op.getValueType();
+ if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+ // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+ // vmrs are very slow, e.g. cortex-a8.
+ return false;
+
+ if (isFloatingPointZero(Op)) {
+ SeenZero = true;
+ return true;
+ }
+ return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+ if (isFloatingPointZero(Op))
+ return DAG.getConstant(0, MVT::i32);
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+ return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ld->getBasePtr(),
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+ SDValue &RetVal1, SDValue &RetVal2) {
+ if (isFloatingPointZero(Op)) {
+ RetVal1 = DAG.getConstant(0, MVT::i32);
+ RetVal2 = DAG.getConstant(0, MVT::i32);
+ return;
+ }
+
+ if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+ SDValue Ptr = Ld->getBasePtr();
+ RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), Ptr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset(),
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ Ld->getAlignment());
+
+ EVT PtrType = Ptr.getValueType();
+ unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+ PtrType, Ptr, DAG.getConstant(4, PtrType));
+ RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+ Ld->getChain(), NewPtr,
+ Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
+ Ld->isVolatile(), Ld->isNonTemporal(),
+ NewAlign);
+ return;
+ }
+
+ llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ DebugLoc dl = Op.getDebugLoc();
+
+ bool SeenZero = false;
+ if (canChangeToInt(LHS, SeenZero, Subtarget) &&
+ canChangeToInt(RHS, SeenZero, Subtarget) &&
+ // If one of the operands is zero, it's safe to ignore the NaN case.
+ (FiniteOnlyFPMath() || SeenZero)) {
+ // If unsafe fp math optimization is enabled and there are no other uses of
+ // the CMP operands, and the condition code is EQ or NE, we can optimize it
+ // to an integer comparison.
+ if (CC == ISD::SETOEQ)
+ CC = ISD::SETEQ;
+ else if (CC == ISD::SETUNE)
+ CC = ISD::SETNE;
+
+ SDValue ARMcc;
+ if (LHS.getValueType() == MVT::f32) {
+ LHS = bitcastf32Toi32(LHS, DAG);
+ RHS = bitcastf32Toi32(RHS, DAG);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+ return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+ Chain, Dest, ARMcc, CCR, Cmp);
+ }
+
+ SDValue LHS1, LHS2;
+ SDValue RHS1, RHS2;
+ expandf64Toi32(LHS, DAG, LHS1, LHS2);
+ expandf64Toi32(RHS, DAG, RHS1, RHS2);
+ ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+ ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+ SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+ return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+ }
+
+ return SDValue();
+}
+
SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
+ SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
- SDValue LHS = Op.getOperand(2);
- SDValue RHS = Op.getOperand(3);
- SDValue Dest = Op.getOperand(4);
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
DebugLoc dl = Op.getDebugLoc();
if (LHS.getValueType() == MVT::i32) {
- SDValue ARMCC;
+ SDValue ARMcc;
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
- Chain, Dest, ARMCC, CCR,Cmp);
+ Chain, Dest, ARMcc, CCR, Cmp);
}
assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+ if (UnsafeFPMath &&
+ (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+ CC == ISD::SETNE || CC == ISD::SETUNE)) {
+ SDValue Result = OptimizeVFPBrcond(Op, DAG);
+ if (Result.getNode())
+ return Result;
+ }
+
ARMCC::CondCodes CondCode, CondCode2;
FPCCToARMCC(CC, CondCode, CondCode2);
- SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
- SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
+ SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+ SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
- SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
+ SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
if (CondCode2 != ARMCC::AL) {
- ARMCC = DAG.getConstant(CondCode2, MVT::i32);
- SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
+ ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+ SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
}
return Res;
@@ -2469,12 +2563,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
EVT SrcVT = Tmp1.getValueType();
SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
- SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+ SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
- SDValue Cmp = getVFPCmp(Tmp1, FP0,
- ISD::SETLT, ARMCC, DAG, dl);
+ SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
+ return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
}
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2553,51 +2646,18 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
}
/// getZeroVector - Returns a vector of specified type with all zero elements.
-///
+/// Zero vectors are used to represent vector negation and in those cases
+/// will be implemented with the NEON VNEG instruction. However, VNEG does
+/// not support i64 elements, so sometimes the zero vectors will need to be
+/// explicitly constructed. Regardless, use a canonical VMOV to create the
+/// zero vector.
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
-
- // Zero vectors are used to represent vector negation and in those cases
- // will be implemented with the NEON VNEG instruction. However, VNEG does
- // not support i64 elements, so sometimes the zero vectors will need to be
- // explicitly constructed. For those cases, and potentially other uses in
- // the future, always build zero vectors as <16 x i8> or <8 x i8> bitcasted
- // to their dest type. This ensures they get CSE'd.
- SDValue Vec;
- SDValue Cst = DAG.getTargetConstant(0, MVT::i8);
- SmallVector<SDValue, 8> Ops;
- MVT TVT;
-
- if (VT.getSizeInBits() == 64) {
- Ops.assign(8, Cst); TVT = MVT::v8i8;
- } else {
- Ops.assign(16, Cst); TVT = MVT::v16i8;
- }
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
-
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
-}
-
-/// getOnesVector - Returns a vector of specified type with all bits set.
-///
-static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
- assert(VT.isVector() && "Expected a vector type");
-
- // Always build ones vectors as <16 x i8> or <8 x i8> bitcasted to their
- // dest type. This ensures they get CSE'd.
- SDValue Vec;
- SDValue Cst = DAG.getTargetConstant(0xFF, MVT::i8);
- SmallVector<SDValue, 8> Ops;
- MVT TVT;
-
- if (VT.getSizeInBits() == 64) {
- Ops.assign(8, Cst); TVT = MVT::v8i8;
- } else {
- Ops.assign(16, Cst); TVT = MVT::v16i8;
- }
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, TVT, &Ops[0], Ops.size());
-
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vec);
+ // The canonical modified immediate encoding of a zero vector is....0!
+ SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
+ EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
}
/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
@@ -2611,7 +2671,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue ARMCC;
+ SDValue ARMcc;
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@@ -2627,9 +2687,9 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, dl);
+ ARMcc, DAG, dl);
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
- SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
@@ -2647,7 +2707,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpLo = Op.getOperand(0);
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
- SDValue ARMCC;
+ SDValue ARMcc;
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
@@ -2661,9 +2721,9 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, dl);
+ ARMcc, DAG, dl);
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
- SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
CCR, Cmp);
SDValue Ops[2] = { Lo, Hi };
@@ -2850,13 +2910,11 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
/// isNEONModifiedImm - Check if the specified splat value corresponds to a
/// valid vector constant for a NEON instruction with a "modified immediate"
-/// operand (e.g., VMOV). If so, return either the constant being
-/// splatted or the encoded value, depending on the DoEncode parameter.
+/// operand (e.g., VMOV). If so, return the encoded value.
static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
unsigned SplatBitSize, SelectionDAG &DAG,
- bool isVMOV, bool DoEncode) {
+ EVT &VT, bool is128Bits, bool isVMOV) {
unsigned OpCmode, Imm;
- EVT VT;
// SplatBitSize is set to the smallest size that splats the vector, so a
// zero vector will always have SplatBitSize == 8. However, NEON modified
@@ -2868,16 +2926,18 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
switch (SplatBitSize) {
case 8:
+ if (!isVMOV)
+ return SDValue();
// Any 1-byte value is OK. Op=0, Cmode=1110.
assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
OpCmode = 0xe;
Imm = SplatBits;
- VT = MVT::i8;
+ VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
break;
case 16:
// NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
- VT = MVT::i16;
+ VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x00nn: Op=x, Cmode=100x.
OpCmode = 0x8;
@@ -2897,7 +2957,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
// * only one byte is nonzero, or
// * the least significant byte is 0xff and the second byte is nonzero, or
// * the least significant 2 bytes are 0xff and the third is nonzero.
- VT = MVT::i32;
+ VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
if ((SplatBits & ~0xff) == 0) {
// Value = 0x000000nn: Op=x, Cmode=000x.
OpCmode = 0;
@@ -2949,9 +3009,9 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
return SDValue();
case 64: {
- // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
if (!isVMOV)
return SDValue();
+ // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
uint64_t BitMask = 0xff;
uint64_t Val = 0;
unsigned ImmMask = 1;
@@ -2969,7 +3029,7 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
// Op=1, Cmode=1110.
OpCmode = 0x1e;
SplatBits = Val;
- VT = MVT::i64;
+ VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
break;
}
@@ -2978,32 +3038,8 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
return SDValue();
}
- if (DoEncode) {
- unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
- return DAG.getTargetConstant(EncodedVal, MVT::i32);
- }
- return DAG.getTargetConstant(SplatBits, VT);
-}
-
-/// getNEONModImm - If this is a valid vector constant for a NEON instruction
-/// with a "modified immediate" operand (e.g., VMOV) of the specified element
-/// size, return the encoded value for that immediate. The ByteSize field
-/// indicates the number of bytes of each element [1248].
-SDValue ARM::getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
- SelectionDAG &DAG) {
- BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
- APInt SplatBits, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
- HasAnyUndefs, ByteSize * 8))
- return SDValue();
-
- if (SplatBitSize > ByteSize * 8)
- return SDValue();
-
- return isNEONModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
- SplatBitSize, DAG, isVMOV, true);
+ unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
+ return DAG.getTargetConstant(EncodedVal, MVT::i32);
}
static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT,
@@ -3194,43 +3230,6 @@ static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
-
-static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
- // Canonicalize all-zeros and all-ones vectors.
- ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
- if (ConstVal->isNullValue())
- return getZeroVector(VT, DAG, dl);
- if (ConstVal->isAllOnesValue())
- return getOnesVector(VT, DAG, dl);
-
- EVT CanonicalVT;
- if (VT.is64BitVector()) {
- switch (Val.getValueType().getSizeInBits()) {
- case 8: CanonicalVT = MVT::v8i8; break;
- case 16: CanonicalVT = MVT::v4i16; break;
- case 32: CanonicalVT = MVT::v2i32; break;
- case 64: CanonicalVT = MVT::v1i64; break;
- default: llvm_unreachable("unexpected splat element type"); break;
- }
- } else {
- assert(VT.is128BitVector() && "unknown splat vector size");
- switch (Val.getValueType().getSizeInBits()) {
- case 8: CanonicalVT = MVT::v16i8; break;
- case 16: CanonicalVT = MVT::v8i16; break;
- case 32: CanonicalVT = MVT::v4i32; break;
- case 64: CanonicalVT = MVT::v2i64; break;
- default: llvm_unreachable("unexpected splat element type"); break;
- }
- }
-
- // Build a canonical splat for this value.
- SmallVector<SDValue, 8> Ops;
- Ops.assign(CanonicalVT.getVectorNumElements(), Val);
- SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, &Ops[0],
- Ops.size());
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Res);
-}
-
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.
static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
@@ -3244,11 +3243,25 @@ static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
if (SplatBitSize <= 64) {
// Check if an immediate VMOV works.
+ EVT VmovVT;
SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
- SplatUndef.getZExtValue(),
- SplatBitSize, DAG, true, false);
- if (Val.getNode())
- return BuildSplat(Val, VT, DAG, dl);
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(), true);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+ }
+
+ // Try an immediate VMVN.
+ uint64_t NegatedImm = (SplatBits.getZExtValue() ^
+ ((1LL << SplatBitSize) - 1));
+ Val = isNEONModifiedImm(NegatedImm,
+ SplatUndef.getZExtValue(), SplatBitSize,
+ DAG, VmovVT, VT.is128BitVector(), false);
+ if (Val.getNode()) {
+ SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov);
+ }
}
}
@@ -3825,6 +3838,15 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
return BB;
}
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+ for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I)
+ if (*I != Succ)
+ return *I;
+ llvm_unreachable("Expecting a BB with two successors!");
+}
+
MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB) const {
@@ -3941,6 +3963,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
return BB;
}
+ case ARM::BCCi64:
+ case ARM::BCCZi64: {
+ // Compare both parts that make up the double comparison separately for
+ // equality.
+ bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+ unsigned LHS1 = MI->getOperand(1).getReg();
+ unsigned LHS2 = MI->getOperand(2).getReg();
+ if (RHSisZero) {
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS1).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(LHS2).addImm(0)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ } else {
+ unsigned RHS1 = MI->getOperand(3).getReg();
+ unsigned RHS2 = MI->getOperand(4).getReg();
+ AddDefaultPred(BuildMI(BB, dl,
+ TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS1).addReg(RHS1));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(LHS2).addReg(RHS2)
+ .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ }
+
+ MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+ MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+ if (MI->getOperand(0).getImm() == ARMCC::NE)
+ std::swap(destMBB, exitMBB);
+
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
+ .addMBB(exitMBB);
+
+ MI->eraseFromParent(); // The pseudo instruction is gone now.
+ return BB;
+ }
+
case ARM::tANDsp:
case ARM::tADDspr_:
case ARM::tSUBspi_:
@@ -4180,6 +4242,35 @@ static SDValue PerformVMOVRRDCombine(SDNode *N,
return SDValue();
}
+/// PerformVDUPLANECombine - Target-specific dag combine xforms for
+/// ARMISD::VDUPLANE.
+static SDValue PerformVDUPLANECombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
+ // redundant.
+ SDValue Op = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+
+ // Ignore bit_converts.
+ while (Op.getOpcode() == ISD::BIT_CONVERT)
+ Op = Op.getOperand(0);
+ if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
+ return SDValue();
+
+ // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
+ unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+ // The canonical VMOV for a zero vector uses a 32-bit element size.
+ unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ unsigned EltBits;
+ if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
+ EltSize = 8;
+ if (EltSize > VT.getVectorElementType().getSizeInBits())
+ return SDValue();
+
+ SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
+ return DCI.CombineTo(N, Res, false);
+}
+
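
The combine's premise can be checked with a scalar model: a VMOV/VMVN splat is
already uniform, so duplicating any of its lanes is the identity (plain C++
sketch, not the DAG APIs):

  #include <array>
  #include <cassert>
  #include <cstdint>

  int main() {
    std::array<uint32_t, 4> Splat = {7, 7, 7, 7};  // vmov.i32 splat
    std::array<uint32_t, 4> Dup;
    Dup.fill(Splat[2]);                            // dup any lane
    assert(Dup == Splat);                          // VDUPLANE was redundant
    return 0;
  }
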
/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
@@ -4558,6 +4649,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
+ case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 3a38669..128b72e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -53,6 +53,8 @@ namespace llvm {
CMOV, // ARM conditional move instructions.
CNEG, // ARM conditional negate instructions.
+ BCC_i64,
+
RBIT, // ARM bitreverse instruction
FTOSI, // FP to sint within a FP register.
@@ -122,6 +124,10 @@ namespace llvm {
VGETLANEu, // zero-extend vector extract element
VGETLANEs, // sign-extend vector extract element
+ // Vector move immediate and move negated immediate:
+ VMOVIMM,
+ VMVNIMM,
+
// Vector duplicate:
VDUP,
VDUPLANE,
@@ -150,13 +156,6 @@ namespace llvm {
/// Define some predicates that are used for node matching.
namespace ARM {
- /// getNEONModImm - If this is a valid vector constant for a NEON
- /// instruction with a "modified immediate" operand (e.g., VMOV) of the
- /// specified element size, return the encoded value for that immediate.
- /// The ByteSize field indicates the number of bytes of each element [1248].
- SDValue getNEONModImm(SDNode *N, unsigned ByteSize, bool isVMOV,
- SelectionDAG &DAG);
-
/// getVFPf32Imm / getVFPf64Imm - If the given fp immediate can be
/// materialized with a VMOV.f32 / VMOV.f64 (i.e. fconsts / fconstd)
/// instruction, returns its 8-bit integer representation. Otherwise,
@@ -363,9 +362,11 @@ namespace llvm {
DebugLoc dl, SelectionDAG &DAG) const;
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
- SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+ SelectionDAG &DAG, DebugLoc dl) const;
+
+ SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
MachineBasicBlock *BB,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index c73e204..51fc152 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -38,6 +38,12 @@ def SDT_ARMBr2JT : SDTypeProfile<0, 4,
[SDTCisPtrTy<0>, SDTCisVT<1, i32>,
SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+ [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+ SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+ SDTCisVT<5, OtherVT>]>;
+
def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@@ -90,6 +96,9 @@ def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
[SDNPHasChain]>;
+def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+ [SDNPHasChain]>;
+
def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
[SDNPOutFlag]>;
@@ -1685,13 +1694,19 @@ def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
}
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
(SUBri GPR:$src, so_imm_neg:$imm)>;
-
-//def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
-// (SUBSri GPR:$src, so_imm_neg:$imm)>;
-//def : ARMPat<(adde GPR:$src, so_imm_neg:$imm),
-// (SBCri GPR:$src, so_imm_neg:$imm)>;
+def : ARMPat<(addc GPR:$src, so_imm_neg:$imm),
+ (SUBSri GPR:$src, so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
+def : ARMPat<(adde GPR:$src, so_imm_not:$imm),
+ (SBCri GPR:$src, so_imm_not:$imm)>;
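
The identities these patterns rely on are easy to verify in plain C++
(illustrative sketch):

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t A = 100, B = 7;
    // Assume-no-carry-in: A - B == A + (-B) == A + ~B + 1 (two's complement).
    assert(A - B == A + ~B + 1);
    // With carry-in: SBC computes A + ~B + carry, and ARM's carry is the
    // inverse of the borrow, so matching ~imm (not -imm) is what is needed.
    for (uint32_t Borrow = 0; Borrow <= 1; ++Borrow) {
      uint32_t Carry = !Borrow;
      assert(A - B - Borrow == A + ~B + Carry);
    }
    return 0;
  }
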
// Note: These are implemented in C++ code, because they have to generate
// ADD/SUBrs instructions, which use a complex pattern that a xform function
@@ -2279,6 +2294,22 @@ defm CMNz : AI1_cmp_irs<0b1011, "cmn",
def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
(CMNzri GPR:$src, so_imm_neg:$imm)>;
+// Pseudo i64 compare-and-branch instructions, used when lowering some
+// floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+ Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+ IIC_Br,
+ "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+
+def BCCZi64 : PseudoInst<(outs),
+ (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst),
+ IIC_Br,
+ "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc",
+ [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+} // usesCustomInserter
+
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index a84315f..7f7eb98 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -65,6 +65,10 @@ def SDTARMVGETLN : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
def NEONvgetlaneu : SDNode<"ARMISD::VGETLANEu", SDTARMVGETLN>;
def NEONvgetlanes : SDNode<"ARMISD::VGETLANEs", SDTARMVGETLN>;
+def SDTARMVMOVIMM : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVT<1, i32>]>;
+def NEONvmovImm : SDNode<"ARMISD::VMOVIMM", SDTARMVMOVIMM>;
+def NEONvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
+
def NEONvdup : SDNode<"ARMISD::VDUP", SDTypeProfile<1, 1, [SDTCisVec<0>]>>;
// VDUPLANE can produce a quad-register result from a double-register source,
@@ -94,6 +98,20 @@ def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
+def NEONimmAllZerosV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 32 && EltVal == 0);
+}]>;
+
+def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
+ ConstantSDNode *ConstVal = cast<ConstantSDNode>(N->getOperand(0));
+ unsigned EltBits;
+ uint64_t EltVal = ARM_AM::decodeNEONModImm(ConstVal->getZExtValue(), EltBits);
+ return (EltBits == 8 && EltVal == 0xff);
+}]>;
+
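
Both predicates decode the modified immediate back into an element size and
value before testing for the canonical encodings. A rough scalar model of the
checks (plain C++; the decoded EltVal/EltBits pair stands in for what
ARM_AM::decodeNEONModImm produces):

  #include <cassert>
  #include <cstdint>

  static bool isAllZeros(uint64_t EltVal, unsigned EltBits) {
    return EltBits == 32 && EltVal == 0;    // canonical zeros: vmov.i32 #0
  }
  static bool isAllOnes(uint64_t EltVal, unsigned EltBits) {
    return EltBits == 8 && EltVal == 0xff;  // canonical ones: vmov.i8 #0xff
  }

  int main() {
    assert(isAllZeros(0, 32) && !isAllZeros(0, 8));
    assert(isAllOnes(0xff, 8) && !isAllOnes(0xff, 16));
    return 0;
  }
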
//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//
@@ -2318,10 +2336,10 @@ defm VTST : N3V_QHS<0, 0, 0b1000, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
// Vector Bitwise Operations.
-def vnot8 : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v8i8 immAllOnesV)))>;
-def vnot16 : PatFrag<(ops node:$in),
- (xor node:$in, (bitconvert (v16i8 immAllOnesV)))>;
+def vnotd : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v8i8 NEONimmAllOnesV)))>;
+def vnotq : PatFrag<(ops node:$in),
+ (xor node:$in, (bitconvert (v16i8 NEONimmAllOnesV)))>;
// VAND : Vector Bitwise AND
@@ -2347,36 +2365,58 @@ def VBICd : N3VX<0, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
"vbic", "$dst, $src1, $src2", "",
[(set DPR:$dst, (v2i32 (and DPR:$src1,
- (vnot8 DPR:$src2))))]>;
+ (vnotd DPR:$src2))))]>;
def VBICq : N3VX<0, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
"vbic", "$dst, $src1, $src2", "",
[(set QPR:$dst, (v4i32 (and QPR:$src1,
- (vnot16 QPR:$src2))))]>;
+ (vnotq QPR:$src2))))]>;
// VORN : Vector Bitwise OR NOT
def VORNd : N3VX<0, 0, 0b11, 0b0001, 0, 1, (outs DPR:$dst),
(ins DPR:$src1, DPR:$src2), N3RegFrm, IIC_VBINiD,
"vorn", "$dst, $src1, $src2", "",
[(set DPR:$dst, (v2i32 (or DPR:$src1,
- (vnot8 DPR:$src2))))]>;
+ (vnotd DPR:$src2))))]>;
def VORNq : N3VX<0, 0, 0b11, 0b0001, 1, 1, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2), N3RegFrm, IIC_VBINiQ,
"vorn", "$dst, $src1, $src2", "",
[(set QPR:$dst, (v4i32 (or QPR:$src1,
- (vnot16 QPR:$src2))))]>;
+ (vnotq QPR:$src2))))]>;
+
+// VMVN : Vector Bitwise NOT (Immediate)
+
+let isReMaterializable = 1 in {
+def VMVNv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 1, 1, (outs DPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$dst, $SIMM", "",
+ [(set DPR:$dst, (v4i16 (NEONvmvnImm timm:$SIMM)))]>;
+def VMVNv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 1, 1, (outs QPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i16", "$dst, $SIMM", "",
+ [(set QPR:$dst, (v8i16 (NEONvmvnImm timm:$SIMM)))]>;
+
+def VMVNv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 1, 1, (outs DPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$dst, $SIMM", "",
+ [(set DPR:$dst, (v2i32 (NEONvmvnImm timm:$SIMM)))]>;
+def VMVNv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 1, 1, (outs QPR:$dst),
+ (ins nModImm:$SIMM), IIC_VMOVImm,
+ "vmvn", "i32", "$dst, $SIMM", "",
+ [(set QPR:$dst, (v4i32 (NEONvmvnImm timm:$SIMM)))]>;
+}
// VMVN : Vector Bitwise NOT
def VMVNd : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 0, 0,
(outs DPR:$dst), (ins DPR:$src), IIC_VSUBiD,
"vmvn", "$dst, $src", "",
- [(set DPR:$dst, (v2i32 (vnot8 DPR:$src)))]>;
+ [(set DPR:$dst, (v2i32 (vnotd DPR:$src)))]>;
def VMVNq : N2VX<0b11, 0b11, 0b00, 0b00, 0b01011, 1, 0,
(outs QPR:$dst), (ins QPR:$src), IIC_VSUBiD,
"vmvn", "$dst, $src", "",
- [(set QPR:$dst, (v4i32 (vnot16 QPR:$src)))]>;
-def : Pat<(v2i32 (vnot8 DPR:$src)), (VMVNd DPR:$src)>;
-def : Pat<(v4i32 (vnot16 QPR:$src)), (VMVNq QPR:$src)>;
+ [(set QPR:$dst, (v4i32 (vnotq QPR:$src)))]>;
+def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
+def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
// VBSL : Vector Bitwise Select
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
@@ -2385,14 +2425,14 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$dst),
"vbsl", "$dst, $src2, $src3", "$src1 = $dst",
[(set DPR:$dst,
(v2i32 (or (and DPR:$src2, DPR:$src1),
- (and DPR:$src3, (vnot8 DPR:$src1)))))]>;
+ (and DPR:$src3, (vnotd DPR:$src1)))))]>;
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$dst),
(ins QPR:$src1, QPR:$src2, QPR:$src3),
N3RegFrm, IIC_VCNTiQ,
"vbsl", "$dst, $src2, $src3", "$src1 = $dst",
[(set QPR:$dst,
(v4i32 (or (and QPR:$src2, QPR:$src1),
- (and QPR:$src3, (vnot16 QPR:$src1)))))]>;
+ (and QPR:$src3, (vnotq QPR:$src1)))))]>;
// VBIF : Vector Bitwise Insert if False
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
@@ -2726,20 +2766,19 @@ defm VQABS : N2VInt_QHS<0b11, 0b11, 0b00, 0b01110, 0,
// Vector Negate.
-def vneg : PatFrag<(ops node:$in), (sub immAllZerosV, node:$in)>;
-def vneg8 : PatFrag<(ops node:$in),
- (sub (bitconvert (v8i8 immAllZerosV)), node:$in)>;
-def vneg16 : PatFrag<(ops node:$in),
- (sub (bitconvert (v16i8 immAllZerosV)), node:$in)>;
+def vnegd : PatFrag<(ops node:$in),
+ (sub (bitconvert (v2i32 NEONimmAllZerosV)), node:$in)>;
+def vnegq : PatFrag<(ops node:$in),
+ (sub (bitconvert (v4i32 NEONimmAllZerosV)), node:$in)>;
class VNEGD<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 0, 0, (outs DPR:$dst), (ins DPR:$src),
IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
- [(set DPR:$dst, (Ty (vneg8 DPR:$src)))]>;
+ [(set DPR:$dst, (Ty (vnegd DPR:$src)))]>;
class VNEGQ<bits<2> size, string OpcodeStr, string Dt, ValueType Ty>
: N2V<0b11, 0b11, size, 0b01, 0b00111, 1, 0, (outs QPR:$dst), (ins QPR:$src),
IIC_VSHLiD, OpcodeStr, Dt, "$dst, $src", "",
- [(set QPR:$dst, (Ty (vneg16 QPR:$src)))]>;
+ [(set QPR:$dst, (Ty (vnegq QPR:$src)))]>;
// VNEG : Vector Negate (integer)
def VNEGs8d : VNEGD<0b00, "vneg", "s8", v8i8>;
@@ -2759,12 +2798,12 @@ def VNEGf32q : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 1, 0,
"vneg", "f32", "$dst, $src", "",
[(set QPR:$dst, (v4f32 (fneg QPR:$src)))]>;
-def : Pat<(v8i8 (vneg8 DPR:$src)), (VNEGs8d DPR:$src)>;
-def : Pat<(v4i16 (vneg8 DPR:$src)), (VNEGs16d DPR:$src)>;
-def : Pat<(v2i32 (vneg8 DPR:$src)), (VNEGs32d DPR:$src)>;
-def : Pat<(v16i8 (vneg16 QPR:$src)), (VNEGs8q QPR:$src)>;
-def : Pat<(v8i16 (vneg16 QPR:$src)), (VNEGs16q QPR:$src)>;
-def : Pat<(v4i32 (vneg16 QPR:$src)), (VNEGs32q QPR:$src)>;
+def : Pat<(v8i8 (vnegd DPR:$src)), (VNEGs8d DPR:$src)>;
+def : Pat<(v4i16 (vnegd DPR:$src)), (VNEGs16d DPR:$src)>;
+def : Pat<(v2i32 (vnegd DPR:$src)), (VNEGs32d DPR:$src)>;
+def : Pat<(v16i8 (vnegq QPR:$src)), (VNEGs8q QPR:$src)>;
+def : Pat<(v8i16 (vnegq QPR:$src)), (VNEGs16q QPR:$src)>;
+def : Pat<(v4i32 (vnegq QPR:$src)), (VNEGs32q QPR:$src)>;
// VQNEG : Vector Saturating Negate
defm VQNEG : N2VInt_QHS<0b11, 0b11, 0b00, 0b01111, 0,
@@ -2818,74 +2857,42 @@ def VMOVQQQQ : PseudoInst<(outs QQQQPR:$dst), (ins QQQQPR:$src),
// VMOV : Vector Move (Immediate)
-// VMOV_get_imm8 xform function: convert build_vector to VMOV.i8 imm.
-def VMOV_get_imm8 : SDNodeXForm<build_vector, [{
- return ARM::getNEONModImm(N, 1, true, *CurDAG);
-}]>;
-def vmovImm8 : PatLeaf<(build_vector), [{
- return ARM::getNEONModImm(N, 1, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm8>;
-
-// VMOV_get_imm16 xform function: convert build_vector to VMOV.i16 imm.
-def VMOV_get_imm16 : SDNodeXForm<build_vector, [{
- return ARM::getNEONModImm(N, 2, true, *CurDAG);
-}]>;
-def vmovImm16 : PatLeaf<(build_vector), [{
- return ARM::getNEONModImm(N, 2, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm16>;
-
-// VMOV_get_imm32 xform function: convert build_vector to VMOV.i32 imm.
-def VMOV_get_imm32 : SDNodeXForm<build_vector, [{
- return ARM::getNEONModImm(N, 4, true, *CurDAG);
-}]>;
-def vmovImm32 : PatLeaf<(build_vector), [{
- return ARM::getNEONModImm(N, 4, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm32>;
-
-// VMOV_get_imm64 xform function: convert build_vector to VMOV.i64 imm.
-def VMOV_get_imm64 : SDNodeXForm<build_vector, [{
- return ARM::getNEONModImm(N, 8, true, *CurDAG);
-}]>;
-def vmovImm64 : PatLeaf<(build_vector), [{
- return ARM::getNEONModImm(N, 8, true, *CurDAG).getNode() != 0;
-}], VMOV_get_imm64>;
-
let isReMaterializable = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$dst, $SIMM", "",
- [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
+ [(set DPR:$dst, (v8i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$dst, $SIMM", "",
- [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
+ [(set QPR:$dst, (v16i8 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv4i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$dst, $SIMM", "",
- [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
+ [(set DPR:$dst, (v4i16 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv8i16 : N1ModImm<1, 0b000, {1,0,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i16", "$dst, $SIMM", "",
- [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
+ [(set QPR:$dst, (v8i16 (NEONvmovImm timm:$SIMM)))]>;
-def VMOVv2i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 0, 0, 1, (outs DPR:$dst),
+def VMOVv2i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 0, 0, 1, (outs DPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$dst, $SIMM", "",
- [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
-def VMOVv4i32 : N1ModImm<1, 0b000, {0,?,?,0}, 0, 1, 0, 1, (outs QPR:$dst),
+ [(set DPR:$dst, (v2i32 (NEONvmovImm timm:$SIMM)))]>;
+def VMOVv4i32 : N1ModImm<1, 0b000, {?,?,?,?}, 0, 1, 0, 1, (outs QPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i32", "$dst, $SIMM", "",
- [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
+ [(set QPR:$dst, (v4i32 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$dst, $SIMM", "",
- [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
+ [(set DPR:$dst, (v1i64 (NEONvmovImm timm:$SIMM)))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
(ins nModImm:$SIMM), IIC_VMOVImm,
"vmov", "i64", "$dst, $SIMM", "",
- [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
+ [(set QPR:$dst, (v2i64 (NEONvmovImm timm:$SIMM)))]>;
} // isReMaterializable
// VMOV : Vector Get Lane (move scalar to ARM core register)
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 4692f2a..bbe675e 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -122,6 +122,10 @@ def imm0_255_neg : PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 255;
}], imm_neg_XFORM>;
+def imm0_255_not : PatLeaf<(i32 imm), [{
+ return (uint32_t)(~N->getZExtValue()) < 255;
+}], imm_comp_XFORM>;
+
// Define Thumb2 specific addressing modes.
// t2addrmode_imm12 := reg + imm12
@@ -1391,13 +1395,32 @@ defm t2RSBS : T2I_rbin_s_is <0b1110, "rsb",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
// (sub X, imm) gets canonicalized to (add X, -imm). Match this form.
+// The assume-no-carry-in form uses the negation of the input since add/sub
+// assume opposite meanings of the carry flag (i.e., carry == !borrow).
+// See the definition of AddWithCarry() in the ARM ARM A2.2.1 for the gory
+// details.
+// The AddedComplexity prefers the first variant over the others since
+// it can be shrunk to a 16-bit wide encoding, while the others cannot.
+let AddedComplexity = 1 in
+def : T2Pat<(add GPR:$src, imm0_255_neg:$imm),
+ (t2SUBri GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
+ (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+let AddedComplexity = 1 in
+def : T2Pat<(addc GPR:$src, imm0_255_neg:$imm),
+ (t2SUBSri GPR:$src, imm0_255_neg:$imm)>;
+def : T2Pat<(addc GPR:$src, t2_so_imm_neg:$imm),
+ (t2SUBSri GPR:$src, t2_so_imm_neg:$imm)>;
+// The with-carry-in form matches bitwise not instead of the negation.
+// Effectively, the inverse interpretation of the carry flag already accounts
+// for part of the negation.
let AddedComplexity = 1 in
-def : T2Pat<(add GPR:$src, imm0_255_neg:$imm),
- (t2SUBri GPR:$src, imm0_255_neg:$imm)>;
-def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
- (t2SUBri GPR:$src, t2_so_imm_neg:$imm)>;
-def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
- (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+def : T2Pat<(adde GPR:$src, imm0_255_not:$imm),
+ (t2SBCSri GPR:$src, imm0_255_not:$imm)>;
+def : T2Pat<(adde GPR:$src, t2_so_imm_not:$imm),
+ (t2SBCSri GPR:$src, t2_so_imm_not:$imm)>;
// Select Bytes -- for disassembly only
@@ -2435,7 +2458,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
def t2LDM_RET : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p,
reglist:$dsts, variable_ops), IIC_Br,
- "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts",
+ "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts",
"$addr.addr = $wb", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 8332bba..e7d92ed 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -54,6 +54,9 @@ protected:
/// the VML[AS] instructions are slow (if so, don't use them).
bool SlowVMLx;
+ /// SlowFPBrcc - True if floating point compare + branch is slow.
+ bool SlowFPBrcc;
+
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
bool IsThumb;
@@ -133,6 +136,7 @@ protected:
bool hasDivide() const { return HasHardwareDivide; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool useVMLx() const {return hasVFP2() && !SlowVMLx; }
+ bool isFPBrccSlow() const { return SlowFPBrcc; }
bool hasFP16() const { return HasFP16; }
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 8415d1a..4b08324 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -88,7 +88,7 @@ private:
/// its register number, or -1 if there is no match. To allow return values
/// to be used directly in register lists, ARM registers have values between
/// 0 and 15.
- int MatchRegisterName(const StringRef &Name);
+ int MatchRegisterName(StringRef Name);
/// }
@@ -97,7 +97,7 @@ public:
ARMAsmParser(const Target &T, MCAsmParser &_Parser)
: TargetAsmParser(T), Parser(_Parser) {}
- virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
@@ -517,7 +517,7 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
return true;
- const StringRef &ShiftName = Tok.getString();
+ StringRef ShiftName = Tok.getString();
if (ShiftName == "lsl" || ShiftName == "LSL")
St = Lsl;
else if (ShiftName == "lsr" || ShiftName == "LSR")
@@ -549,7 +549,7 @@ bool ARMAsmParser::ParseShift(ShiftType &St,
}
/// A hack to allow some testing, to be replaced by a real table gen version.
-int ARMAsmParser::MatchRegisterName(const StringRef &Name) {
+int ARMAsmParser::MatchRegisterName(StringRef Name) {
if (Name == "r0" || Name == "R0")
return 0;
else if (Name == "r1" || Name == "R1")
@@ -593,7 +593,7 @@ MatchInstruction(const SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCInst &Inst) {
ARMOperand &Op0 = *(ARMOperand*)Operands[0];
assert(Op0.Kind == ARMOperand::Token && "First operand not a Token");
- const StringRef &Mnemonic = Op0.getToken();
+ StringRef Mnemonic = Op0.getToken();
if (Mnemonic == "add" ||
Mnemonic == "stmfd" ||
Mnemonic == "str" ||
@@ -658,7 +658,7 @@ bool ARMAsmParser::ParseOperand(OwningPtr<ARMOperand> &Op) {
}
/// Parse an arm instruction mnemonic followed by its operands.
-bool ARMAsmParser::ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
OwningPtr<ARMOperand> Op;
ARMOperand::CreateToken(Op, Name, NameLoc);
@@ -761,7 +761,7 @@ bool ARMAsmParser::ParseDirectiveSyntax(SMLoc L) {
const AsmToken &Tok = Parser.getTok();
if (Tok.isNot(AsmToken::Identifier))
return Error(L, "unexpected token in .syntax directive");
- const StringRef &Mode = Tok.getString();
+ StringRef Mode = Tok.getString();
if (Mode == "unified" || Mode == "UNIFIED")
Parser.Lex();
else if (Mode == "divided" || Mode == "DIVIDED")
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 6a40cf3..946f474 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -602,12 +602,8 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op,
O << "[" << getRegisterName(MO1.getReg());
if (MO2.getImm()) {
- unsigned Align = MO2.getImm();
- assert((Align == 8 || Align == 16 || Align == 32) &&
- "unexpected NEON load/store alignment");
- Align <<= 3;
// FIXME: Both darwin as and GNU as violate ARM docs here.
- O << ", :" << Align;
+ O << ", :" << (MO2.getImm() << 3);
}
O << "]";
}
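
The operand stores the alignment in bytes while the ":imm" suffix in NEON
load/store addresses is expressed in bits, hence the shift by 3. A quick
sanity check (plain C++):

  #include <cassert>

  int main() {
    unsigned AlignBytes = 16;             // value carried by the operand
    unsigned AlignBits = AlignBytes << 3;
    assert(AlignBits == 128);             // printed as "[r0, :128]"
    return 0;
  }
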
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index 170819a..edc9345 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -442,7 +442,7 @@ void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum,
O << "[" << getRegisterName(MO1.getReg());
if (MO2.getImm()) {
// FIXME: Both darwin as and GNU as violate ARM docs here.
- O << ", :" << MO2.getImm();
+ O << ", :" << (MO2.getImm() << 3);
}
O << "]";
}
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 85d5ca0..0cb8ff0 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -590,3 +590,70 @@ than the Z bit, we'll need additional logic to reverse the conditionals
associated with the comparison. Perhaps a pseudo-instruction for the comparison,
with a post-codegen pass to clean up and handle the condition codes?
See PR5694 for testcase.
+
+//===---------------------------------------------------------------------===//
+
+Given the following on armv5:
+int test1(int A, int B) {
+ return (A&-8388481)|(B&8388480);
+}
+
+We currently generate:
+ ldr r2, .LCPI0_0
+ and r0, r0, r2
+ ldr r2, .LCPI0_1
+ and r1, r1, r2
+ orr r0, r1, r0
+ bx lr
+
+We should be able to replace the second ldr+and with a bic (i.e. reuse the
+constant which was already loaded). Not sure what's necessary to do that.
+
+//===---------------------------------------------------------------------===//
+
+Given the following on ARMv7:
+int test1(int A, int B) {
+ return (A&-8388481)|(B&8388480);
+}
+
+We currently generate:
+ bfc r0, #7, #16
+ movw r2, #:lower16:8388480
+ movt r2, #:upper16:8388480
+ and r1, r1, r2
+ orr r0, r1, r0
+ bx lr
+
+The following is much shorter:
+ lsr r1, r1, #7
+ bfi r0, r1, #7, #16
+ bx lr
+
+
+//===---------------------------------------------------------------------===//
+
+The code generated for bswap on armv4/5 (CPUs without rev) is less than ideal:
+
+int a(int x) { return __builtin_bswap32(x); }
+
+a:
+ mov r1, #255, 24
+ mov r2, #255, 16
+ and r1, r1, r0, lsr #8
+ and r2, r2, r0, lsl #8
+ orr r1, r1, r0, lsr #24
+ orr r0, r2, r0, lsl #24
+ orr r0, r0, r1
+ bx lr
+
+Something like the following would be better (fewer instructions/registers):
+ eor r1, r0, r0, ror #16
+ bic r1, r1, #0xff0000
+ mov r1, r1, lsr #8
+ eor r0, r1, r0, ror #8
+ bx lr
+
+A custom Thumb version would also be a slight improvement over the generic
+version.
+
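+The four-instruction sequence is a classic byte-swap idiom. A C++ check of
+the trick (rotr here stands in for the ror shifted operand):
+
+#include <cassert>
+#include <cstdint>
+
+static uint32_t rotr(uint32_t X, unsigned N) {
+  return (X >> N) | (X << (32 - N));
+}
+
+// eor r1, r0, r0, ror #16 ; bic r1, r1, #0xff0000
+// mov r1, r1, lsr #8      ; eor r0, r1, r0, ror #8
+static uint32_t bswap_idiom(uint32_t X) {
+  uint32_t T = (X ^ rotr(X, 16)) & ~0x00ff0000u;
+  return (T >> 8) ^ rotr(X, 8);
+}
+
+int main() {
+  assert(bswap_idiom(0x12345678u) == 0x78563412u);
+  return 0;
+}
+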
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/AlphaMCAsmInfo.cpp
index c67c6a2..a35e884 100644
--- a/lib/Target/Alpha/AlphaMCAsmInfo.cpp
+++ b/lib/Target/Alpha/AlphaMCAsmInfo.cpp
@@ -14,7 +14,7 @@
#include "AlphaMCAsmInfo.h"
using namespace llvm;
-AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, const StringRef &TT) {
+AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, StringRef TT) {
AlignmentIsInBytes = false;
PrivateGlobalPrefix = "$";
GPRel32Directive = ".gprel32";
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.h b/lib/Target/Alpha/AlphaMCAsmInfo.h
index c27065d..837844b 100644
--- a/lib/Target/Alpha/AlphaMCAsmInfo.h
+++ b/lib/Target/Alpha/AlphaMCAsmInfo.h
@@ -14,14 +14,14 @@
#ifndef ALPHATARGETASMINFO_H
#define ALPHATARGETASMINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
- class StringRef;
struct AlphaMCAsmInfo : public MCAsmInfo {
- explicit AlphaMCAsmInfo(const Target &T, const StringRef &TT);
+ explicit AlphaMCAsmInfo(const Target &T, StringRef TT);
};
} // namespace llvm
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
index 31470fb..5b9d4a2 100644
--- a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
@@ -15,7 +15,7 @@
using namespace llvm;
-BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, const StringRef &TT) {
+BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, StringRef TT) {
GlobalPrefix = "_";
CommentString = "//";
HasSetDirective = false;
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/BlackfinMCAsmInfo.h
index 0efc295..c372aa2 100644
--- a/lib/Target/Blackfin/BlackfinMCAsmInfo.h
+++ b/lib/Target/Blackfin/BlackfinMCAsmInfo.h
@@ -14,14 +14,14 @@
#ifndef BLACKFINTARGETASMINFO_H
#define BLACKFINTARGETASMINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
- class StringRef;
struct BlackfinMCAsmInfo : public MCAsmInfo {
- explicit BlackfinMCAsmInfo(const Target &T, const StringRef &TT);
+ explicit BlackfinMCAsmInfo(const Target &T, StringRef TT);
};
} // namespace llvm
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
index 68445cf..25ba88a 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp
@@ -14,7 +14,7 @@
#include "SPUMCAsmInfo.h"
using namespace llvm;
-SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) {
+SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
ZeroDirective = "\t.space\t";
Data64bitsDirective = "\t.quad\t";
AlignmentIsInBytes = false;
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.h b/lib/Target/CellSPU/SPUMCAsmInfo.h
index 8d75ea8..7f850d3 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.h
+++ b/lib/Target/CellSPU/SPUMCAsmInfo.h
@@ -14,14 +14,14 @@
#ifndef SPUTARGETASMINFO_H
#define SPUTARGETASMINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
- class StringRef;
struct SPULinuxMCAsmInfo : public MCAsmInfo {
- explicit SPULinuxMCAsmInfo(const Target &T, const StringRef &TT);
+ explicit SPULinuxMCAsmInfo(const Target &T, StringRef TT);
};
} // namespace llvm
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
index 7ae465d..4abeb2e 100644
--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
@@ -14,7 +14,7 @@
#include "MBlazeMCAsmInfo.h"
using namespace llvm;
-MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, const StringRef &TT) {
+MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, StringRef TT) {
AlignmentIsInBytes = false;
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MBlazeMCAsmInfo.h
index bccb418..9d6ff3a 100644
--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.h
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h
@@ -14,15 +14,15 @@
#ifndef MBLAZETARGETASMINFO_H
#define MBLAZETARGETASMINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
- class StringRef;
class MBlazeMCAsmInfo : public MCAsmInfo {
public:
- explicit MBlazeMCAsmInfo(const Target &T, const StringRef &TT);
+ explicit MBlazeMCAsmInfo(const Target &T, StringRef TT);
};
} // namespace llvm
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 8f97d25..cc350e8 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -1621,8 +1621,7 @@ const char* MSILWriter::getLibraryName(const GlobalVariable* GV) {
}
-const char* MSILWriter::getLibraryForSymbol(const StringRef &Name,
- bool isFunction,
+const char* MSILWriter::getLibraryForSymbol(StringRef Name, bool isFunction,
CallingConv::ID CallingConv) {
// TODO: Read *.def file with function and libraries definitions.
return "MSVCRT.DLL";
diff --git a/lib/Target/MSIL/MSILWriter.h b/lib/Target/MSIL/MSILWriter.h
index a95ae23..92a3abe 100644
--- a/lib/Target/MSIL/MSILWriter.h
+++ b/lib/Target/MSIL/MSILWriter.h
@@ -246,7 +246,7 @@ namespace llvm {
const char* getLibraryName(const GlobalVariable* GV);
- const char* getLibraryForSymbol(const StringRef &Name, bool isFunction,
+ const char* getLibraryForSymbol(StringRef Name, bool isFunction,
CallingConv::ID CallingConv);
void printExternals();
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
index cfb499d..3f44944 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
@@ -14,7 +14,7 @@
#include "MSP430MCAsmInfo.h"
using namespace llvm;
-MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) {
+MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
PrivateGlobalPrefix = ".L";
WeakRefDirective ="\t.weak\t";
PCSymbol=".";
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.h b/lib/Target/MSP430/MSP430MCAsmInfo.h
index 8318029..f3138a2 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.h
@@ -14,13 +14,14 @@
#ifndef MSP430TARGETASMINFO_H
#define MSP430TARGETASMINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
- class StringRef;
+
struct MSP430MCAsmInfo : public MCAsmInfo {
- explicit MSP430MCAsmInfo(const Target &T, const StringRef &TT);
+ explicit MSP430MCAsmInfo(const Target &T, StringRef TT);
};
} // namespace llvm
diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MipsMCAsmInfo.cpp
index 89e3e11..fe48ab7 100644
--- a/lib/Target/Mips/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MipsMCAsmInfo.cpp
@@ -14,7 +14,7 @@
#include "MipsMCAsmInfo.h"
using namespace llvm;
-MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, const StringRef &TT) {
+MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
AlignmentIsInBytes = false;
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MipsMCAsmInfo.h
index 33a4b5e..15a867e 100644
--- a/lib/Target/Mips/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MipsMCAsmInfo.h
@@ -14,15 +14,15 @@
#ifndef MIPSTARGETASMINFO_H
#define MIPSTARGETASMINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
- class StringRef;
class MipsMCAsmInfo : public MCAsmInfo {
public:
- explicit MipsMCAsmInfo(const Target &T, const StringRef &TT);
+ explicit MipsMCAsmInfo(const Target &T, StringRef TT);
};
} // namespace llvm
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index 6a4d0d6..7a948de 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -416,7 +416,7 @@ void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
if (!TagName.empty()) Tmp += ", " + TagName;
for (int i = 0; i<Num; i++)
- Tmp += "," + utostr(Aux[i] && 0xff);
+ Tmp += "," + utostr(Aux[i] & 0xff);
OS.EmitRawText("\n\t.dim " + Twine(VarName) + ", 1" + Tmp);
}
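
The change swaps a logical AND (which collapses any nonzero aux byte to 1) for
the intended bitwise mask. The difference in two lines of C++ (illustrative):

  #include <cassert>

  int main() {
    int Aux = 0x1234;
    assert((Aux && 0xff) == 1);     // logical AND: nonzero operands -> true
    assert((Aux & 0xff) == 0x34);   // bitwise AND: keeps the low byte
    return 0;
  }
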
diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.cpp b/lib/Target/PIC16/PIC16MCAsmInfo.cpp
index b080542..1bcc497 100644
--- a/lib/Target/PIC16/PIC16MCAsmInfo.cpp
+++ b/lib/Target/PIC16/PIC16MCAsmInfo.cpp
@@ -20,7 +20,7 @@
#include "PIC16ISelLowering.h"
using namespace llvm;
-PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, const StringRef &TT) {
+PIC16MCAsmInfo::PIC16MCAsmInfo(const Target &T, StringRef TT) {
CommentString = ";";
GlobalPrefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
GlobalDirective = "\tglobal\t";
diff --git a/lib/Target/PIC16/PIC16MCAsmInfo.h b/lib/Target/PIC16/PIC16MCAsmInfo.h
index e84db85..6e1c111 100644
--- a/lib/Target/PIC16/PIC16MCAsmInfo.h
+++ b/lib/Target/PIC16/PIC16MCAsmInfo.h
@@ -25,7 +25,7 @@ namespace llvm {
const char *RomData16bitsDirective;
const char *RomData32bitsDirective;
public:
- PIC16MCAsmInfo(const Target &T, const StringRef &TT);
+ PIC16MCAsmInfo(const Target &T, StringRef TT);
virtual const char *getDataASDirective(unsigned size, unsigned AS) const;
};
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/SparcMCAsmInfo.cpp
index 535c6f7..d37d6d2 100644
--- a/lib/Target/Sparc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/SparcMCAsmInfo.cpp
@@ -12,10 +12,9 @@
//===----------------------------------------------------------------------===//
#include "SparcMCAsmInfo.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
-SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, const StringRef &TT) {
+SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
Data64bitsDirective = 0; // .xword is only supported by V9.
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.h b/lib/Target/Sparc/SparcMCAsmInfo.h
index 12d6ef4..0cb6827 100644
--- a/lib/Target/Sparc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/SparcMCAsmInfo.h
@@ -14,13 +14,14 @@
#ifndef SPARCTARGETASMINFO_H
#define SPARCTARGETASMINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
- class StringRef;
+
struct SparcELFMCAsmInfo : public MCAsmInfo {
- explicit SparcELFMCAsmInfo(const Target &T, const StringRef &TT);
+ explicit SparcELFMCAsmInfo(const Target &T, StringRef TT);
};
} // namespace llvm
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
index f9ccc47..4f7f70b 100644
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
@@ -16,7 +16,7 @@
#include "llvm/MC/MCSectionELF.h"
using namespace llvm;
-SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, const StringRef &TT) {
+SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
PrivateGlobalPrefix = ".L";
WeakRefDirective = "\t.weak\t";
PCSymbol = ".";
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/SystemZMCAsmInfo.h
index 87908f2..a6a27e2 100644
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/SystemZMCAsmInfo.h
@@ -21,7 +21,7 @@ namespace llvm {
class StringRef;
struct SystemZMCAsmInfo : public MCAsmInfo {
- explicit SystemZMCAsmInfo(const Target &T, const StringRef &TT);
+ explicit SystemZMCAsmInfo(const Target &T, StringRef TT);
virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
};
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index df52368..47c91df 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -294,7 +294,7 @@ namespace llvm {
/// option is specified on the command line. If this returns false (default),
/// the code generator is not allowed to assume that FP arithmetic arguments
/// and results are never NaNs or +-Infs.
- bool FiniteOnlyFPMath() { return UnsafeFPMath || FiniteOnlyFPMathOption; }
+ bool FiniteOnlyFPMath() { return FiniteOnlyFPMathOption; }
/// HonorSignDependentRoundingFPMath - Return true if the codegen must assume
/// that the rounding mode of the FPU can change from its default.
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index a856e9c..f1e66ab 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -65,7 +65,7 @@ public:
X86ATTAsmParser(const Target &T, MCAsmParser &_Parser)
: TargetAsmParser(T), Parser(_Parser) {}
- virtual bool ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
@@ -602,7 +602,7 @@ X86Operand *X86ATTAsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) {
}
bool X86ATTAsmParser::
-ParseInstruction(const StringRef &Name, SMLoc NameLoc,
+ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// The various flavors of pushf and popf use Requires<In32BitMode> and
// Requires<In64BitMode>, but the assembler doesn't yet implement that.
@@ -612,6 +612,8 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc,
return Error(NameLoc, "popfl cannot be encoded in 64-bit mode");
else if (Name == "pushfl")
return Error(NameLoc, "pushfl cannot be encoded in 64-bit mode");
+ else if (Name == "pusha")
+ return Error(NameLoc, "pusha cannot be encoded in 64-bit mode");
} else {
if (Name == "popfq")
return Error(NameLoc, "popfq cannot be encoded in 32-bit mode");
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index 73bc603..08e6486 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -17,7 +17,6 @@
#include "X86IntelInstPrinter.h"
#include "X86MCInstLower.h"
#include "X86.h"
-#include "X86COFF.h"
#include "X86COFFMachineModuleInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
@@ -35,6 +34,7 @@
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/COFF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetOptions.h"
@@ -60,8 +60,10 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (Subtarget->isTargetCOFF()) {
bool Intrn = MF.getFunction()->hasInternalLinkage();
OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::C_STAT : COFF::C_EXT);
- OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+ OutStreamer.EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
+ : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
OutStreamer.EndCOFFSymbolDef();
}
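
Both call sites build the same COFF type word. With the standard PE/COFF
values behind these enum names (function derived type 2, complex-type shift
4), the emitted type is 0x20; a quick check (plain C++; constants restated
locally, not the llvm/Support/COFF.h declarations):

  #include <cassert>

  int main() {
    const unsigned IMAGE_SYM_DTYPE_FUNCTION = 2;  // per the PE/COFF spec
    const unsigned SCT_COMPLEX_TYPE_SHIFT = 4;
    assert((IMAGE_SYM_DTYPE_FUNCTION << SCT_COMPLEX_TYPE_SHIFT) == 0x20);
    return 0;
  }
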
@@ -582,8 +584,9 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
E = COFFMMI.externals_end();
I != E; ++I) {
OutStreamer.BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer.EmitCOFFSymbolStorageClass(COFF::C_EXT);
- OutStreamer.EmitCOFFSymbolType(COFF::DT_FCN << COFF::N_BTSHFT);
+ OutStreamer.EmitCOFFSymbolStorageClass(COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer.EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
+ << COFF::SCT_COMPLEX_TYPE_SHIFT);
OutStreamer.EndCOFFSymbolDef();
}
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 09f150b..e67fc06 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -154,15 +154,13 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
case X86II::MO_TLVP: RefKind = MCSymbolRefExpr::VK_TLVP; break;
case X86II::MO_TLVP_PIC_BASE:
- Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
- // Subtract the pic base.
- Expr
- = MCBinaryExpr::CreateSub(Expr,
- MCSymbolRefExpr::Create(GetPICBaseSymbol(),
- Ctx),
- Ctx);
-
- break;
+ Expr = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_TLVP, Ctx);
+ // Subtract the pic base.
+ Expr = MCBinaryExpr::CreateSub(Expr,
+ MCSymbolRefExpr::Create(GetPICBaseSymbol(),
+ Ctx),
+ Ctx);
+ break;
case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
diff --git a/lib/Target/X86/X86COFF.h b/lib/Target/X86/X86COFF.h
deleted file mode 100644
index 0a8e4e6..0000000
--- a/lib/Target/X86/X86COFF.h
+++ /dev/null
@@ -1,95 +0,0 @@
-//===--- X86COFF.h - Some definitions from COFF documentations ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file just defines some symbols found in COFF documentation. They are
-// used to emit function type information for COFF targets (Cygwin/Mingw32).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef X86COFF_H
-#define X86COFF_H
-
-namespace COFF
-{
-/// Storage class tells where and what the symbol represents
-enum StorageClass {
- C_EFCN = -1, ///< Physical end of function
- C_NULL = 0, ///< No symbol
- C_AUTO = 1, ///< External definition
- C_EXT = 2, ///< External symbol
- C_STAT = 3, ///< Static
- C_REG = 4, ///< Register variable
- C_EXTDEF = 5, ///< External definition
- C_LABEL = 6, ///< Label
- C_ULABEL = 7, ///< Undefined label
- C_MOS = 8, ///< Member of structure
- C_ARG = 9, ///< Function argument
- C_STRTAG = 10, ///< Structure tag
- C_MOU = 11, ///< Member of union
- C_UNTAG = 12, ///< Union tag
- C_TPDEF = 13, ///< Type definition
- C_USTATIC = 14, ///< Undefined static
- C_ENTAG = 15, ///< Enumeration tag
- C_MOE = 16, ///< Member of enumeration
- C_REGPARM = 17, ///< Register parameter
- C_FIELD = 18, ///< Bit field
-
- C_BLOCK = 100, ///< ".bb" or ".eb" - beginning or end of block
- C_FCN = 101, ///< ".bf" or ".ef" - beginning or end of function
- C_EOS = 102, ///< End of structure
- C_FILE = 103, ///< File name
- C_LINE = 104, ///< Line number, reformatted as symbol
- C_ALIAS = 105, ///< Duplicate tag
- C_HIDDEN = 106 ///< External symbol in dmert public lib
-};
-
-/// The type of the symbol. This is made up of a base type and a derived type.
-/// For example, pointer to int is "pointer to T" and "int"
-enum SymbolType {
- T_NULL = 0, ///< No type info
- T_ARG = 1, ///< Void function argument (only used by compiler)
- T_VOID = 1, ///< The same as above. Just named differently in some specs.
- T_CHAR = 2, ///< Character
- T_SHORT = 3, ///< Short integer
- T_INT = 4, ///< Integer
- T_LONG = 5, ///< Long integer
- T_FLOAT = 6, ///< Floating point
- T_DOUBLE = 7, ///< Double word
- T_STRUCT = 8, ///< Structure
- T_UNION = 9, ///< Union
- T_ENUM = 10, ///< Enumeration
- T_MOE = 11, ///< Member of enumeration
- T_UCHAR = 12, ///< Unsigned character
- T_USHORT = 13, ///< Unsigned short
- T_UINT = 14, ///< Unsigned integer
- T_ULONG = 15 ///< Unsigned long
-};
-
-/// Derived type of symbol
-enum SymbolDerivedType {
- DT_NON = 0, ///< No derived type
- DT_PTR = 1, ///< Pointer to T
- DT_FCN = 2, ///< Function returning T
- DT_ARY = 3 ///< Array of T
-};
-
-/// Masks for extracting parts of type
-enum SymbolTypeMasks {
- N_BTMASK = 017, ///< Mask for base type
- N_TMASK = 060 ///< Mask for derived type
-};
-
-/// Offsets of parts of type
-enum Shifts {
- N_BTSHFT = 4 ///< Type is formed as (base + derived << N_BTSHIFT)
-};
-
-}
-
-#endif // X86COFF_H
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index cdde24a..ce13707 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -540,7 +540,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
StubAM.GVOpFlags = GVFlags;
// Prepare for inserting code in the local-value area.
- MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();
+ SavePoint SaveInsertPt = enterLocalValueArea();
if (TLI.getPointerTy() == MVT::i64) {
Opc = X86::MOV64rm;
@@ -1279,12 +1279,11 @@ bool X86FastISel::X86SelectTrunc(const Instruction *I) {
return false;
// First issue a copy to GR16_ABCD or GR32_ABCD.
- unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
unsigned CopyReg = createResultReg(CopyRC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg)
- .addReg(InputReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(InputReg);
// Then issue an extract_subreg.
unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 1a63474..b3c4886 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -2458,17 +2458,23 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// If the tailcall address may be in a register, then make sure it's
// possible to register allocate for it. In 32-bit, the call address can
// only target EAX, EDX, or ECX since the tail call must be scheduled after
- // callee-saved registers are restored. In 64-bit, it's RAX, RCX, RDX, RSI,
- // RDI, R8, R9, R11.
- if (!isa<GlobalAddressSDNode>(Callee) &&
+ // callee-saved registers are restored. These happen to be the same
+ // registers used to pass 'inreg' arguments so watch out for those.
+ if (!Subtarget->is64Bit() &&
+ !isa<GlobalAddressSDNode>(Callee) &&
!isa<ExternalSymbolSDNode>(Callee)) {
- unsigned Limit = Subtarget->is64Bit() ? 8 : 3;
unsigned NumInRegs = 0;
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- if (VA.isRegLoc()) {
- if (++NumInRegs == Limit)
+ if (!VA.isRegLoc())
+ continue;
+ unsigned Reg = VA.getLocReg();
+ switch (Reg) {
+ default: break;
+ case X86::EAX: case X86::EDX: case X86::ECX:
+ if (++NumInRegs == 3)
return false;
+ break;
}
}
}
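
In 32-bit mode the call address must land in EAX, ECX, or EDX after the
callee-saved restores, so the check bails out once 'inreg' arguments occupy
all three. A scalar model of that walk (plain C++; register ids illustrative):

  #include <cassert>

  enum Reg { EAX, ECX, EDX, ESI, OTHER };

  static bool mayTailCall(const Reg *ArgRegs, unsigned N) {
    unsigned NumInRegs = 0;
    for (unsigned i = 0; i != N; ++i)
      if (ArgRegs[i] == EAX || ArgRegs[i] == ECX || ArgRegs[i] == EDX)
        if (++NumInRegs == 3)
          return false;   // nothing left for the call target itself
    return true;
  }

  int main() {
    Reg Two[] = {EAX, ECX};
    Reg Three[] = {EAX, ECX, EDX};
    assert(mayTailCall(Two, 2));
    assert(!mayTailCall(Three, 3));
    return 0;
  }
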
@@ -7993,7 +7999,6 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
unsigned immOpc,
unsigned LoadOpc,
unsigned CXchgOpc,
- unsigned copyOpc,
unsigned notOpc,
unsigned EAXreg,
TargetRegisterClass *RC,
@@ -8070,7 +8075,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
MIB.addReg(tt);
(*MIB).addOperand(*argOpers[valArgIndx]);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), EAXreg);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), EAXreg);
MIB.addReg(t1);
MIB = BuildMI(newMBB, dl, TII->get(CXchgOpc));
@@ -8081,7 +8086,7 @@ X86TargetLowering::EmitAtomicBitwiseWithCustomInserter(MachineInstr *bInstr,
(*MIB).setMemRefs(bInstr->memoperands_begin(),
bInstr->memoperands_end());
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), destOper.getReg());
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
MIB.addReg(EAXreg);
// insert branch
@@ -8117,7 +8122,6 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
const TargetRegisterClass *RC = X86::GR32RegisterClass;
const unsigned LoadOpc = X86::MOV32rm;
- const unsigned copyOpc = X86::MOV32rr;
const unsigned NotOpc = X86::NOT32r;
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
const BasicBlock *LLVM_BB = MBB->getBasicBlock();
@@ -8227,14 +8231,14 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
MIB.addReg(t2);
(*MIB).addOperand(*argOpers[valArgIndx + 1]);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EAX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
MIB.addReg(t1);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EDX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EDX);
MIB.addReg(t2);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::EBX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EBX);
MIB.addReg(t5);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), X86::ECX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::ECX);
MIB.addReg(t6);
MIB = BuildMI(newMBB, dl, TII->get(X86::LCMPXCHG8B));
@@ -8245,9 +8249,9 @@ X86TargetLowering::EmitAtomicBit6432WithCustomInserter(MachineInstr *bInstr,
(*MIB).setMemRefs(bInstr->memoperands_begin(),
bInstr->memoperands_end());
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t3);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t3);
MIB.addReg(X86::EAX);
- MIB = BuildMI(newMBB, dl, TII->get(copyOpc), t4);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t4);
MIB.addReg(X86::EDX);
// insert branch
@@ -8326,12 +8330,12 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
unsigned t2 = F->getRegInfo().createVirtualRegister(X86::GR32RegisterClass);
if (argOpers[valArgIndx]->isReg())
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), t2);
else
MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), t2);
(*MIB).addOperand(*argOpers[valArgIndx]);
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), X86::EAX);
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), X86::EAX);
MIB.addReg(t1);
MIB = BuildMI(newMBB, dl, TII->get(X86::CMP32rr));
@@ -8353,7 +8357,7 @@ X86TargetLowering::EmitAtomicMinMaxWithCustomInserter(MachineInstr *mInstr,
(*MIB).setMemRefs(mInstr->memoperands_begin(),
mInstr->memoperands_end());
- MIB = BuildMI(newMBB, dl, TII->get(X86::MOV32rr), destOper.getReg());
+ MIB = BuildMI(newMBB, dl, TII->get(TargetOpcode::COPY), destOper.getReg());
MIB.addReg(X86::EAX);
// insert branch
@@ -8735,25 +8739,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::ATOMAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR32rr,
X86::OR32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMXOR32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR32rr,
X86::XOR32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass);
case X86::ATOMNAND32:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND32rr,
X86::AND32ri, X86::MOV32rm,
- X86::LCMPXCHG32, X86::MOV32rr,
+ X86::LCMPXCHG32,
X86::NOT32r, X86::EAX,
X86::GR32RegisterClass, true);
case X86::ATOMMIN32:
@@ -8768,25 +8772,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::ATOMAND16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMOR16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR16rr,
X86::OR16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMXOR16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR16rr,
X86::XOR16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass);
case X86::ATOMNAND16:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND16rr,
X86::AND16ri, X86::MOV16rm,
- X86::LCMPXCHG16, X86::MOV16rr,
+ X86::LCMPXCHG16,
X86::NOT16r, X86::AX,
X86::GR16RegisterClass, true);
case X86::ATOMMIN16:
@@ -8801,25 +8805,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::ATOMAND8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMOR8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR8rr,
X86::OR8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMXOR8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR8rr,
X86::XOR8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass);
case X86::ATOMNAND8:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND8rr,
X86::AND8ri, X86::MOV8rm,
- X86::LCMPXCHG8, X86::MOV8rr,
+ X86::LCMPXCHG8,
X86::NOT8r, X86::AL,
X86::GR8RegisterClass, true);
// FIXME: There are no CMOV8 instructions; MIN/MAX need some other way.
@@ -8827,25 +8831,25 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
case X86::ATOMAND64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMOR64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::OR64rr,
X86::OR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMXOR64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::XOR64rr,
X86::XOR64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass);
case X86::ATOMNAND64:
return EmitAtomicBitwiseWithCustomInserter(MI, BB, X86::AND64rr,
X86::AND64ri32, X86::MOV64rm,
- X86::LCMPXCHG64, X86::MOV64rr,
+ X86::LCMPXCHG64,
X86::NOT64r, X86::RAX,
X86::GR64RegisterClass, true);
case X86::ATOMMIN64:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 2d28e5c..4e4daa4 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -764,7 +764,6 @@ namespace llvm {
unsigned immOpc,
unsigned loadOpc,
unsigned cxchgOpc,
- unsigned copyOpc,
unsigned notOpc,
unsigned EAXreg,
TargetRegisterClass *RC,
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 97578af..cc3fdf1 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -106,6 +106,7 @@ class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
+class VEX_L { bit hasVEX_L = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr, Domain d = GenericDomain>
@@ -138,6 +139,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasVEX_4VPrefix = 0;  // Does this inst require the VEX.VVVV field?
bit hasVEX_i8ImmReg = 0;  // Does this inst require the last source register
// to be encoded in an immediate field?
+ bit hasVEX_L = 0;       // Does this inst use large (256-bit) registers?
// TSFlags layout should be kept in sync with X86InstrInfo.h.
let TSFlags{5-0} = FormBits;
@@ -155,6 +157,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{33} = hasVEX_WPrefix;
let TSFlags{34} = hasVEX_4VPrefix;
let TSFlags{35} = hasVEX_i8ImmReg;
+ let TSFlags{36} = hasVEX_L;
}
class I<bits<8> o, Format f, dag outs, dag ins, string asm,
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index f762b58..ad0217a 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -453,7 +453,13 @@ namespace X86II {
// VEX_I8IMM - Specifies that the last register used in an AVX instruction
// must be encoded in the i8 immediate field. This usually happens in
// instructions with 4 operands.
- VEX_I8IMM = 1ULL << 35
+ VEX_I8IMM = 1ULL << 35,
+
+ // VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
+ // instruction uses 256-bit wide registers. This is usually auto-detected
+ // if a VR256 register is used, but some AVX instructions also have this
+ // field marked when using an f256 memory reference.
+ VEX_L = 1ULL << 36
};
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index ab0005b..ebe161b 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -666,6 +666,9 @@ defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, f128mem, load,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle>, TB, VEX;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, undef, f256mem, load,
+ "cvtdq2ps\t{$src, $dst|$dst, $src}",
+ SSEPackedSingle>, TB, VEX;
}
let Pattern = []<dag> in {
defm CVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load /*dummy*/,
@@ -806,9 +809,13 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
// Convert packed single/double fp to doubleword
let isAsmParserOnly = 1 in {
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>;
@@ -862,6 +869,10 @@ def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
+ "cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>;
@@ -912,14 +923,39 @@ def Int_CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>;
+let isAsmParserOnly = 1 in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
+def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrYr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvttpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTTPD2DQXrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
+}
+
// Convert packed single to packed double
-let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+ // SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
- Requires<[HasAVX]>;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX,
- Requires<[HasAVX]>;
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
}
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
@@ -949,10 +985,25 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
// Convert packed double to packed single
let isAsmParserOnly = 1 in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
-// FIXME: the memory form of this instruction should described using
-// use extra asm syntax
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrYr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvtpd2ps\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2PSXrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2psx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
}
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
@@ -1142,6 +1193,16 @@ let isAsmParserOnly = 1 in {
"cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
"cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
SSEPackedDouble>, OpSize, VEX_4V;
+ let Pattern = []<dag> in {
+ defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ }
}
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
@@ -2935,19 +2996,46 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
// SSE3 - Conversion Instructions
//===---------------------------------------------------------------------===//
+// Convert Packed Double FP to Packed DW Integers
let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+// The assembler can recognize rr 256-bit instructions by seeing a ymm
+// register, but the same isn't true when using memory operands instead.
+// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
-def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrYr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// XMM only
+def VCVTPD2DQXrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQXrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dqx\t{$src, $dst|$dst, $src}", []>, VEX;
+
+// YMM only
+def VCVTPD2DQYrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
+ "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTPD2DQYrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
+ "vcvtpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
}
def CVTPD2DQrm : S3DI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
+
+// Convert Packed DW Integers to Packed Double FP
+let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrm : S3SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+def VCVTDQ2PDYrr : S3SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
+ "vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+
def CVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}", []>;
def CVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 633ddd4..23b0666 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -432,6 +432,9 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (TSFlags & X86II::VEX_W)
VEX_W = 1;
+ if (TSFlags & X86II::VEX_L)
+ VEX_L = 1;
+
switch (TSFlags & X86II::Op0Mask) {
default: assert(0 && "Invalid prefix!");
case X86II::T8: // 0F 38
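
For context on the new flag: in the VEX encoding, the L bit selects the vector length (0 for 128-bit xmm, 1 for 256-bit ymm) and occupies a fixed slot in the final prefix byte. A sketch of that ISA-defined layout; the helper and its parameter names are hypothetical, not this file's:

  // Last byte of a two-byte VEX prefix (0xC5 <byte>):
  //   bit 7    : R    (stored inverted)
  //   bits 6-3 : vvvv (second source register, stored inverted)
  //   bit 2    : L    (0 = 128-bit/xmm, 1 = 256-bit/ymm) <- set via X86II::VEX_L
  //   bits 1-0 : pp   (implied SIMD prefix: 66/F3/F2)
  uint8_t makeVexByte(bool NotR, uint8_t NotVVVV, bool L, uint8_t PP) {
    return (uint8_t(NotR) << 7) | ((NotVVVV & 0xF) << 3) |
           (uint8_t(L) << 2) | (PP & 0x3);
  }
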
diff --git a/lib/Target/XCore/XCoreMCAsmInfo.cpp b/lib/Target/XCore/XCoreMCAsmInfo.cpp
index 5f6feae..42ab1b3 100644
--- a/lib/Target/XCore/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/XCoreMCAsmInfo.cpp
@@ -10,7 +10,7 @@
#include "XCoreMCAsmInfo.h"
using namespace llvm;
-XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, const StringRef &TT) {
+XCoreMCAsmInfo::XCoreMCAsmInfo(const Target &T, StringRef TT) {
SupportsDebugInformation = true;
Data16bitsDirective = "\t.short\t";
Data32bitsDirective = "\t.long\t";
diff --git a/lib/Target/XCore/XCoreMCAsmInfo.h b/lib/Target/XCore/XCoreMCAsmInfo.h
index 01f8e48..8403922 100644
--- a/lib/Target/XCore/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/XCoreMCAsmInfo.h
@@ -14,14 +14,15 @@
#ifndef XCORETARGETASMINFO_H
#define XCORETARGETASMINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class Target;
- class StringRef;
+
class XCoreMCAsmInfo : public MCAsmInfo {
public:
- explicit XCoreMCAsmInfo(const Target &T, const StringRef &TT);
+ explicit XCoreMCAsmInfo(const Target &T, StringRef TT);
};
} // namespace llvm
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 027a220..9bb01f5 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -399,7 +399,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
// We can only inline direct calls to non-declarations.
if (Callee == 0 || Callee->isDeclaration()) continue;
- // If this call sites was obtained by inlining another function, verify
+ // If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
// itself. If so, we'd be recursively inlining the same function,
// which would provide the same callsites, which would cause us to
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index 55d5e2a..aeeafe7 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -603,6 +603,10 @@ static void ThunkGToF(Function *F, Function *G) {
}
static void AliasGToF(Function *F, Function *G) {
+ // Darwin will trigger llvm_unreachable if asked to codegen an alias.
+ return ThunkGToF(F, G);
+
+#if 0
if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage())
return ThunkGToF(F, G);
@@ -614,6 +618,7 @@ static void AliasGToF(Function *F, Function *G) {
GA->setVisibility(G->getVisibility());
G->replaceAllUsesWith(GA);
G->eraseFromParent();
+#endif
}
static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) {
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 3f4a857..5876f40 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -472,6 +472,25 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
Value *NewOr = Builder->CreateOr(Val, Val2);
return Builder->CreateICmp(LHSCC, NewOr, LHSCst);
}
+
+ // (icmp ne (A & C1), 0) & (icmp ne (A & C2), 0) -->
+ // (icmp eq (A & (C1|C2)), (C1|C2)) when C1, C2 are disjoint one-bit masks.
+ if (LHSCC == ICmpInst::ICMP_NE && LHSCst->isZero()) {
+ Instruction *I1 = dyn_cast<Instruction>(Val);
+ Instruction *I2 = dyn_cast<Instruction>(Val2);
+ if (I1 && I1->getOpcode() == Instruction::And &&
+ I2 && I2->getOpcode() == Instruction::And &&
+ I1->getOperand(0) == I2->getOperand(0)) {
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(I1->getOperand(1));
+ ConstantInt *CI2 = dyn_cast<ConstantInt>(I2->getOperand(1));
+ if (CI1 && CI2 && CI1->getValue().isPowerOf2() &&
+ CI2->getValue().isPowerOf2() && (CI1->getValue() & CI2->getValue()) == 0) {
+ Constant *ConstOr = ConstantExpr::getOr(CI1, CI2);
+ Value *NewAnd = Builder->CreateAnd(I1->getOperand(0), ConstOr);
+ return Builder->CreateICmp(ICmpInst::ICMP_EQ, NewAnd, ConstOr);
+ }
+ }
+ }
}
// From here on, we only handle:
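
The disjoint one-bit restriction on C1 and C2 is what makes the fold sound; with multi-bit masks the two sides can disagree. A standalone check with illustrative values:

  #include <cassert>

  int main() {
    // With C1 = 0b011 and C2 = 0b100, A = 0b101 satisfies the left-hand
    // side of the fold but not the right-hand side, so the rewrite is only
    // valid for disjoint single-bit masks.
    unsigned A = 5, C1 = 3, C2 = 4;
    bool LHS = ((A & C1) != 0) && ((A & C2) != 0); // true: A overlaps each mask
    bool RHS = (A & (C1 | C2)) == (C1 | C2);       // false: 5 != 7
    assert(LHS != RHS);
    return 0;
  }
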
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index c44fe9d..f9ffdb1 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -699,6 +699,34 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
SI.setOperand(2, TrueVal);
return &SI;
}
+
+ // select (A == 0 | B == 0), T, F --> select (A != 0 & B != 0), F, T
+ // Note: This is a canonicalization rather than an optimization, and is used
+ // to expose opportunities to other instcombine transforms.
+ Instruction* CondInst = dyn_cast<Instruction>(CondVal);
+ if (CondInst && CondInst->hasOneUse() &&
+ CondInst->getOpcode() == Instruction::Or) {
+ ICmpInst *LHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(0));
+ ICmpInst *RHSCmp = dyn_cast<ICmpInst>(CondInst->getOperand(1));
+ if (LHSCmp && LHSCmp->hasOneUse() &&
+ LHSCmp->getPredicate() == ICmpInst::ICMP_EQ &&
+ RHSCmp && RHSCmp->hasOneUse() &&
+ RHSCmp->getPredicate() == ICmpInst::ICMP_EQ) {
+ ConstantInt* C1 = dyn_cast<ConstantInt>(LHSCmp->getOperand(1));
+ ConstantInt* C2 = dyn_cast<ConstantInt>(RHSCmp->getOperand(1));
+ if (C1 && C1->isZero() && C2 && C2->isZero()) {
+ LHSCmp->setPredicate(ICmpInst::ICMP_NE);
+ RHSCmp->setPredicate(ICmpInst::ICMP_NE);
+ Value *And =
+ InsertNewInstBefore(BinaryOperator::CreateAnd(LHSCmp, RHSCmp,
+ "and."+CondVal->getName()), SI);
+ SI.setOperand(0, And);
+ SI.setOperand(1, FalseVal);
+ SI.setOperand(2, TrueVal);
+ return &SI;
+ }
+ }
+ }
return 0;
}
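
The canonicalization is De Morgan's law applied to the condition: !(A == 0 || B == 0) equals (A != 0 && B != 0), so inverting the condition while swapping the select arms preserves the result. A brute-force sanity check:

  #include <cassert>

  int main() {
    const int T = 1, F = 2; // arbitrary distinct select results
    for (int A = 0; A <= 1; ++A)
      for (int B = 0; B <= 1; ++B) {
        int Before = (A == 0 || B == 0) ? T : F;
        int After  = (A != 0 && B != 0) ? F : T; // arms swapped
        assert(Before == After);
      }
    return 0;
  }
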
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index a250a88..1f9b415 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -2362,7 +2362,7 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
Formula Base) {
// TODO: For now, just add the min and max offset, because it usually isn't
// worthwhile looking at everything in between.
- SmallVector<int64_t, 4> Worklist;
+ SmallVector<int64_t, 2> Worklist;
Worklist.push_back(LU.MinOffset);
if (LU.MaxOffset != LU.MinOffset)
Worklist.push_back(LU.MaxOffset);
@@ -2376,7 +2376,14 @@ void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs - *I;
if (isLegalUse(F.AM, LU.MinOffset - *I, LU.MaxOffset - *I,
LU.Kind, LU.AccessTy, TLI)) {
- F.BaseRegs[i] = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+ // Add the offset to the base register.
+ const SCEV *NewG = SE.getAddExpr(G, SE.getConstant(G->getType(), *I));
+ // If it cancelled out, drop the base register, otherwise update it.
+ if (NewG->isZero()) {
+ std::swap(F.BaseRegs[i], F.BaseRegs.back());
+ F.BaseRegs.pop_back();
+ } else
+ F.BaseRegs[i] = NewG;
(void)InsertFormula(LU, LUIdx, F);
}
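
Dropping the cancelled base register uses the swap-and-pop idiom: since BaseRegs is treated as an unordered set, the victim is swapped with the last element and popped in O(1) instead of shifting the tail. The same idiom in isolation, sketched with std::vector for illustration (the patch operates on a SmallVector):

  #include <utility>
  #include <vector>

  // Remove element I in O(1) when element order is irrelevant.
  template <typename T>
  void unorderedErase(std::vector<T> &V, std::size_t I) {
    std::swap(V[I], V.back()); // move the victim into the tail slot
    V.pop_back();              // shrink by one, discarding it
  }
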
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 0b48a8f..8e91138 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -306,7 +306,7 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB, const TargetData *TD) {
WeakVH BIHandle(BI);
ReplaceAndSimplifyAllUses(Inst, V, TD);
MadeChange = true;
- if (BIHandle == 0)
+ if (BIHandle != BI)
BI = BB->begin();
continue;
}
@@ -354,12 +354,13 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
// value into all of its uses.
assert(PNV != PN && "hasConstantValue broken");
+ Value *OldPhiIt = PhiIt;
ReplaceAndSimplifyAllUses(PN, PNV, TD);
// If recursive simplification ended up deleting the next PHI node we would
// iterate to, then our iterator is invalid, restart scanning from the top
// of the block.
- if (PhiIt == 0) PhiIt = &BB->front();
+ if (PhiIt != OldPhiIt) PhiIt = &BB->front();
}
}
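
Both Local.cpp hunks swap a null test for an inequality test because a weak value handle is invalidated two ways: deleting the tracked value nulls the handle, while replaceAllUsesWith re-points it at the replacement. Either way it stops comparing equal to the original pointer. A sketch of the pattern; the header paths are assumptions based on this era of the tree:

  #include "llvm/Instruction.h"
  #include "llvm/Support/ValueHandle.h"

  using namespace llvm;

  // Returns true if Inst must be considered stale after Transform ran:
  // deleted (handle nulled) or RAUW'd (handle follows the replacement).
  template <typename TransformT>
  bool instWasInvalidated(Instruction *Inst, TransformT Transform) {
    WeakVH Handle(Inst);
    Transform(Inst); // may delete or replace Inst
    return Handle != Inst;
  }
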
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index fd3ed3e..27b07d9 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1377,8 +1377,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI) {
bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
BasicBlock *BB = BI->getParent();
Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
- if (Cond == 0) return false;
-
+ if (Cond == 0 || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
+ Cond->getParent() != BB || !Cond->hasOneUse())
+ return false;
// Only allow this if the condition is a simple instruction that can be
// executed unconditionally. It must be in the same block as the branch, and
@@ -1387,11 +1388,23 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
// Ignore dbg intrinsics.
while(isa<DbgInfoIntrinsic>(FrontIt))
++FrontIt;
- if ((!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
- Cond->getParent() != BB || &*FrontIt != Cond || !Cond->hasOneUse()) {
- return false;
+
+ // Allow a single instruction to be hoisted in addition to the compare
+ // that feeds the branch. We later ensure that any values that _it_ uses
+ // were also live in the predecessor, so that we don't unnecessarily create
+ // register pressure or inhibit out-of-order execution.
+ Instruction *BonusInst = 0;
+ if (&*FrontIt != Cond &&
+ FrontIt->hasOneUse() && *FrontIt->use_begin() == Cond &&
+ FrontIt->isSafeToSpeculativelyExecute()) {
+ BonusInst = &*FrontIt;
+ ++FrontIt;
}
+ // Only a single bonus inst is allowed.
+ if (&*FrontIt != Cond)
+ return false;
+
// Make sure the instruction after the condition is the cond branch.
BasicBlock::iterator CondIt = Cond; ++CondIt;
// Ignore dbg intrinsics.
@@ -1429,6 +1442,44 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
!SafeToMergeTerminators(BI, PBI))
continue;
+ // Ensure that any values used in the bonus instruction are also used
+ // by the terminator of the predecessor. This means that those values
+ // must already have been resolved, so we won't be inhibiting the
+ // out-of-order core by speculating them earlier.
+ if (BonusInst) {
+ // Collect the values used by the bonus inst
+ SmallPtrSet<Value*, 4> UsedValues;
+ for (Instruction::op_iterator OI = BonusInst->op_begin(),
+ OE = BonusInst->op_end(); OI != OE; ++OI) {
+ Value* V = *OI;
+ if (!isa<Constant>(V))
+ UsedValues.insert(V);
+ }
+
+ SmallVector<std::pair<Value*, unsigned>, 4> Worklist;
+ Worklist.push_back(std::make_pair(PBI->getOperand(0), 0));
+
+ // Walk up to four levels back up the use-def chain of the predecessor's
+ // terminator to see if all those values were used. The choice of four
+ // levels is arbitrary, to provide a compile-time-cost bound.
+ while (!Worklist.empty()) {
+ std::pair<Value*, unsigned> Pair = Worklist.back();
+ Worklist.pop_back();
+
+ if (Pair.second >= 4) continue;
+ UsedValues.erase(Pair.first);
+ if (UsedValues.empty()) break;
+
+ if (Instruction* I = dyn_cast<Instruction>(Pair.first)) {
+ for (Instruction::op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ Worklist.push_back(std::make_pair(OI->get(), Pair.second+1));
+ }
+ }
+
+ if (!UsedValues.empty()) return false;
+ }
+
Instruction::BinaryOps Opc;
bool InvertPredCond = false;
@@ -1457,9 +1508,19 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
PBI->setSuccessor(1, OldTrue);
}
+ // If we have a bonus inst, clone it into the predecessor block.
+ Instruction *NewBonus = 0;
+ if (BonusInst) {
+ NewBonus = BonusInst->clone();
+ PredBlock->getInstList().insert(PBI, NewBonus);
+ NewBonus->takeName(BonusInst);
+ BonusInst->setName(BonusInst->getName()+".old");
+ }
+
// Clone Cond into the predecessor basic block, and or/and the
// two conditions together.
Instruction *New = Cond->clone();
+ if (BonusInst) New->replaceUsesOfWith(BonusInst, NewBonus);
PredBlock->getInstList().insert(PBI, New);
New->takeName(Cond);
Cond->setName(New->getName()+".old");
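
One subtlety in the hunk above: clone() copies operand references verbatim, so the cloned condition initially still consumes the original bonus instruction; replaceUsesOfWith is what redirects it to the clone. The pattern reduced to its essentials, using the identifiers from the hunk:

  // Clone the bonus instruction first, then the condition, then remap the
  // cloned condition's operand so it consumes the clone rather than the
  // original, which lives in the wrong block.
  Instruction *NewBonus = BonusInst->clone();
  PredBlock->getInstList().insert(PBI, NewBonus);

  Instruction *New = Cond->clone();
  New->replaceUsesOfWith(BonusInst, NewBonus); // the critical remapping step
  PredBlock->getInstList().insert(PBI, New);
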
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 7a471ef..09b8aa5 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -90,8 +90,7 @@ enum PrefixType {
/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either
/// prefixed with % (if the string only contains simple characters) or is
/// surrounded with ""'s (if it has special chars in it). Print it out.
-static void PrintLLVMName(raw_ostream &OS, const StringRef &Name,
- PrefixType Prefix) {
+static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) {
assert(Name.data() && "Cannot get empty name!");
switch (Prefix) {
default: llvm_unreachable("Bad prefix!");
@@ -855,8 +854,9 @@ static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
}
}
-static void WriteConstantInt(raw_ostream &Out, const Constant *CV,
- TypePrinting &TypePrinter, SlotTracker *Machine) {
+static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
+ TypePrinting &TypePrinter,
+ SlotTracker *Machine) {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
if (CI->getType()->isIntegerTy(1)) {
Out << (CI->getZExtValue() ? "true" : "false");
@@ -1147,7 +1147,7 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
const Constant *CV = dyn_cast<Constant>(V);
if (CV && !isa<GlobalValue>(CV)) {
assert(TypePrinter && "Constants require TypePrinting!");
- WriteConstantInt(Out, CV, *TypePrinter, Machine);
+ WriteConstantInternal(Out, CV, *TypePrinter, Machine);
return;
}
@@ -2128,7 +2128,7 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
} else if (const MDNode *N = dyn_cast<MDNode>(this)) {
const Function *F = N->getFunction();
SlotTracker SlotTable(F);
- AssemblyWriter W(OS, SlotTable, F ? getModuleFromVal(F) : 0, AAW);
+ AssemblyWriter W(OS, SlotTable, F ? F->getParent() : 0, AAW);
W.printMDNodeBody(N);
} else if (const NamedMDNode *N = dyn_cast<NamedMDNode>(this)) {
SlotTracker SlotTable(N->getParent());
@@ -2138,7 +2138,7 @@ void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
TypePrinting TypePrinter;
TypePrinter.print(C->getType(), OS);
OS << ' ';
- WriteConstantInt(OS, C, TypePrinter, 0);
+ WriteConstantInternal(OS, C, TypePrinter, 0);
} else if (isa<InlineAsm>(this) || isa<MDString>(this) ||
isa<Argument>(this)) {
WriteAsOperand(OS, this, true, 0);
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 1d3a058..3100d4a 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -78,7 +78,8 @@ void MDNodeOperand::allUsesReplacedWith(Value *NV) {
/// getOperandPtr - Helper function to get the MDNodeOperands co-allocated at
/// the end of the MDNode.
static MDNodeOperand *getOperandPtr(MDNode *N, unsigned Op) {
- assert(Op < N->getNumOperands() && "Invalid operand number");
+ // Use <= instead of < to permit a one-past-the-end address.
+ assert(Op <= N->getNumOperands() && "Invalid operand number");
return reinterpret_cast<MDNodeOperand*>(N+1)+Op;
}