diff options
Diffstat (limited to 'lib')
97 files changed, 3619 insertions, 2664 deletions
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 96bb027..dda1fba 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -564,21 +564,6 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, unsigned BitWidth = TD->getTypeSizeInBits(TD->getIntPtrType(Ptr->getContext())); - APInt BasePtr(BitWidth, 0); - bool BaseIsInt = true; - if (!Ptr->isNullValue()) { - // If this is a inttoptr from a constant int, we can fold this as the base, - // otherwise we can't. - if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) - if (CE->getOpcode() == Instruction::IntToPtr) - if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) { - BasePtr = Base->getValue(); - BasePtr.zextOrTrunc(BitWidth); - } - - if (BasePtr == 0) - BaseIsInt = false; - } // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' @@ -615,7 +600,14 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps, // If the base value for this address is a literal integer value, fold the // getelementptr to the resulting integer value casted to the pointer type. - if (BaseIsInt) { + APInt BasePtr(BitWidth, 0); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) + if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantInt *Base = dyn_cast<ConstantInt>(CE->getOperand(0))) { + BasePtr = Base->getValue(); + BasePtr.zextOrTrunc(BitWidth); + } + if (Ptr->isNullValue() || BasePtr != 0) { Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr); return ConstantExpr::getIntToPtr(C, ResultTy); } @@ -1002,6 +994,8 @@ llvm::canConstantFoldCallTo(const Function *F) { case Intrinsic::usub_with_overflow: case Intrinsic::sadd_with_overflow: case Intrinsic::ssub_with_overflow: + case Intrinsic::convert_from_fp16: + case Intrinsic::convert_to_fp16: return true; default: return false; @@ -1082,6 +1076,15 @@ llvm::ConstantFoldCall(Function *F, const Type *Ty = F->getReturnType(); if (NumOperands == 1) { if (ConstantFP *Op = dyn_cast<ConstantFP>(Operands[0])) { + if (Name == "llvm.convert.to.fp16") { + APFloat Val(Op->getValueAPF()); + + bool lost = false; + Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost); + + return ConstantInt::get(F->getContext(), Val.bitcastToAPInt()); + } + if (!Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; /// Currently APFloat versions of these functions do not exist, so we use @@ -1166,6 +1169,20 @@ llvm::ConstantFoldCall(Function *F, return ConstantInt::get(Ty, Op->getValue().countTrailingZeros()); else if (Name.startswith("llvm.ctlz")) return ConstantInt::get(Ty, Op->getValue().countLeadingZeros()); + else if (Name == "llvm.convert.from.fp16") { + APFloat Val(Op->getValue()); + + bool lost = false; + APFloat::opStatus status = + Val.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &lost); + + // Conversion is always precise. + status = status; + assert(status == APFloat::opOK && !lost && + "Precision lost during fp16 constfolding"); + + return ConstantFP::get(F->getContext(), Val); + } return 0; } diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp index bb4f46d..e101947 100644 --- a/lib/Analysis/LoopDependenceAnalysis.cpp +++ b/lib/Analysis/LoopDependenceAnalysis.cpp @@ -119,8 +119,7 @@ bool LoopDependenceAnalysis::findOrInsertDependencePair(Value *A, P = Pairs.FindNodeOrInsertPos(id, insertPos); if (P) return true; - P = PairAllocator.Allocate<DependencePair>(); - new (P) DependencePair(id, A, B); + P = new (PairAllocator) DependencePair(id, A, B); Pairs.InsertNode(P, insertPos); return false; } diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 15f072d..1af271a 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -141,7 +141,7 @@ bool SCEV::isAllOnesValue() const { } SCEVCouldNotCompute::SCEVCouldNotCompute() : - SCEV(FoldingSetNodeID(), scCouldNotCompute) {} + SCEV(FoldingSetNodeIDRef(), scCouldNotCompute) {} bool SCEVCouldNotCompute::isLoopInvariant(const Loop *L) const { llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); @@ -177,8 +177,7 @@ const SCEV *ScalarEvolution::getConstant(ConstantInt *V) { ID.AddPointer(V); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVConstant>(); - new (S) SCEVConstant(ID, V); + SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -199,7 +198,7 @@ void SCEVConstant::print(raw_ostream &OS) const { WriteAsOperand(OS, V, false); } -SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeID &ID, +SCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, unsigned SCEVTy, const SCEV *op, const Type *ty) : SCEV(ID, SCEVTy), Op(op), Ty(ty) {} @@ -211,7 +210,7 @@ bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { return Op->properlyDominates(BB, DT); } -SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID, +SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scTruncate, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && @@ -223,7 +222,7 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const { OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; } -SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID, +SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scZeroExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && @@ -235,7 +234,7 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const { OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")"; } -SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID, +SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, const SCEV *op, const Type *ty) : SCEVCastExpr(ID, scSignExtend, op, ty) { assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) && @@ -248,10 +247,10 @@ void SCEVSignExtendExpr::print(raw_ostream &OS) const { } void SCEVCommutativeExpr::print(raw_ostream &OS) const { - assert(Operands.size() > 1 && "This plus expr shouldn't exist!"); + assert(NumOperands > 1 && "This plus expr shouldn't exist!"); const char *OpStr = getOperationStr(); OS << "(" << *Operands[0]; - for (unsigned i = 1, e = Operands.size(); i != e; ++i) + for (unsigned i = 1, e = NumOperands; i != e; ++i) OS << OpStr << *Operands[i]; OS << ")"; } @@ -329,7 +328,7 @@ SCEVAddRecExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const { void SCEVAddRecExpr::print(raw_ostream &OS) const { OS << "{" << *Operands[0]; - for (unsigned i = 1, e = Operands.size(); i != e; ++i) + for (unsigned i = 1, e = NumOperands; i != e; ++i) OS << ",+," << *Operands[i]; OS << "}<"; WriteAsOperand(OS, L->getHeader(), /*PrintType=*/false); @@ -846,8 +845,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVTruncateExpr>(); - new (S) SCEVTruncateExpr(ID, Op, Ty); + SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), + Op, Ty); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -981,8 +980,8 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op, // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVZeroExtendExpr>(); - new (S) SCEVZeroExtendExpr(ID, Op, Ty); + SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -1116,8 +1115,8 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op, // The cast wasn't folded; create an explicit cast node. // Recompute the insert position, as it may have been invalidated. if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVSignExtendExpr>(); - new (S) SCEVSignExtendExpr(ID, Op, Ty); + SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), + Op, Ty); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -1202,23 +1201,23 @@ static bool CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, SmallVector<const SCEV *, 8> &NewOps, APInt &AccumulatedConstant, - const SmallVectorImpl<const SCEV *> &Ops, + const SCEV *const *Ops, size_t NumOperands, const APInt &Scale, ScalarEvolution &SE) { bool Interesting = false; // Iterate over the add operands. - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + for (unsigned i = 0, e = NumOperands; i != e; ++i) { const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]); if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) { APInt NewScale = Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue(); if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) { // A multiplication of a constant with another add; recurse. + const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1)); Interesting |= CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, - cast<SCEVAddExpr>(Mul->getOperand(1)) - ->getOperands(), + Add->op_begin(), Add->getNumOperands(), NewScale, SE); } else { // A multiplication of a constant with some other value. Update @@ -1427,7 +1426,8 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, SmallVector<const SCEV *, 8> NewOps; APInt AccumulatedConstant(BitWidth, 0); if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, - Ops, APInt(BitWidth, 1), *this)) { + Ops.data(), Ops.size(), + APInt(BitWidth, 1), *this)) { // Some interesting folding opportunity is present, so its worthwhile to // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. @@ -1611,8 +1611,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, SCEVAddExpr *S = static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { - S = SCEVAllocator.Allocate<SCEVAddExpr>(); - new (S) SCEVAddExpr(ID, Ops); + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVAddExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); } if (HasNUW) S->setHasNoUnsignedWrap(true); @@ -1819,8 +1821,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, SCEVMulExpr *S = static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { - S = SCEVAllocator.Allocate<SCEVMulExpr>(); - new (S) SCEVMulExpr(ID, Ops); + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); } if (HasNUW) S->setHasNoUnsignedWrap(true); @@ -1880,9 +1884,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, const SCEV *Op = M->getOperand(i); const SCEV *Div = getUDivExpr(Op, RHSC); if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { - const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands(); - Operands = SmallVector<const SCEV *, 4>(MOperands.begin(), - MOperands.end()); + Operands = SmallVector<const SCEV *, 4>(M->op_begin(), M->op_end()); Operands[i] = Div; return getMulExpr(Operands); } @@ -1921,8 +1923,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, ID.AddPointer(RHS); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVUDivExpr>(); - new (S) SCEVUDivExpr(ID, LHS, RHS); + SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), + LHS, RHS); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -2030,8 +2032,10 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, SCEVAddRecExpr *S = static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); if (!S) { - S = SCEVAllocator.Allocate<SCEVAddRecExpr>(); - new (S) SCEVAddRecExpr(ID, Operands, L); + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Operands.size()); + std::uninitialized_copy(Operands.begin(), Operands.end(), O); + S = new (SCEVAllocator) SCEVAddRecExpr(ID.Intern(SCEVAllocator), + O, Operands.size(), L); UniqueSCEVs.InsertNode(S, IP); } if (HasNUW) S->setHasNoUnsignedWrap(true); @@ -2130,8 +2134,10 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { ID.AddPointer(Ops[i]); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVSMaxExpr>(); - new (S) SCEVSMaxExpr(ID, Ops); + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -2227,8 +2233,10 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { ID.AddPointer(Ops[i]); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVUMaxExpr>(); - new (S) SCEVUMaxExpr(ID, Ops); + const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); + SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), + O, Ops.size()); UniqueSCEVs.InsertNode(S, IP); return S; } @@ -2290,8 +2298,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) { ID.AddPointer(V); void *IP = 0; if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; - SCEV *S = SCEVAllocator.Allocate<SCEVUnknown>(); - new (S) SCEVUnknown(ID, V); + SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V); UniqueSCEVs.InsertNode(S, IP); return S; } diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 3c2cbfb..138cdc6 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -232,9 +232,7 @@ static bool FactorOutConstant(const SCEV *&S, const SCEVConstant *FC = cast<SCEVConstant>(Factor); if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { - const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands(); - SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(), - MOperands.end()); + SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); NewMulOps[0] = SE.getConstant(C->getValue()->getValue().sdiv( FC->getValue()->getValue())); @@ -249,9 +247,7 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *Remainder = SE.getIntegerSCEV(0, SOp->getType()); if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) && Remainder->isZero()) { - const SmallVectorImpl<const SCEV *> &MOperands = M->getOperands(); - SmallVector<const SCEV *, 4> NewMulOps(MOperands.begin(), - MOperands.end()); + SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); NewMulOps[i] = SOp; S = SE.getMulExpr(NewMulOps); return true; @@ -297,13 +293,11 @@ static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops, SE.getAddExpr(NoAddRecs); // If it returned an add, use the operands. Otherwise it simplified // the sum into a single value, so just use that. + Ops.clear(); if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum)) - Ops = Add->getOperands(); - else { - Ops.clear(); - if (!Sum->isZero()) - Ops.push_back(Sum); - } + Ops.insert(Ops.end(), Add->op_begin(), Add->op_end()); + else if (!Sum->isZero()) + Ops.push_back(Sum); // Then append the addrecs. Ops.insert(Ops.end(), AddRecs.begin(), AddRecs.end()); } @@ -1060,10 +1054,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (CanonicalIV && SE.getTypeSizeInBits(CanonicalIV->getType()) > SE.getTypeSizeInBits(Ty)) { - const SmallVectorImpl<const SCEV *> &Ops = S->getOperands(); - SmallVector<const SCEV *, 4> NewOps(Ops.size()); - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - NewOps[i] = SE.getAnyExtendExpr(Ops[i], CanonicalIV->getType()); + SmallVector<const SCEV *, 4> NewOps(S->getNumOperands()); + for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) + NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop())); BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); @@ -1078,8 +1071,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { - const SmallVectorImpl<const SCEV *> &SOperands = S->getOperands(); - SmallVector<const SCEV *, 4> NewOps(SOperands.begin(), SOperands.end()); + SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end()); NewOps[0] = SE.getIntegerSCEV(0, Ty); const SCEV *Rest = SE.getAddRecExpr(NewOps, L); @@ -1248,6 +1240,15 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { return LHS; } +Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty, + Instruction *I) { + BasicBlock::iterator IP = I; + while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP)) + ++IP; + Builder.SetInsertPoint(IP->getParent(), IP); + return expandCodeFor(SH, Ty); +} + Value *SCEVExpander::expandCodeFor(const SCEV *SH, const Type *Ty) { // Expand the code for this SCEV. Value *V = expand(SH); @@ -1286,9 +1287,7 @@ Value *SCEVExpander::expand(const SCEV *S) { // there) so that it is guaranteed to dominate any user inside the loop. if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop) InsertPt = L->getHeader()->getFirstNonPHI(); - while (isa<DbgInfoIntrinsic>(InsertPt)) - InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); - while (isInsertedInstruction(InsertPt)) + while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); break; } @@ -1324,7 +1323,8 @@ void SCEVExpander::rememberInstruction(Value *I) { // subsequently inserted code will be dominated. if (Builder.GetInsertPoint() == I) { BasicBlock::iterator It = cast<Instruction>(I); - do { ++It; } while (isInsertedInstruction(It)); + do { ++It; } while (isInsertedInstruction(It) || + isa<DbgInfoIntrinsic>(It)); Builder.SetInsertPoint(Builder.GetInsertBlock(), It); } } @@ -1332,7 +1332,7 @@ void SCEVExpander::rememberInstruction(Value *I) { void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { // If we acquired more instructions since the old insert point was saved, // advance past them. - while (isInsertedInstruction(I)) ++I; + while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I; Builder.SetInsertPoint(BB, I); } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index a328837..b9453c9 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -293,6 +293,8 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() { } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) { NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(), UserCS->getType()->isPacked()); + } else if (ConstantUnion *UserCU = dyn_cast<ConstantUnion>(UserC)) { + NewC = ConstantUnion::get(UserCU->getType(), NewOps[0]); } else if (isa<ConstantVector>(UserC)) { NewC = ConstantVector::get(&NewOps[0], NewOps.size()); } else { @@ -1015,6 +1017,11 @@ bool BitcodeReader::ParseConstants() { Elts.push_back(ValueList.getConstantFwdRef(Record[i], STy->getElementType(i))); V = ConstantStruct::get(STy, Elts); + } else if (const UnionType *UnTy = dyn_cast<UnionType>(CurTy)) { + uint64_t Index = Record[0]; + Constant *Val = ValueList.getConstantFwdRef(Record[1], + UnTy->getElementType(Index)); + V = ConstantUnion::get(UnTy, Val); } else if (const ArrayType *ATy = dyn_cast<ArrayType>(CurTy)) { const Type *EltTy = ATy->getElementType(); for (unsigned i = 0; i != Size; ++i) diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 82e73b5..3ab2726 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -808,11 +808,25 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, else if (isCStr7) AbbrevToUse = CString7Abbrev; } else if (isa<ConstantArray>(C) || isa<ConstantStruct>(V) || - isa<ConstantUnion>(C) || isa<ConstantVector>(V)) { + isa<ConstantVector>(V)) { Code = bitc::CST_CODE_AGGREGATE; for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) Record.push_back(VE.getValueID(C->getOperand(i))); AbbrevToUse = AggregateAbbrev; + } else if (isa<ConstantUnion>(C)) { + Code = bitc::CST_CODE_AGGREGATE; + + // Unions only have one entry but we must send type along with it. + const Type *EntryKind = C->getOperand(0)->getType(); + + const UnionType *UnTy = cast<UnionType>(C->getType()); + int UnionIndex = UnTy->getElementTypeIndex(EntryKind); + assert(UnionIndex != -1 && "Constant union contains invalid entry"); + + Record.push_back(UnionIndex); + Record.push_back(VE.getValueID(C->getOperand(0))); + + AbbrevToUse = AggregateAbbrev; } else if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { switch (CE->getOpcode()) { default: diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 2636e2c..1d4f7f7 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1138,6 +1138,21 @@ static void EmitGlobalConstantStruct(const ConstantStruct *CS, "Layout of constant struct may be incorrect!"); } +static void EmitGlobalConstantUnion(const ConstantUnion *CU, + unsigned AddrSpace, AsmPrinter &AP) { + const TargetData *TD = AP.TM.getTargetData(); + unsigned Size = TD->getTypeAllocSize(CU->getType()); + + const Constant *Contents = CU->getOperand(0); + unsigned FilledSize = TD->getTypeAllocSize(Contents->getType()); + + // Print the actually filled part + AP.EmitGlobalConstant(Contents, AddrSpace); + + // And pad with enough zeroes + AP.OutStreamer.EmitZeros(Size-FilledSize, AddrSpace); +} + static void EmitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AsmPrinter &AP) { // FP Constants are printed as integer constants to avoid losing @@ -1257,9 +1272,6 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) return EmitGlobalConstantFP(CFP, AddrSpace, *this); - - if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) - return EmitGlobalConstantVector(V, AddrSpace, *this); if (isa<ConstantPointerNull>(CV)) { unsigned Size = TM.getTargetData()->getTypeAllocSize(CV->getType()); @@ -1267,6 +1279,12 @@ void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { return; } + if (const ConstantUnion *CVU = dyn_cast<ConstantUnion>(CV)) + return EmitGlobalConstantUnion(CVU, AddrSpace, *this); + + if (const ConstantVector *V = dyn_cast<ConstantVector>(CV)) + return EmitGlobalConstantVector(V, AddrSpace, *this); + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. OutStreamer.EmitValue(LowerConstant(CV, *this), diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 866f457..7153fe2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2531,8 +2531,8 @@ void DwarfDebug::emitDebugInfo() { Asm->OutStreamer.AddComment("DWARF version number"); Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); - EmitSectionOffset(getTempLabel("abbrev_begin"),getTempLabel("section_abbrev"), - true, false); + EmitSectionOffset(getTempLabel("abbrev_begin"),getTempLabel("section_abbrev"), + true); Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(TD->getPointerSize()); @@ -2842,8 +2842,8 @@ emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo) { Asm->OutStreamer.EmitLabel(DebugFrameBegin); Asm->OutStreamer.AddComment("FDE CIE offset"); - EmitSectionOffset(getTempLabel("debug_frame_common"), - getTempLabel("section_debug_frame"), true, false); + EmitSectionOffset(getTempLabel("debug_frame_common"), + getTempLabel("section_debug_frame"), true); Asm->OutStreamer.AddComment("FDE initial location"); MCSymbol *FuncBeginSym = getDWLabel("func_begin", DebugFrameInfo.Number); @@ -2878,8 +2878,7 @@ void DwarfDebug::emitDebugPubNames() { Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); EmitSectionOffset(getDWLabel("info_begin", ModuleCU->getID()), - getTempLabel("section_info"), - true, false); + getTempLabel("section_info"), true); Asm->OutStreamer.AddComment("Compilation Unit Length"); EmitDifference(getDWLabel("info_end", ModuleCU->getID()), @@ -2920,7 +2919,7 @@ void DwarfDebug::emitDebugPubTypes() { Asm->OutStreamer.AddComment("Offset of Compilation ModuleCU Info"); EmitSectionOffset(getDWLabel("info_begin", ModuleCU->getID()), - getTempLabel("section_info"), true, false); + getTempLabel("section_info"), true); Asm->OutStreamer.AddComment("Compilation ModuleCU Length"); EmitDifference(getDWLabel("info_end", ModuleCU->getID()), @@ -3068,8 +3067,8 @@ void DwarfDebug::emitDebugInlineInfo() { getTempLabel("section_str"), true); Asm->OutStreamer.AddComment("Function name"); - EmitSectionOffset(getStringPoolEntry(Name), getTempLabel("section_str"), - false, true); + EmitSectionOffset(getStringPoolEntry(Name), getTempLabel("section_str"), + true); EmitULEB128(Labels.size(), "Inline count"); for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(), diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 13ae43d..151e9cd 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -192,7 +192,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, MadeChange |= MadeChangeThisIteration; } - // See if any jump tables have become mergable or dead as the code generator + // See if any jump tables have become dead as the code generator // did its thing. MachineJumpTableInfo *JTI = MF.getJumpTableInfo(); if (JTI == 0) { @@ -200,27 +200,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, return MadeChange; } - const std::vector<MachineJumpTableEntry> &JTs = JTI->getJumpTables(); - // Figure out how these jump tables should be merged. - std::vector<unsigned> JTMapping; - JTMapping.reserve(JTs.size()); - - // We always keep the 0th jump table. - JTMapping.push_back(0); - - // Scan the jump tables, seeing if there are any duplicates. Note that this - // is N^2, which should be fixed someday. - for (unsigned i = 1, e = JTs.size(); i != e; ++i) { - if (JTs[i].MBBs.empty()) - JTMapping.push_back(i); - else - JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs)); - } - - // If a jump table was merge with another one, walk the function rewriting - // references to jump tables to reference the new JT ID's. Keep track of - // whether we see a jump table idx, if not, we can delete the JT. - BitVector JTIsLive(JTs.size()); + // Walk the function to find jump tables that are live. + BitVector JTIsLive(JTI->getJumpTables().size()); for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) { for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); @@ -228,17 +209,14 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { MachineOperand &Op = I->getOperand(op); if (!Op.isJTI()) continue; - unsigned NewIdx = JTMapping[Op.getIndex()]; - Op.setIndex(NewIdx); // Remember that this JT is live. - JTIsLive.set(NewIdx); + JTIsLive.set(Op.getIndex()); } } - // Finally, remove dead jump tables. This happens either because the - // indirect jump was unreachable (and thus deleted) or because the jump - // table was merged with some other one. + // Finally, remove dead jump tables. This happens when the + // indirect jump was unreachable (and thus deleted). for (unsigned i = 0, e = JTIsLive.size(); i != e; ++i) if (!JTIsLive.test(i)) { JTI->RemoveJumpTable(i); @@ -1143,22 +1121,6 @@ ReoptimizeBlock: !IsBetterFallthrough(PriorTBB, MBB)) DoTransform = false; - // We don't want to do this transformation if we have control flow like: - // br cond BB2 - // BB1: - // .. - // jmp BBX - // BB2: - // .. - // ret - // - // In this case, we could actually be moving the return block *into* a - // loop! - if (DoTransform && !MBB->succ_empty() && - (!PriorTBB->canFallThrough() || PriorTBB->empty())) - DoTransform = false; - - if (DoTransform) { // Reverse the branch so we will fall through on the previous true cond. SmallVector<MachineOperand, 4> NewPriorCond(PriorCond); diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 1a23be0..6d7cc51 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -332,7 +332,7 @@ void MachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { MCSymbol *MachineCodeAnalysis::InsertLabel(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, DebugLoc DL) const { - MCSymbol *Label = MBB.getParent()->getContext().GetOrCreateTemporarySymbol(); + MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); return Label; } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index dbb5e19..b3e9216 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -141,7 +141,7 @@ void LiveIntervals::printInstrs(raw_ostream &OS) const { for (MachineBasicBlock::iterator mii = mbbi->begin(), mie = mbbi->end(); mii != mie; ++mii) { if (mii->isDebugValue()) - OS << SlotIndex::getEmptyKey() << '\t' << *mii; + OS << " \t" << *mii; else OS << getInstructionIndex(mii) << '\t' << *mii; } @@ -583,6 +583,16 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // Look for kills, if it reaches a def before it's killed, then it shouldn't // be considered a livein. MachineBasicBlock::iterator mi = MBB->begin(); + MachineBasicBlock::iterator E = MBB->end(); + // Skip over DBG_VALUE at the start of the MBB. + if (mi != E && mi->isDebugValue()) { + while (++mi != E && mi->isDebugValue()) + ; + if (mi == E) + // MBB is empty except for DBG_VALUE's. + return; + } + SlotIndex baseIndex = MIIdx; SlotIndex start = baseIndex; if (getInstructionFromIndex(baseIndex) == 0) @@ -591,12 +601,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, SlotIndex end = baseIndex; bool SeenDefUse = false; - MachineBasicBlock::iterator E = MBB->end(); while (mi != E) { - while (mi != E && mi->isDebugValue()) - ++mi; - if (mi == E) - break; if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); end = baseIndex.getDefIndex(); @@ -613,10 +618,11 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, break; } - ++mi; - if (mi != E && !mi->isDebugValue()) { + while (++mi != E && mi->isDebugValue()) + // Skip over DBG_VALUE. + ; + if (mi != E) baseIndex = indexes_->getNextNonNullIndex(baseIndex); - } } // Live-in register might not be used at all. diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 37f3d22..5772b2f 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -88,18 +88,15 @@ MachineFunction::MachineFunction(Function *F, const TargetMachine &TM, unsigned FunctionNum, MCContext &ctx) : Fn(F), Target(TM), Ctx(ctx) { if (TM.getRegisterInfo()) - RegInfo = new (Allocator.Allocate<MachineRegisterInfo>()) - MachineRegisterInfo(*TM.getRegisterInfo()); + RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo()); else RegInfo = 0; MFInfo = 0; - FrameInfo = new (Allocator.Allocate<MachineFrameInfo>()) - MachineFrameInfo(*TM.getFrameInfo()); + FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameInfo()); if (Fn->hasFnAttr(Attribute::StackAlignment)) FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs( Fn->getAttributes().getFnAttributes())); - ConstantPool = new (Allocator.Allocate<MachineConstantPool>()) - MachineConstantPool(TM.getTargetData()); + ConstantPool = new (Allocator) MachineConstantPool(TM.getTargetData()); Alignment = TM.getTargetLowering()->getFunctionAlignment(F); FunctionNumber = FunctionNum; JumpTableInfo = 0; @@ -132,7 +129,7 @@ MachineJumpTableInfo *MachineFunction:: getOrCreateJumpTableInfo(unsigned EntryKind) { if (JumpTableInfo) return JumpTableInfo; - JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>()) + JumpTableInfo = new (Allocator) MachineJumpTableInfo((MachineJumpTableInfo::JTEntryKind)EntryKind); return JumpTableInfo; } @@ -229,14 +226,13 @@ MachineMemOperand * MachineFunction::getMachineMemOperand(const Value *v, unsigned f, int64_t o, uint64_t s, unsigned base_alignment) { - return new (Allocator.Allocate<MachineMemOperand>()) - MachineMemOperand(v, f, o, s, base_alignment); + return new (Allocator) MachineMemOperand(v, f, o, s, base_alignment); } MachineMemOperand * MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO, int64_t Offset, uint64_t Size) { - return new (Allocator.Allocate<MachineMemOperand>()) + return new (Allocator) MachineMemOperand(MMO->getValue(), MMO->getFlags(), int64_t(uint64_t(MMO->getOffset()) + uint64_t(Offset)), @@ -600,17 +596,15 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const TargetData &TD) const { return ~0; } -/// getJumpTableIndex - Create a new jump table entry in the jump table info -/// or return an existing one. +/// createJumpTableIndex - Create a new jump table entry in the jump table info. /// -unsigned MachineJumpTableInfo::getJumpTableIndex( +unsigned MachineJumpTableInfo::createJumpTableIndex( const std::vector<MachineBasicBlock*> &DestBBs) { assert(!DestBBs.empty() && "Cannot create an empty jump table!"); JumpTables.push_back(MachineJumpTableEntry(DestBBs)); return JumpTables.size()-1; } - /// ReplaceMBBInJumpTables - If Old is the target of any jump tables, update /// the jump tables to branch to New instead. bool MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old, diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp index 194fc14..2c69065 100644 --- a/lib/CodeGen/RegAllocLocal.cpp +++ b/lib/CodeGen/RegAllocLocal.cpp @@ -739,7 +739,7 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { // Physical registers and those that are not live-out of the block are // killed/dead at their last use/def within this block. - if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) + if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) { if (MO.isUse()) { // Don't mark uses that are tied to defs as kills. if (!MI->isRegTiedToDefOperand(idx)) @@ -747,6 +747,7 @@ void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { } else { MO.setIsDead(true); } + } } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index b5af2c1..63ca8e6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -851,8 +851,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: - case ISD::FP16_TO_FP32: - case ISD::FP32_TO_FP16: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8363c3a..ed5f24c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2167,7 +2167,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain, } else return LdOp; } else { - unsigned NumElts = WidenWidth / LdWidth; + unsigned NumElts = WidenWidth / NewVTWidth; EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts); SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp); return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 480c068..ed9146d 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -906,8 +906,7 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { return SDValue(N, 0); if (!N) { - N = NodeAllocator.Allocate<ConstantSDNode>(); - new (N) ConstantSDNode(isT, &Val, EltVT); + N = new (NodeAllocator) ConstantSDNode(isT, &Val, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); } @@ -950,8 +949,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ return SDValue(N, 0); if (!N) { - N = NodeAllocator.Allocate<ConstantFPSDNode>(); - new (N) ConstantFPSDNode(isTarget, &V, EltVT); + N = new (NodeAllocator) ConstantFPSDNode(isTarget, &V, EltVT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); } @@ -1010,8 +1008,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>(); - new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags); + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, GV, VT, + Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1026,8 +1024,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>(); - new (N) FrameIndexSDNode(FI, VT, isTarget); + SDNode *N = new (NodeAllocator) FrameIndexSDNode(FI, VT, isTarget); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1046,8 +1043,8 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>(); - new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags); + SDNode *N = new (NodeAllocator) JumpTableSDNode(JTI, VT, isTarget, + TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1072,8 +1069,8 @@ SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>(); - new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1099,8 +1096,8 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>(); - new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags); + SDNode *N = new (NodeAllocator) ConstantPoolSDNode(isTarget, C, VT, Offset, + Alignment, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1114,8 +1111,7 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>(); - new (N) BasicBlockSDNode(MBB); + SDNode *N = new (NodeAllocator) BasicBlockSDNode(MBB); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1130,8 +1126,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { ExtendedValueTypeNodes[VT] : ValueTypeNodes[VT.getSimpleVT().SimpleTy]; if (N) return SDValue(N, 0); - N = NodeAllocator.Allocate<VTSDNode>(); - new (N) VTSDNode(VT); + N = new (NodeAllocator) VTSDNode(VT); AllNodes.push_back(N); return SDValue(N, 0); } @@ -1139,8 +1134,7 @@ SDValue SelectionDAG::getValueType(EVT VT) { SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) { SDNode *&N = ExternalSymbols[Sym]; if (N) return SDValue(N, 0); - N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); - new (N) ExternalSymbolSDNode(false, Sym, 0, VT); + N = new (NodeAllocator) ExternalSymbolSDNode(false, Sym, 0, VT); AllNodes.push_back(N); return SDValue(N, 0); } @@ -1151,8 +1145,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT, TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym, TargetFlags)]; if (N) return SDValue(N, 0); - N = NodeAllocator.Allocate<ExternalSymbolSDNode>(); - new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); + N = new (NodeAllocator) ExternalSymbolSDNode(true, Sym, TargetFlags, VT); AllNodes.push_back(N); return SDValue(N, 0); } @@ -1162,8 +1155,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) { CondCodeNodes.resize(Cond+1); if (CondCodeNodes[Cond] == 0) { - CondCodeSDNode *N = NodeAllocator.Allocate<CondCodeSDNode>(); - new (N) CondCodeSDNode(Cond); + CondCodeSDNode *N = new (NodeAllocator) CondCodeSDNode(Cond); CondCodeNodes[Cond] = N; AllNodes.push_back(N); } @@ -1268,8 +1260,8 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, int *MaskAlloc = OperandAllocator.Allocate<int>(NElts); memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); - ShuffleVectorSDNode *N = NodeAllocator.Allocate<ShuffleVectorSDNode>(); - new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); + ShuffleVectorSDNode *N = + new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1292,8 +1284,8 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>(); - new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code); + CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5, + Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1307,8 +1299,7 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<RegisterSDNode>(); - new (N) RegisterSDNode(RegNo, VT); + SDNode *N = new (NodeAllocator) RegisterSDNode(RegNo, VT); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1323,8 +1314,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<EHLabelSDNode>(); - new (N) EHLabelSDNode(dl, Root, Label); + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1344,8 +1334,7 @@ SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>(); - new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags); + SDNode *N = new (NodeAllocator) BlockAddressSDNode(Opc, VT, BA, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1363,8 +1352,7 @@ SDValue SelectionDAG::getSrcValue(const Value *V) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>(); - new (N) SrcValueSDNode(V); + SDNode *N = new (NodeAllocator) SrcValueSDNode(V); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -2313,8 +2301,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<SDNode>(); - new (N) SDNode(Opcode, DL, getVTList(VT)); + SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -2542,12 +2529,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = NodeAllocator.Allocate<UnarySDNode>(); - new (N) UnarySDNode(Opcode, DL, VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); CSEMap.InsertNode(N, IP); } else { - N = NodeAllocator.Allocate<UnarySDNode>(); - new (N) UnarySDNode(Opcode, DL, VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); } AllNodes.push_back(N); @@ -2975,12 +2960,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = NodeAllocator.Allocate<BinarySDNode>(); - new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); CSEMap.InsertNode(N, IP); } else { - N = NodeAllocator.Allocate<BinarySDNode>(); - new (N) BinarySDNode(Opcode, DL, VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); } AllNodes.push_back(N); @@ -3053,12 +3036,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = NodeAllocator.Allocate<TernarySDNode>(); - new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); } else { - N = NodeAllocator.Allocate<TernarySDNode>(); - new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); } AllNodes.push_back(N); @@ -3659,8 +3640,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); - new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO); + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, Cmp, Swp, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3722,8 +3703,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, cast<AtomicSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode* N = NodeAllocator.Allocate<AtomicSDNode>(); - new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO); + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, + Ptr, Val, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3801,12 +3782,12 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, return SDValue(E, 0); } - N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); - new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, + MemVT, MMO); CSEMap.InsertNode(N, IP); } else { - N = NodeAllocator.Allocate<MemIntrinsicSDNode>(); - new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, + MemVT, MMO); } AllNodes.push_back(N); return SDValue(N, 0); @@ -3879,8 +3860,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, cast<LoadSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = NodeAllocator.Allocate<LoadSDNode>(); - new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO); + SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType, + MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -3961,8 +3942,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); - new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, + false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -4025,8 +4006,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, cast<StoreSDNode>(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); - new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, + true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -4048,10 +4029,10 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = NodeAllocator.Allocate<StoreSDNode>(); - new (N) StoreSDNode(Ops, dl, VTs, AM, - ST->isTruncatingStore(), ST->getMemoryVT(), - ST->getMemOperand()); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM, + ST->isTruncatingStore(), + ST->getMemoryVT(), + ST->getMemOperand()); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -4122,12 +4103,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = NodeAllocator.Allocate<SDNode>(); - new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps); CSEMap.InsertNode(N, IP); } else { - N = NodeAllocator.Allocate<SDNode>(); - new (N) SDNode(Opcode, DL, VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps); } AllNodes.push_back(N); @@ -4190,32 +4169,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return SDValue(E, 0); if (NumOps == 1) { - N = NodeAllocator.Allocate<UnarySDNode>(); - new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); } else if (NumOps == 2) { - N = NodeAllocator.Allocate<BinarySDNode>(); - new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); } else if (NumOps == 3) { - N = NodeAllocator.Allocate<TernarySDNode>(); - new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], + Ops[2]); } else { - N = NodeAllocator.Allocate<SDNode>(); - new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); } CSEMap.InsertNode(N, IP); } else { if (NumOps == 1) { - N = NodeAllocator.Allocate<UnarySDNode>(); - new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); } else if (NumOps == 2) { - N = NodeAllocator.Allocate<BinarySDNode>(); - new (N) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); } else if (NumOps == 3) { - N = NodeAllocator.Allocate<TernarySDNode>(); - new (N) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], + Ops[2]); } else { - N = NodeAllocator.Allocate<SDNode>(); - new (N) SDNode(Opcode, DL, VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); } } AllNodes.push_back(N); @@ -4640,7 +4613,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, // remainder of the current SelectionDAG iteration, so we can allocate // the operands directly out of a pool with no recycling metadata. MN->InitOperands(OperandAllocator.Allocate<SDUse>(NumOps), - Ops, NumOps); + Ops, NumOps); else MN->InitOperands(MN->LocalOperands, Ops, NumOps); MN->OperandsNeedDelete = false; @@ -4814,8 +4787,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, } // Allocate a new MachineSDNode. - N = NodeAllocator.Allocate<MachineSDNode>(); - new (N) MachineSDNode(~Opcode, DL, VTs); + N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs); // Initialize the operands list. if (NumOps > array_lengthof(N->LocalOperands)) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3d9a4d5..12096b9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1675,11 +1675,10 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR, } } - // Create a jump table index for this jump table, or return an existing - // one. + // Create a jump table index for this jump table. unsigned JTEncoding = TLI.getJumpTableEncoding(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) - ->getJumpTableIndex(DestBBs); + ->createJumpTableIndex(DestBBs); // Set the jump table information so that we can codegen it as a second // MachineBasicBlock @@ -2592,6 +2591,11 @@ void SelectionDAGBuilder::visitGetElementPtr(User &I) { } Ty = StTy->getElementType(Field); + } else if (const UnionType *UnTy = dyn_cast<UnionType>(Ty)) { + unsigned Field = cast<ConstantInt>(Idx)->getZExtValue(); + + // Offset canonically 0 for unions, but type changes + Ty = UnTy->getElementType(Field); } else { Ty = cast<SequentialType>(Ty)->getElementType(); @@ -4277,6 +4281,9 @@ isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr, --BBI) { if (&*BBI == I) break; + // Debug info intrinsics do not get in the way of tail call optimization. + if (isa<DbgInfoIntrinsic>(BBI)) + continue; if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() || !BBI->isSafeToSpeculativelyExecute()) return false; diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 5c62118..97e858f 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -31,6 +31,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -1639,11 +1640,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Save a copy of the virtual register live interval. We'll manually // merge this into the "real" physical register live interval this is // coalesced with. - LiveInterval *SavedLI = 0; + OwningPtr<LiveInterval> SavedLI; if (RealDstReg) - SavedLI = li_->dupInterval(&SrcInt); + SavedLI.reset(li_->dupInterval(&SrcInt)); else if (RealSrcReg) - SavedLI = li_->dupInterval(&DstInt); + SavedLI.reset(li_->dupInterval(&DstInt)); // Check if it is necessary to propagate "isDead" property. if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) { @@ -1853,7 +1854,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Manually deleted the live interval copy. if (SavedLI) { SavedLI->clear(); - delete SavedLI; + SavedLI.reset(); } // If resulting interval has a preference that no longer fits because of subreg diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index fa3785d..aa6e2b4 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -495,7 +495,7 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, if (InstrCount == MaxDuplicateCount) return false; // Remember if we saw a call. if (I->getDesc().isCall()) HasCall = true; - if (!I->isPHI()) + if (!I->isPHI() && !I->isDebugValue()) InstrCount += 1; } // Heuristically, don't tail-duplicate calls if it would expand code size, diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 4cf71dc..dba0e14 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -12,11 +12,13 @@ add_llvm_library(LLVMMC MCInstPrinter.cpp MCMachOStreamer.cpp MCNullStreamer.cpp + MCObjectWriter.cpp MCSection.cpp MCSectionELF.cpp MCSectionMachO.cpp MCStreamer.cpp MCSymbol.cpp MCValue.cpp + MachObjectWriter.cpp TargetAsmBackend.cpp ) diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 7f39471..2025463 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" @@ -29,7 +30,7 @@ namespace { class MCAsmStreamer : public MCStreamer { formatted_raw_ostream &OS; const MCAsmInfo &MAI; - MCInstPrinter *InstPrinter; + OwningPtr<MCInstPrinter> InstPrinter; MCCodeEmitter *Emitter; SmallString<128> CommentToEmit; diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 4cf8b7e..beecf7e 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -10,18 +10,16 @@ #define DEBUG_TYPE "assembler" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCAsmLayout.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MachO.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetRegistry.h" @@ -33,8 +31,6 @@ #include <vector> using namespace llvm; -class MachObjectWriter; - STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); // FIXME FIXME FIXME: There are number of places in this file where we convert @@ -42,917 +38,6 @@ STATISTIC(EmittedFragments, "Number of emitted assembler fragments"); // object file, which may truncate it. We should detect that truncation where // invalid and report errors back. -static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, - MachObjectWriter &MOW); - -static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW); - -/// isVirtualSection - Check if this is a section which does not actually exist -/// in the object file. -static bool isVirtualSection(const MCSection &Section) { - // FIXME: Lame. - const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); - return (SMO.getType() == MCSectionMachO::S_ZEROFILL); -} - -static unsigned getFixupKindLog2Size(unsigned Kind) { - switch (Kind) { - default: llvm_unreachable("invalid fixup kind!"); - case X86::reloc_pcrel_1byte: - case FK_Data_1: return 0; - case FK_Data_2: return 1; - case X86::reloc_pcrel_4byte: - case X86::reloc_riprel_4byte: - case FK_Data_4: return 2; - case FK_Data_8: return 3; - } -} - -static bool isFixupKindPCRel(unsigned Kind) { - switch (Kind) { - default: - return false; - case X86::reloc_pcrel_1byte: - case X86::reloc_pcrel_4byte: - case X86::reloc_riprel_4byte: - return true; - } -} - -class MachObjectWriter { - // See <mach-o/loader.h>. - enum { - Header_Magic32 = 0xFEEDFACE, - Header_Magic64 = 0xFEEDFACF - }; - - enum { - Header32Size = 28, - Header64Size = 32, - SegmentLoadCommand32Size = 56, - SegmentLoadCommand64Size = 72, - Section32Size = 68, - Section64Size = 80, - SymtabLoadCommandSize = 24, - DysymtabLoadCommandSize = 80, - Nlist32Size = 12, - Nlist64Size = 16, - RelocationInfoSize = 8 - }; - - enum HeaderFileType { - HFT_Object = 0x1 - }; - - enum HeaderFlags { - HF_SubsectionsViaSymbols = 0x2000 - }; - - enum LoadCommandType { - LCT_Segment = 0x1, - LCT_Symtab = 0x2, - LCT_Dysymtab = 0xb, - LCT_Segment64 = 0x19 - }; - - // See <mach-o/nlist.h>. - enum SymbolTypeType { - STT_Undefined = 0x00, - STT_Absolute = 0x02, - STT_Section = 0x0e - }; - - enum SymbolTypeFlags { - // If any of these bits are set, then the entry is a stab entry number (see - // <mach-o/stab.h>. Otherwise the other masks apply. - STF_StabsEntryMask = 0xe0, - - STF_TypeMask = 0x0e, - STF_External = 0x01, - STF_PrivateExtern = 0x10 - }; - - /// IndirectSymbolFlags - Flags for encoding special values in the indirect - /// symbol entry. - enum IndirectSymbolFlags { - ISF_Local = 0x80000000, - ISF_Absolute = 0x40000000 - }; - - /// RelocationFlags - Special flags for addresses. - enum RelocationFlags { - RF_Scattered = 0x80000000 - }; - - enum RelocationInfoType { - RIT_Vanilla = 0, - RIT_Pair = 1, - RIT_Difference = 2, - RIT_PreboundLazyPointer = 3, - RIT_LocalDifference = 4 - }; - - /// MachSymbolData - Helper struct for containing some precomputed information - /// on symbols. - struct MachSymbolData { - MCSymbolData *SymbolData; - uint64_t StringIndex; - uint8_t SectionIndex; - - // Support lexicographic sorting. - bool operator<(const MachSymbolData &RHS) const { - const std::string &Name = SymbolData->getSymbol().getName(); - return Name < RHS.SymbolData->getSymbol().getName(); - } - }; - - raw_ostream &OS; - unsigned Is64Bit : 1; - unsigned IsLSB : 1; - -public: - MachObjectWriter(raw_ostream &_OS, bool _Is64Bit, bool _IsLSB = true) - : OS(_OS), Is64Bit(_Is64Bit), IsLSB(_IsLSB) { - } - - /// @name Helper Methods - /// @{ - - void Write8(uint8_t Value) { - OS << char(Value); - } - - void Write16(uint16_t Value) { - if (IsLSB) { - Write8(uint8_t(Value >> 0)); - Write8(uint8_t(Value >> 8)); - } else { - Write8(uint8_t(Value >> 8)); - Write8(uint8_t(Value >> 0)); - } - } - - void Write32(uint32_t Value) { - if (IsLSB) { - Write16(uint16_t(Value >> 0)); - Write16(uint16_t(Value >> 16)); - } else { - Write16(uint16_t(Value >> 16)); - Write16(uint16_t(Value >> 0)); - } - } - - void Write64(uint64_t Value) { - if (IsLSB) { - Write32(uint32_t(Value >> 0)); - Write32(uint32_t(Value >> 32)); - } else { - Write32(uint32_t(Value >> 32)); - Write32(uint32_t(Value >> 0)); - } - } - - void WriteZeros(unsigned N) { - const char Zeros[16] = { 0 }; - - for (unsigned i = 0, e = N / 16; i != e; ++i) - OS << StringRef(Zeros, 16); - - OS << StringRef(Zeros, N % 16); - } - - void WriteString(StringRef Str, unsigned ZeroFillSize = 0) { - OS << Str; - if (ZeroFillSize) - WriteZeros(ZeroFillSize - Str.size()); - } - - /// @} - - void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, - bool SubsectionsViaSymbols) { - uint32_t Flags = 0; - - if (SubsectionsViaSymbols) - Flags |= HF_SubsectionsViaSymbols; - - // struct mach_header (28 bytes) or - // struct mach_header_64 (32 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - Write32(Is64Bit ? Header_Magic64 : Header_Magic32); - - // FIXME: Support cputype. - Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386); - // FIXME: Support cpusubtype. - Write32(MachO::CPUSubType_I386_ALL); - Write32(HFT_Object); - Write32(NumLoadCommands); // Object files have a single load command, the - // segment. - Write32(LoadCommandsSize); - Write32(Flags); - if (Is64Bit) - Write32(0); // reserved - - assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size); - } - - /// WriteSegmentLoadCommand - Write a segment load command. - /// - /// \arg NumSections - The number of sections in this segment. - /// \arg SectionDataSize - The total size of the sections. - void WriteSegmentLoadCommand(unsigned NumSections, - uint64_t VMSize, - uint64_t SectionDataStartOffset, - uint64_t SectionDataSize) { - // struct segment_command (56 bytes) or - // struct segment_command_64 (72 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size : - SegmentLoadCommand32Size; - Write32(Is64Bit ? LCT_Segment64 : LCT_Segment); - Write32(SegmentLoadCommandSize + - NumSections * (Is64Bit ? Section64Size : Section32Size)); - - WriteString("", 16); - if (Is64Bit) { - Write64(0); // vmaddr - Write64(VMSize); // vmsize - Write64(SectionDataStartOffset); // file offset - Write64(SectionDataSize); // file size - } else { - Write32(0); // vmaddr - Write32(VMSize); // vmsize - Write32(SectionDataStartOffset); // file offset - Write32(SectionDataSize); // file size - } - Write32(0x7); // maxprot - Write32(0x7); // initprot - Write32(NumSections); - Write32(0); // flags - - assert(OS.tell() - Start == SegmentLoadCommandSize); - } - - void WriteSection(const MCSectionData &SD, uint64_t FileOffset, - uint64_t RelocationsStart, unsigned NumRelocations) { - // The offset is unused for virtual sections. - if (isVirtualSection(SD.getSection())) { - assert(SD.getFileSize() == 0 && "Invalid file size!"); - FileOffset = 0; - } - - // struct section (68 bytes) or - // struct section_64 (80 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - // FIXME: cast<> support! - const MCSectionMachO &Section = - static_cast<const MCSectionMachO&>(SD.getSection()); - WriteString(Section.getSectionName(), 16); - WriteString(Section.getSegmentName(), 16); - if (Is64Bit) { - Write64(SD.getAddress()); // address - Write64(SD.getSize()); // size - } else { - Write32(SD.getAddress()); // address - Write32(SD.getSize()); // size - } - Write32(FileOffset); - - unsigned Flags = Section.getTypeAndAttributes(); - if (SD.hasInstructions()) - Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; - - assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); - Write32(Log2_32(SD.getAlignment())); - Write32(NumRelocations ? RelocationsStart : 0); - Write32(NumRelocations); - Write32(Flags); - Write32(0); // reserved1 - Write32(Section.getStubSize()); // reserved2 - if (Is64Bit) - Write32(0); // reserved3 - - assert(OS.tell() - Start == Is64Bit ? Section64Size : Section32Size); - } - - void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, - uint32_t StringTableOffset, - uint32_t StringTableSize) { - // struct symtab_command (24 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - Write32(LCT_Symtab); - Write32(SymtabLoadCommandSize); - Write32(SymbolOffset); - Write32(NumSymbols); - Write32(StringTableOffset); - Write32(StringTableSize); - - assert(OS.tell() - Start == SymtabLoadCommandSize); - } - - void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, - uint32_t NumLocalSymbols, - uint32_t FirstExternalSymbol, - uint32_t NumExternalSymbols, - uint32_t FirstUndefinedSymbol, - uint32_t NumUndefinedSymbols, - uint32_t IndirectSymbolOffset, - uint32_t NumIndirectSymbols) { - // struct dysymtab_command (80 bytes) - - uint64_t Start = OS.tell(); - (void) Start; - - Write32(LCT_Dysymtab); - Write32(DysymtabLoadCommandSize); - Write32(FirstLocalSymbol); - Write32(NumLocalSymbols); - Write32(FirstExternalSymbol); - Write32(NumExternalSymbols); - Write32(FirstUndefinedSymbol); - Write32(NumUndefinedSymbols); - Write32(0); // tocoff - Write32(0); // ntoc - Write32(0); // modtaboff - Write32(0); // nmodtab - Write32(0); // extrefsymoff - Write32(0); // nextrefsyms - Write32(IndirectSymbolOffset); - Write32(NumIndirectSymbols); - Write32(0); // extreloff - Write32(0); // nextrel - Write32(0); // locreloff - Write32(0); // nlocrel - - assert(OS.tell() - Start == DysymtabLoadCommandSize); - } - - void WriteNlist(MachSymbolData &MSD) { - MCSymbolData &Data = *MSD.SymbolData; - const MCSymbol &Symbol = Data.getSymbol(); - uint8_t Type = 0; - uint16_t Flags = Data.getFlags(); - uint32_t Address = 0; - - // Set the N_TYPE bits. See <mach-o/nlist.h>. - // - // FIXME: Are the prebound or indirect fields possible here? - if (Symbol.isUndefined()) - Type = STT_Undefined; - else if (Symbol.isAbsolute()) - Type = STT_Absolute; - else - Type = STT_Section; - - // FIXME: Set STAB bits. - - if (Data.isPrivateExtern()) - Type |= STF_PrivateExtern; - - // Set external bit. - if (Data.isExternal() || Symbol.isUndefined()) - Type |= STF_External; - - // Compute the symbol address. - if (Symbol.isDefined()) { - if (Symbol.isAbsolute()) { - llvm_unreachable("FIXME: Not yet implemented!"); - } else { - Address = Data.getAddress(); - } - } else if (Data.isCommon()) { - // Common symbols are encoded with the size in the address - // field, and their alignment in the flags. - Address = Data.getCommonSize(); - - // Common alignment is packed into the 'desc' bits. - if (unsigned Align = Data.getCommonAlignment()) { - unsigned Log2Size = Log2_32(Align); - assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); - if (Log2Size > 15) - llvm_report_error("invalid 'common' alignment '" + - Twine(Align) + "'"); - // FIXME: Keep this mask with the SymbolFlags enumeration. - Flags = (Flags & 0xF0FF) | (Log2Size << 8); - } - } - - // struct nlist (12 bytes) - - Write32(MSD.StringIndex); - Write8(Type); - Write8(MSD.SectionIndex); - - // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' - // value. - Write16(Flags); - if (Is64Bit) - Write64(Address); - else - Write32(Address); - } - - struct MachRelocationEntry { - uint32_t Word0; - uint32_t Word1; - }; - void ComputeScatteredRelocationInfo(MCAssembler &Asm, MCFragment &Fragment, - MCAsmFixup &Fixup, - const MCValue &Target, - std::vector<MachRelocationEntry> &Relocs) { - uint32_t Address = Fragment.getOffset() + Fixup.Offset; - unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); - unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); - unsigned Type = RIT_Vanilla; - - // See <reloc.h>. - const MCSymbol *A = Target.getSymA(); - MCSymbolData *A_SD = &Asm.getSymbolData(*A); - - if (!A_SD->getFragment()) - llvm_report_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression"); - - uint32_t Value = A_SD->getAddress(); - uint32_t Value2 = 0; - - if (const MCSymbol *B = Target.getSymB()) { - MCSymbolData *B_SD = &Asm.getSymbolData(*B); - - if (!B_SD->getFragment()) - llvm_report_error("symbol '" + B->getName() + - "' can not be undefined in a subtraction expression"); - - // Select the appropriate difference relocation type. - // - // Note that there is no longer any semantic difference between these two - // relocation types from the linkers point of view, this is done solely - // for pedantic compatibility with 'as'. - Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference; - Value2 = B_SD->getAddress(); - } - - MachRelocationEntry MRE; - MRE.Word0 = ((Address << 0) | - (Type << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - RF_Scattered); - MRE.Word1 = Value; - Relocs.push_back(MRE); - - if (Type == RIT_Difference || Type == RIT_LocalDifference) { - MachRelocationEntry MRE; - MRE.Word0 = ((0 << 0) | - (RIT_Pair << 24) | - (Log2Size << 28) | - (IsPCRel << 30) | - RF_Scattered); - MRE.Word1 = Value2; - Relocs.push_back(MRE); - } - } - - void ComputeRelocationInfo(MCAssembler &Asm, MCDataFragment &Fragment, - MCAsmFixup &Fixup, - std::vector<MachRelocationEntry> &Relocs) { - unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); - unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); - - // FIXME: Share layout object. - MCAsmLayout Layout(Asm); - - // Evaluate the fixup; if the value was resolved, no relocation is needed. - MCValue Target; - if (Asm.EvaluateFixup(Layout, Fixup, &Fragment, Target, Fixup.FixedValue)) - return; - - // If this is a difference or a defined symbol plus an offset, then we need - // a scattered relocation entry. - uint32_t Offset = Target.getConstant(); - if (IsPCRel) - Offset += 1 << Log2Size; - if (Target.getSymB() || - (Target.getSymA() && !Target.getSymA()->isUndefined() && - Offset)) - return ComputeScatteredRelocationInfo(Asm, Fragment, Fixup, Target, - Relocs); - - // See <reloc.h>. - uint32_t Address = Fragment.getOffset() + Fixup.Offset; - uint32_t Value = 0; - unsigned Index = 0; - unsigned IsExtern = 0; - unsigned Type = 0; - - if (Target.isAbsolute()) { // constant - // SymbolNum of 0 indicates the absolute section. - // - // FIXME: Currently, these are never generated (see code below). I cannot - // find a case where they are actually emitted. - Type = RIT_Vanilla; - Value = 0; - } else { - const MCSymbol *Symbol = Target.getSymA(); - MCSymbolData *SD = &Asm.getSymbolData(*Symbol); - - if (Symbol->isUndefined()) { - IsExtern = 1; - Index = SD->getIndex(); - Value = 0; - } else { - // The index is the section ordinal. - // - // FIXME: O(N) - Index = 1; - MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); - for (; it != ie; ++it, ++Index) - if (&*it == SD->getFragment()->getParent()) - break; - assert(it != ie && "Unable to find section index!"); - Value = SD->getAddress(); - } - - Type = RIT_Vanilla; - } - - // struct relocation_info (8 bytes) - MachRelocationEntry MRE; - MRE.Word0 = Address; - MRE.Word1 = ((Index << 0) | - (IsPCRel << 24) | - (Log2Size << 25) | - (IsExtern << 27) | - (Type << 28)); - Relocs.push_back(MRE); - } - - void BindIndirectSymbols(MCAssembler &Asm) { - // This is the point where 'as' creates actual symbols for indirect symbols - // (in the following two passes). It would be easier for us to do this - // sooner when we see the attribute, but that makes getting the order in the - // symbol table much more complicated than it is worth. - // - // FIXME: Revisit this when the dust settles. - - // Bind non lazy symbol pointers first. - for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it) { - // FIXME: cast<> support! - const MCSectionMachO &Section = - static_cast<const MCSectionMachO&>(it->SectionData->getSection()); - - if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) - continue; - - Asm.getOrCreateSymbolData(*it->Symbol); - } - - // Then lazy symbol pointers and symbol stubs. - for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it) { - // FIXME: cast<> support! - const MCSectionMachO &Section = - static_cast<const MCSectionMachO&>(it->SectionData->getSection()); - - if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && - Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) - continue; - - // Set the symbol type to undefined lazy, but only on construction. - // - // FIXME: Do not hardcode. - bool Created; - MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); - if (Created) - Entry.setFlags(Entry.getFlags() | 0x0001); - } - } - - /// ComputeSymbolTable - Compute the symbol table data - /// - /// \param StringTable [out] - The string table data. - /// \param StringIndexMap [out] - Map from symbol names to offsets in the - /// string table. - void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, - std::vector<MachSymbolData> &LocalSymbolData, - std::vector<MachSymbolData> &ExternalSymbolData, - std::vector<MachSymbolData> &UndefinedSymbolData) { - // Build section lookup table. - DenseMap<const MCSection*, uint8_t> SectionIndexMap; - unsigned Index = 1; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it, ++Index) - SectionIndexMap[&it->getSection()] = Index; - assert(Index <= 256 && "Too many sections!"); - - // Index 0 is always the empty string. - StringMap<uint64_t> StringIndexMap; - StringTable += '\x00'; - - // Build the symbol arrays and the string table, but only for non-local - // symbols. - // - // The particular order that we collect the symbols and create the string - // table, then sort the symbols is chosen to match 'as'. Even though it - // doesn't matter for correctness, this is important for letting us diff .o - // files. - for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Symbol = it->getSymbol(); - - // Ignore assembler temporaries. - if (it->getSymbol().isTemporary() && - (!it->getFragment() || - !Asm.getBackend().doesSectionRequireSymbols( - it->getFragment()->getParent()->getSection()))) - continue; - - if (!it->isExternal() && !Symbol.isUndefined()) - continue; - - uint64_t &Entry = StringIndexMap[Symbol.getName()]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Symbol.getName(); - StringTable += '\x00'; - } - - MachSymbolData MSD; - MSD.SymbolData = it; - MSD.StringIndex = Entry; - - if (Symbol.isUndefined()) { - MSD.SectionIndex = 0; - UndefinedSymbolData.push_back(MSD); - } else if (Symbol.isAbsolute()) { - MSD.SectionIndex = 0; - ExternalSymbolData.push_back(MSD); - } else { - MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); - assert(MSD.SectionIndex && "Invalid section index!"); - ExternalSymbolData.push_back(MSD); - } - } - - // Now add the data for local symbols. - for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), - ie = Asm.symbol_end(); it != ie; ++it) { - const MCSymbol &Symbol = it->getSymbol(); - - // Ignore assembler temporaries. - if (it->getSymbol().isTemporary() && - (!it->getFragment() || - !Asm.getBackend().doesSectionRequireSymbols( - it->getFragment()->getParent()->getSection()))) - continue; - - if (it->isExternal() || Symbol.isUndefined()) - continue; - - uint64_t &Entry = StringIndexMap[Symbol.getName()]; - if (!Entry) { - Entry = StringTable.size(); - StringTable += Symbol.getName(); - StringTable += '\x00'; - } - - MachSymbolData MSD; - MSD.SymbolData = it; - MSD.StringIndex = Entry; - - if (Symbol.isAbsolute()) { - MSD.SectionIndex = 0; - LocalSymbolData.push_back(MSD); - } else { - MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); - assert(MSD.SectionIndex && "Invalid section index!"); - LocalSymbolData.push_back(MSD); - } - } - - // External and undefined symbols are required to be in lexicographic order. - std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); - std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); - - // Set the symbol indices. - Index = 0; - for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - LocalSymbolData[i].SymbolData->setIndex(Index++); - for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - ExternalSymbolData[i].SymbolData->setIndex(Index++); - for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - UndefinedSymbolData[i].SymbolData->setIndex(Index++); - - // The string table is padded to a multiple of 4. - while (StringTable.size() % 4) - StringTable += '\x00'; - } - - void WriteObject(MCAssembler &Asm) { - unsigned NumSections = Asm.size(); - - // Create symbol data for any indirect symbols. - BindIndirectSymbols(Asm); - - // Compute symbol table information. - SmallString<256> StringTable; - std::vector<MachSymbolData> LocalSymbolData; - std::vector<MachSymbolData> ExternalSymbolData; - std::vector<MachSymbolData> UndefinedSymbolData; - unsigned NumSymbols = Asm.symbol_size(); - - // No symbol table command is written if there are no symbols. - if (NumSymbols) - ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, - UndefinedSymbolData); - - // The section data starts after the header, the segment load command (and - // section headers) and the symbol table. - unsigned NumLoadCommands = 1; - uint64_t LoadCommandsSize = Is64Bit ? - SegmentLoadCommand64Size + NumSections * Section64Size : - SegmentLoadCommand32Size + NumSections * Section32Size; - - // Add the symbol table load command sizes, if used. - if (NumSymbols) { - NumLoadCommands += 2; - LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize; - } - - // Compute the total size of the section data, as well as its file size and - // vm size. - uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size) - + LoadCommandsSize; - uint64_t SectionDataSize = 0; - uint64_t SectionDataFileSize = 0; - uint64_t VMSize = 0; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - MCSectionData &SD = *it; - - VMSize = std::max(VMSize, SD.getAddress() + SD.getSize()); - - if (isVirtualSection(SD.getSection())) - continue; - - SectionDataSize = std::max(SectionDataSize, - SD.getAddress() + SD.getSize()); - SectionDataFileSize = std::max(SectionDataFileSize, - SD.getAddress() + SD.getFileSize()); - } - - // The section data is padded to 4 bytes. - // - // FIXME: Is this machine dependent? - unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); - SectionDataFileSize += SectionDataPadding; - - // Write the prolog, starting with the header and load command... - WriteHeader(NumLoadCommands, LoadCommandsSize, - Asm.getSubsectionsViaSymbols()); - WriteSegmentLoadCommand(NumSections, VMSize, - SectionDataStart, SectionDataSize); - - // ... and then the section headers. - // - // We also compute the section relocations while we do this. Note that - // computing relocation info will also update the fixup to have the correct - // value; this will overwrite the appropriate data in the fragment when it - // is written. - std::vector<MachRelocationEntry> RelocInfos; - uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; - for (MCAssembler::iterator it = Asm.begin(), - ie = Asm.end(); it != ie; ++it) { - MCSectionData &SD = *it; - - // The assembler writes relocations in the reverse order they were seen. - // - // FIXME: It is probably more complicated than this. - unsigned NumRelocsStart = RelocInfos.size(); - for (MCSectionData::reverse_iterator it2 = SD.rbegin(), - ie2 = SD.rend(); it2 != ie2; ++it2) - if (MCDataFragment *DF = dyn_cast<MCDataFragment>(&*it2)) - for (unsigned i = 0, e = DF->fixup_size(); i != e; ++i) - ComputeRelocationInfo(Asm, *DF, DF->getFixups()[e - i - 1], - RelocInfos); - - unsigned NumRelocs = RelocInfos.size() - NumRelocsStart; - uint64_t SectionStart = SectionDataStart + SD.getAddress(); - WriteSection(SD, SectionStart, RelocTableEnd, NumRelocs); - RelocTableEnd += NumRelocs * RelocationInfoSize; - } - - // Write the symbol table load command, if used. - if (NumSymbols) { - unsigned FirstLocalSymbol = 0; - unsigned NumLocalSymbols = LocalSymbolData.size(); - unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; - unsigned NumExternalSymbols = ExternalSymbolData.size(); - unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; - unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); - unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); - unsigned NumSymTabSymbols = - NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; - uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; - uint64_t IndirectSymbolOffset = 0; - - // If used, the indirect symbols are written after the section data. - if (NumIndirectSymbols) - IndirectSymbolOffset = RelocTableEnd; - - // The symbol table is written after the indirect symbol data. - uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; - - // The string table is written after symbol table. - uint64_t StringTableOffset = - SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size : - Nlist32Size); - WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, - StringTableOffset, StringTable.size()); - - WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, - FirstExternalSymbol, NumExternalSymbols, - FirstUndefinedSymbol, NumUndefinedSymbols, - IndirectSymbolOffset, NumIndirectSymbols); - } - - // Write the actual section data. - for (MCAssembler::iterator it = Asm.begin(), ie = Asm.end(); it != ie; ++it) - WriteFileData(OS, *it, *this); - - // Write the extra padding. - WriteZeros(SectionDataPadding); - - // Write the relocation entries. - for (unsigned i = 0, e = RelocInfos.size(); i != e; ++i) { - Write32(RelocInfos[i].Word0); - Write32(RelocInfos[i].Word1); - } - - // Write the symbol table data, if used. - if (NumSymbols) { - // Write the indirect symbol entries. - for (MCAssembler::indirect_symbol_iterator - it = Asm.indirect_symbol_begin(), - ie = Asm.indirect_symbol_end(); it != ie; ++it) { - // Indirect symbols in the non lazy symbol pointer section have some - // special handling. - const MCSectionMachO &Section = - static_cast<const MCSectionMachO&>(it->SectionData->getSection()); - if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { - // If this symbol is defined and internal, mark it as such. - if (it->Symbol->isDefined() && - !Asm.getSymbolData(*it->Symbol).isExternal()) { - uint32_t Flags = ISF_Local; - if (it->Symbol->isAbsolute()) - Flags |= ISF_Absolute; - Write32(Flags); - continue; - } - } - - Write32(Asm.getSymbolData(*it->Symbol).getIndex()); - } - - // FIXME: Check that offsets match computed ones. - - // Write the symbol table entries. - for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) - WriteNlist(LocalSymbolData[i]); - for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) - WriteNlist(ExternalSymbolData[i]); - for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) - WriteNlist(UndefinedSymbolData[i]); - - // Write the string table. - OS << StringTable.str(); - } - } - - void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF) { - unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); - - // FIXME: Endianness assumption. - assert(Fixup.Offset + Size <= DF.getContents().size() && - "Invalid fixup offset!"); - for (unsigned i = 0; i != Size; ++i) - DF.getContents()[Fixup.Offset + i] = uint8_t(Fixup.FixedValue >> (i * 8)); - } -}; - /* *** */ MCFragment::MCFragment() : Kind(FragmentType(~0)) { @@ -1008,14 +93,149 @@ MCSymbolData::MCSymbolData(const MCSymbol &_Symbol, MCFragment *_Fragment, /* *** */ MCAssembler::MCAssembler(MCContext &_Context, TargetAsmBackend &_Backend, - raw_ostream &_OS) - : Context(_Context), Backend(_Backend), OS(_OS), SubsectionsViaSymbols(false) + MCCodeEmitter &_Emitter, raw_ostream &_OS) + : Context(_Context), Backend(_Backend), Emitter(_Emitter), + OS(_OS), SubsectionsViaSymbols(false) { } MCAssembler::~MCAssembler() { } +static bool isScatteredFixupFullyResolvedSimple(const MCAssembler &Asm, + const MCAsmFixup &Fixup, + const MCDataFragment *DF, + const MCValue Target, + const MCSection *BaseSection) { + // The effective fixup address is + // addr(atom(A)) + offset(A) + // - addr(atom(B)) - offset(B) + // - addr(<base symbol>) + <fixup offset from base symbol> + // and the offsets are not relocatable, so the fixup is fully resolved when + // addr(atom(A)) - addr(atom(B)) - addr(<base symbol>)) == 0. + // + // The simple (Darwin, except on x86_64) way of dealing with this was to + // assume that any reference to a temporary symbol *must* be a temporary + // symbol in the same atom, unless the sections differ. Therefore, any PCrel + // relocation to a temporary symbol (in the same section) is fully + // resolved. This also works in conjunction with absolutized .set, which + // requires the compiler to use .set to absolutize the differences between + // symbols which the compiler knows to be assembly time constants, so we don't + // need to worry about consider symbol differences fully resolved. + + // Non-relative fixups are only resolved if constant. + if (!BaseSection) + return Target.isAbsolute(); + + // Otherwise, relative fixups are only resolved if not a difference and the + // target is a temporary in the same section. + if (Target.isAbsolute() || Target.getSymB()) + return false; + + const MCSymbol *A = &Target.getSymA()->getSymbol(); + if (!A->isTemporary() || !A->isInSection() || + &A->getSection() != BaseSection) + return false; + + return true; +} + +static bool isScatteredFixupFullyResolved(const MCAssembler &Asm, + const MCAsmFixup &Fixup, + const MCDataFragment *DF, + const MCValue Target, + const MCSymbolData *BaseSymbol) { + // The effective fixup address is + // addr(atom(A)) + offset(A) + // - addr(atom(B)) - offset(B) + // - addr(BaseSymbol) + <fixup offset from base symbol> + // and the offsets are not relocatable, so the fixup is fully resolved when + // addr(atom(A)) - addr(atom(B)) - addr(BaseSymbol) == 0. + // + // Note that "false" is almost always conservatively correct (it means we emit + // a relocation which is unnecessary), except when it would force us to emit a + // relocation which the target cannot encode. + + const MCSymbolData *A_Base = 0, *B_Base = 0; + if (const MCSymbolRefExpr *A = Target.getSymA()) { + // Modified symbol references cannot be resolved. + if (A->getKind() != MCSymbolRefExpr::VK_None) + return false; + + A_Base = Asm.getAtom(&Asm.getSymbolData(A->getSymbol())); + if (!A_Base) + return false; + } + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + // Modified symbol references cannot be resolved. + if (B->getKind() != MCSymbolRefExpr::VK_None) + return false; + + B_Base = Asm.getAtom(&Asm.getSymbolData(B->getSymbol())); + if (!B_Base) + return false; + } + + // If there is no base, A and B have to be the same atom for this fixup to be + // fully resolved. + if (!BaseSymbol) + return A_Base == B_Base; + + // Otherwise, B must be missing and A must be the base. + return !B_Base && BaseSymbol == A_Base; +} + +bool MCAssembler::isSymbolLinkerVisible(const MCSymbolData *SD) const { + // Non-temporary labels should always be visible to the linker. + if (!SD->getSymbol().isTemporary()) + return true; + + // Absolute temporary labels are never visible. + if (!SD->getFragment()) + return false; + + // Otherwise, check if the section requires symbols even for temporary labels. + return getBackend().doesSectionRequireSymbols( + SD->getFragment()->getParent()->getSection()); +} + +const MCSymbolData *MCAssembler::getAtomForAddress(const MCSectionData *Section, + uint64_t Address) const { + const MCSymbolData *Best = 0; + for (MCAssembler::const_symbol_iterator it = symbol_begin(), + ie = symbol_end(); it != ie; ++it) { + // Ignore non-linker visible symbols. + if (!isSymbolLinkerVisible(it)) + continue; + + // Ignore symbols not in the same section. + if (!it->getFragment() || it->getFragment()->getParent() != Section) + continue; + + // Otherwise, find the closest symbol preceding this address (ties are + // resolved in favor of the last defined symbol). + if (it->getAddress() <= Address && + (!Best || it->getAddress() >= Best->getAddress())) + Best = it; + } + + return Best; +} + +const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const { + // Linker visible symbols define atoms. + if (isSymbolLinkerVisible(SD)) + return SD; + + // Absolute and undefined symbols have no defining atom. + if (!SD->getFragment()) + return 0; + + // Otherwise, search by address. + return getAtomForAddress(SD->getFragment()->getParent(), SD->getAddress()); +} + bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup, MCDataFragment *DF, MCValue &Target, uint64_t &Value) const { @@ -1028,34 +248,47 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, MCAsmFixup &Fixup, Value = Target.getConstant(); - // FIXME: This "resolved" check isn't quite right. The assumption is that if - // we have a PCrel access to a temporary, then that temporary is in the same - // atom, and so the value is resolved. We need explicit atom's to implement - // this more precisely. - bool IsResolved = true, IsPCRel = isFixupKindPCRel(Fixup.Kind); - if (const MCSymbol *Symbol = Target.getSymA()) { - if (Symbol->isDefined()) - Value += getSymbolData(*Symbol).getAddress(); + bool IsPCRel = + Emitter.getFixupKindInfo(Fixup.Kind).Flags & MCFixupKindInfo::FKF_IsPCRel; + bool IsResolved = true; + if (const MCSymbolRefExpr *A = Target.getSymA()) { + if (A->getSymbol().isDefined()) + Value += getSymbolData(A->getSymbol()).getAddress(); else IsResolved = false; - - // With scattered symbols, we assume anything that isn't a PCrel temporary - // access can have an arbitrary value. - if (getBackend().hasScatteredSymbols() && - (!IsPCRel || !Symbol->isTemporary())) - IsResolved = false; } - if (const MCSymbol *Symbol = Target.getSymB()) { - if (Symbol->isDefined()) - Value -= getSymbolData(*Symbol).getAddress(); + if (const MCSymbolRefExpr *B = Target.getSymB()) { + if (B->getSymbol().isDefined()) + Value -= getSymbolData(B->getSymbol()).getAddress(); else IsResolved = false; + } - // With scattered symbols, we assume anything that isn't a PCrel temporary - // access can have an arbitrary value. - if (getBackend().hasScatteredSymbols() && - (!IsPCRel || !Symbol->isTemporary())) - IsResolved = false; + // If we are using scattered symbols, determine whether this value is actually + // resolved; scattering may cause atoms to move. + if (IsResolved && getBackend().hasScatteredSymbols()) { + if (getBackend().hasReliableSymbolDifference()) { + // If this is a PCrel relocation, find the base atom (identified by its + // symbol) that the fixup value is relative to. + const MCSymbolData *BaseSymbol = 0; + if (IsPCRel) { + BaseSymbol = getAtomForAddress( + DF->getParent(), DF->getAddress() + Fixup.Offset); + if (!BaseSymbol) + IsResolved = false; + } + + if (IsResolved) + IsResolved = isScatteredFixupFullyResolved(*this, Fixup, DF, Target, + BaseSymbol); + } else { + const MCSection *BaseSection = 0; + if (IsPCRel) + BaseSection = &DF->getParent()->getSection(); + + IsResolved = isScatteredFixupFullyResolvedSimple(*this, Fixup, DF, Target, + BaseSection); + } } if (IsPCRel) @@ -1127,7 +360,7 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { // Set the section sizes. SD.setSize(Address - SD.getAddress()); - if (isVirtualSection(SD.getSection())) + if (getBackend().isVirtualSection(SD.getSection())) SD.setFileSize(0); else SD.setFileSize(Address - SD.getAddress()); @@ -1138,7 +371,7 @@ void MCAssembler::LayoutSection(MCSectionData &SD) { /// the \arg Count is more than the maximum optimal nops. /// /// FIXME this is X86 32-bit specific and should move to a better place. -static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW) { +static uint64_t WriteNopData(uint64_t Count, MCObjectWriter *OW) { static const uint8_t Nops[16][16] = { // nop {0x90}, @@ -1186,15 +419,14 @@ static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW) { return 0; for (uint64_t i = 0; i < Count; i++) - MOW.Write8 (uint8_t(Nops[Count - 1][i])); + OW->Write8(uint8_t(Nops[Count - 1][i])); return Count; } -/// WriteFileData - Write the \arg F data to the output file. -static void WriteFileData(raw_ostream &OS, const MCFragment &F, - MachObjectWriter &MOW) { - uint64_t Start = OS.tell(); +/// WriteFragmentData - Write the \arg F data to the output file. +static void WriteFragmentData(const MCFragment &F, MCObjectWriter *OW) { + uint64_t Start = OW->getStream().tell(); (void) Start; ++EmittedFragments; @@ -1218,7 +450,7 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, // the Count bytes. Then if that did not fill any bytes or there are any // bytes left to fill use the the Value and ValueSize to fill the rest. if (AF.getEmitNops()) { - uint64_t NopByteCount = WriteNopData(Count, MOW); + uint64_t NopByteCount = WriteNopData(Count, OW); Count -= NopByteCount; } @@ -1226,26 +458,17 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, switch (AF.getValueSize()) { default: assert(0 && "Invalid size!"); - case 1: MOW.Write8 (uint8_t (AF.getValue())); break; - case 2: MOW.Write16(uint16_t(AF.getValue())); break; - case 4: MOW.Write32(uint32_t(AF.getValue())); break; - case 8: MOW.Write64(uint64_t(AF.getValue())); break; + case 1: OW->Write8 (uint8_t (AF.getValue())); break; + case 2: OW->Write16(uint16_t(AF.getValue())); break; + case 4: OW->Write32(uint32_t(AF.getValue())); break; + case 8: OW->Write64(uint64_t(AF.getValue())); break; } } break; } case MCFragment::FT_Data: { - MCDataFragment &DF = cast<MCDataFragment>(F); - - // Apply the fixups. - // - // FIXME: Move elsewhere. - for (MCDataFragment::const_fixup_iterator it = DF.fixup_begin(), - ie = DF.fixup_end(); it != ie; ++it) - MOW.ApplyFixup(*it, DF); - - OS << cast<MCDataFragment>(F).getContents().str(); + OW->WriteBytes(cast<MCDataFragment>(F).getContents().str()); break; } @@ -1255,10 +478,10 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, switch (FF.getValueSize()) { default: assert(0 && "Invalid size!"); - case 1: MOW.Write8 (uint8_t (FF.getValue())); break; - case 2: MOW.Write16(uint16_t(FF.getValue())); break; - case 4: MOW.Write32(uint32_t(FF.getValue())); break; - case 8: MOW.Write64(uint64_t(FF.getValue())); break; + case 1: OW->Write8 (uint8_t (FF.getValue())); break; + case 2: OW->Write16(uint16_t(FF.getValue())); break; + case 4: OW->Write32(uint32_t(FF.getValue())); break; + case 8: OW->Write64(uint64_t(FF.getValue())); break; } } break; @@ -1268,7 +491,7 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, MCOrgFragment &OF = cast<MCOrgFragment>(F); for (uint64_t i = 0, e = OF.getFileSize(); i != e; ++i) - MOW.Write8(uint8_t(OF.getValue())); + OW->Write8(uint8_t(OF.getValue())); break; } @@ -1279,30 +502,29 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F, } } - assert(OS.tell() - Start == F.getFileSize()); + assert(OW->getStream().tell() - Start == F.getFileSize()); } -/// WriteFileData - Write the \arg SD data to the output file. -static void WriteFileData(raw_ostream &OS, const MCSectionData &SD, - MachObjectWriter &MOW) { +void MCAssembler::WriteSectionData(const MCSectionData *SD, + MCObjectWriter *OW) const { // Ignore virtual sections. - if (isVirtualSection(SD.getSection())) { - assert(SD.getFileSize() == 0); + if (getBackend().isVirtualSection(SD->getSection())) { + assert(SD->getFileSize() == 0); return; } - uint64_t Start = OS.tell(); + uint64_t Start = OW->getStream().tell(); (void) Start; - for (MCSectionData::const_iterator it = SD.begin(), - ie = SD.end(); it != ie; ++it) - WriteFileData(OS, *it, MOW); + for (MCSectionData::const_iterator it = SD->begin(), + ie = SD->end(); it != ie; ++it) + WriteFragmentData(*it, OW); // Add section padding. - assert(SD.getFileSize() >= SD.getSize() && "Invalid section sizes!"); - MOW.WriteZeros(SD.getFileSize() - SD.getSize()); + assert(SD->getFileSize() >= SD->getSize() && "Invalid section sizes!"); + OW->WriteZeros(SD->getFileSize() - SD->getSize()); - assert(OS.tell() - Start == SD.getFileSize()); + assert(OW->getStream().tell() - Start == SD->getFileSize()); } void MCAssembler::Finish() { @@ -1318,13 +540,47 @@ void MCAssembler::Finish() { llvm::errs() << "assembler backend - post-layout\n--\n"; dump(); }); - // Write the object file. - // // FIXME: Factor out MCObjectWriter. - bool Is64Bit = StringRef(getBackend().getTarget().getName()) == "x86-64"; - MachObjectWriter MOW(OS, Is64Bit); - MOW.WriteObject(*this); + llvm::OwningPtr<MCObjectWriter> Writer(getBackend().createObjectWriter(OS)); + if (!Writer) + llvm_report_error("unable to create object writer!"); + + // Allow the object writer a chance to perform post-layout binding (for + // example, to set the index fields in the symbol data). + Writer->ExecutePostLayoutBinding(*this); + + // Evaluate and apply the fixups, generating relocation entries as necessary. + // + // FIXME: Share layout object. + MCAsmLayout Layout(*this); + for (MCAssembler::iterator it = begin(), ie = end(); it != ie; ++it) { + for (MCSectionData::iterator it2 = it->begin(), + ie2 = it->end(); it2 != ie2; ++it2) { + MCDataFragment *DF = dyn_cast<MCDataFragment>(it2); + if (!DF) + continue; + + for (MCDataFragment::fixup_iterator it3 = DF->fixup_begin(), + ie3 = DF->fixup_end(); it3 != ie3; ++it3) { + MCAsmFixup &Fixup = *it3; + // Evaluate the fixup. + MCValue Target; + uint64_t FixedValue; + if (!EvaluateFixup(Layout, Fixup, DF, Target, FixedValue)) { + // The fixup was unresolved, we need a relocation. Inform the object + // writer of the relocation, and give it an opportunity to adjust the + // fixup value if need be. + Writer->RecordRelocation(*this, *DF, Fixup, Target, FixedValue); + } + + getBackend().ApplyFixup(Fixup, *DF, FixedValue); + } + } + } + + // Write the object file. + Writer->WriteObject(*this); OS.flush(); } @@ -1354,7 +610,7 @@ bool MCAssembler::LayoutOnce() { MCSectionData &SD = *it; // Skip virtual sections. - if (isVirtualSection(SD.getSection())) + if (getBackend().isVirtualSection(SD.getSection())) continue; // Align this section if necessary by adding padding bytes to the previous @@ -1377,7 +633,7 @@ bool MCAssembler::LayoutOnce() { for (iterator it = begin(), ie = end(); it != ie; ++it) { MCSectionData &SD = *it; - if (!isVirtualSection(SD.getSection())) + if (!getBackend().isVirtualSection(SD.getSection())) continue; // Align this section if necessary by adding padding bytes to the previous diff --git a/lib/MC/MCCodeEmitter.cpp b/lib/MC/MCCodeEmitter.cpp index accb06c..d513237 100644 --- a/lib/MC/MCCodeEmitter.cpp +++ b/lib/MC/MCCodeEmitter.cpp @@ -19,10 +19,10 @@ MCCodeEmitter::~MCCodeEmitter() { const MCFixupKindInfo &MCCodeEmitter::getFixupKindInfo(MCFixupKind Kind) const { static const MCFixupKindInfo Builtins[] = { - { "FK_Data_1", 0, 8 }, - { "FK_Data_2", 0, 16 }, - { "FK_Data_4", 0, 32 }, - { "FK_Data_8", 0, 64 } + { "FK_Data_1", 0, 8, 0 }, + { "FK_Data_2", 0, 16, 0 }, + { "FK_Data_4", 0, 32, 0 }, + { "FK_Data_8", 0, 64, 0 } }; assert(Kind <= 3 && "Unknown fixup kind"); diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 70c89a2..37e8282 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -49,17 +49,6 @@ MCSymbol *MCContext::CreateTempSymbol() { "tmp" + Twine(NextUniqueID++)); } - -MCSymbol *MCContext::GetOrCreateTemporarySymbol(StringRef Name) { - // If there is no name, create a new anonymous symbol. - // FIXME: Remove this. This form of the method should always take a name. - if (Name.empty()) - return GetOrCreateTemporarySymbol(Twine(MAI.getPrivateGlobalPrefix()) + - "tmp" + Twine(NextUniqueID++)); - - return GetOrCreateSymbol(Name, true); -} - MCSymbol *MCContext::GetOrCreateTemporarySymbol(const Twine &Name) { SmallString<128> NameSV; Name.toVector(NameSV); diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index a2ed20b..2759944 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -30,7 +30,7 @@ void MCExpr::print(raw_ostream &OS) const { case MCExpr::SymbolRef: { const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(*this); const MCSymbol &Sym = SRE.getSymbol(); - + // Parenthesize names that start with $ so that they don't look like // absolute names. if (Sym.getName()[0] == '$') @@ -59,14 +59,14 @@ void MCExpr::print(raw_ostream &OS) const { case MCExpr::Binary: { const MCBinaryExpr &BE = cast<MCBinaryExpr>(*this); - + // Only print parens around the LHS if it is non-trivial. if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS())) { OS << *BE.getLHS(); } else { OS << '(' << *BE.getLHS() << ')'; } - + switch (BE.getOpcode()) { default: assert(0 && "Invalid opcode!"); case MCBinaryExpr::Add: @@ -77,7 +77,7 @@ void MCExpr::print(raw_ostream &OS) const { return; } } - + OS << '+'; break; case MCBinaryExpr::And: OS << '&'; break; @@ -98,7 +98,7 @@ void MCExpr::print(raw_ostream &OS) const { case MCBinaryExpr::Sub: OS << '-'; break; case MCBinaryExpr::Xor: OS << '^'; break; } - + // Only print parens around the LHS if it is non-trivial. if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) { OS << *BE.getRHS(); @@ -193,7 +193,7 @@ void MCTargetExpr::Anchor() {} bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const { MCValue Value; - + if (!EvaluateAsRelocatable(Value, Layout) || !Value.isAbsolute()) return false; @@ -201,16 +201,16 @@ bool MCExpr::EvaluateAsAbsolute(int64_t &Res, const MCAsmLayout *Layout) const { return true; } -static bool EvaluateSymbolicAdd(const MCValue &LHS, const MCSymbol *RHS_A, - const MCSymbol *RHS_B, int64_t RHS_Cst, +static bool EvaluateSymbolicAdd(const MCValue &LHS,const MCSymbolRefExpr *RHS_A, + const MCSymbolRefExpr *RHS_B, int64_t RHS_Cst, MCValue &Res) { // We can't add or subtract two symbols. if ((LHS.getSymA() && RHS_A) || (LHS.getSymB() && RHS_B)) return false; - const MCSymbol *A = LHS.getSymA() ? LHS.getSymA() : RHS_A; - const MCSymbol *B = LHS.getSymB() ? LHS.getSymB() : RHS_B; + const MCSymbolRefExpr *A = LHS.getSymA() ? LHS.getSymA() : RHS_A; + const MCSymbolRefExpr *B = LHS.getSymB() ? LHS.getSymB() : RHS_B; if (B) { // If we have a negated symbol, then we must have also have a non-negated // symbol in order to encode the expression. We can do this check later to @@ -228,13 +228,14 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, switch (getKind()) { case Target: return cast<MCTargetExpr>(this)->EvaluateAsRelocatableImpl(Res, Layout); - + case Constant: Res = MCValue::get(cast<MCConstantExpr>(this)->getValue()); return true; case SymbolRef: { - const MCSymbol &Sym = cast<MCSymbolRefExpr>(this)->getSymbol(); + const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(this); + const MCSymbol &Sym = SRE->getSymbol(); // Evaluate recursively if this is a variable. if (Sym.isVariable()) { @@ -245,9 +246,12 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, // layout object and the target requests it. if (Layout && Res.getSymB() && Layout->getAssembler().getBackend().hasAbsolutizedSet() && - Res.getSymA()->isDefined() && Res.getSymB()->isDefined()) { - MCSymbolData &A = Layout->getAssembler().getSymbolData(*Res.getSymA()); - MCSymbolData &B = Layout->getAssembler().getSymbolData(*Res.getSymB()); + Res.getSymA()->getSymbol().isDefined() && + Res.getSymB()->getSymbol().isDefined()) { + MCSymbolData &A = + Layout->getAssembler().getSymbolData(Res.getSymA()->getSymbol()); + MCSymbolData &B = + Layout->getAssembler().getSymbolData(Res.getSymB()->getSymbol()); Res = MCValue::get(+ A.getFragment()->getAddress() + A.getOffset() - B.getFragment()->getAddress() - B.getOffset() + Res.getConstant()); @@ -256,7 +260,7 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, return true; } - Res = MCValue::get(&Sym, 0, 0); + Res = MCValue::get(SRE, 0, 0); return true; } @@ -277,13 +281,13 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, /// -(a - b + const) ==> (b - a - const) if (Value.getSymA() && !Value.getSymB()) return false; - Res = MCValue::get(Value.getSymB(), Value.getSymA(), - -Value.getConstant()); + Res = MCValue::get(Value.getSymB(), Value.getSymA(), + -Value.getConstant()); break; case MCUnaryExpr::Not: if (!Value.isAbsolute()) return false; - Res = MCValue::get(~Value.getConstant()); + Res = MCValue::get(~Value.getConstant()); break; case MCUnaryExpr::Plus: Res = Value; @@ -296,7 +300,7 @@ bool MCExpr::EvaluateAsRelocatable(MCValue &Res, case Binary: { const MCBinaryExpr *ABE = cast<MCBinaryExpr>(this); MCValue LHSValue, RHSValue; - + if (!ABE->getLHS()->EvaluateAsRelocatable(LHSValue, Layout) || !ABE->getRHS()->EvaluateAsRelocatable(RHSValue, Layout)) return false; diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 73b1074..9504392 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -45,7 +45,6 @@ class MCMachOStreamer : public MCStreamer { private: MCAssembler Assembler; - MCCodeEmitter *Emitter; MCSectionData *CurSectionData; private: @@ -61,7 +60,7 @@ private: public: MCMachOStreamer(MCContext &Context, TargetAsmBackend &TAB, raw_ostream &_OS, MCCodeEmitter *_Emitter) - : MCStreamer(Context), Assembler(Context, TAB, _OS), Emitter(_Emitter), + : MCStreamer(Context), Assembler(Context, TAB, *_Emitter, _OS), CurSectionData(0) {} ~MCMachOStreamer() {} @@ -370,15 +369,12 @@ void MCMachOStreamer::EmitInstruction(const MCInst &Inst) { if (Inst.getOperand(i).isExpr()) AddValueSymbols(Inst.getOperand(i).getExpr()); - if (!Emitter) - llvm_unreachable("no code emitter available!"); - CurSectionData->setHasInstructions(true); SmallVector<MCFixup, 4> Fixups; SmallString<256> Code; raw_svector_ostream VecOS(Code); - Emitter->EncodeInstruction(Inst, VecOS, Fixups); + Assembler.getEmitter().EncodeInstruction(Inst, VecOS, Fixups); VecOS.flush(); // Add the fixups and data. diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp new file mode 100644 index 0000000..d117e82 --- /dev/null +++ b/lib/MC/MCObjectWriter.cpp @@ -0,0 +1,15 @@ +//===- lib/MC/MCObjectWriter.cpp - MCObjectWriter implementation ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectWriter.h" + +using namespace llvm; + +MCObjectWriter::~MCObjectWriter() { +} diff --git a/lib/MC/MCValue.cpp b/lib/MC/MCValue.cpp index 043a49d..c6ea16c 100644 --- a/lib/MC/MCValue.cpp +++ b/lib/MC/MCValue.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCValue.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -19,10 +20,12 @@ void MCValue::print(raw_ostream &OS, const MCAsmInfo *MAI) const { return; } - OS << *getSymA(); + getSymA()->print(OS); - if (getSymB()) - OS << " - " << *getSymB(); + if (getSymB()) { + OS << " - "; + getSymB()->print(OS); + } if (getConstant()) OS << " + " << getConstant(); diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp new file mode 100644 index 0000000..4b08c22 --- /dev/null +++ b/lib/MC/MachObjectWriter.cpp @@ -0,0 +1,1109 @@ +//===- lib/MC/MachObjectWriter.cpp - Mach-O File Writer -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MachObjectWriter.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectWriter.h" +#include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCValue.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" +#include "llvm/Target/TargetAsmBackend.h" + +// FIXME: Gross. +#include "../Target/X86/X86FixupKinds.h" + +#include <vector> +using namespace llvm; + +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: llvm_unreachable("invalid fixup kind!"); + case X86::reloc_pcrel_1byte: + case FK_Data_1: return 0; + case FK_Data_2: return 1; + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + +static bool isFixupKindPCRel(unsigned Kind) { + switch (Kind) { + default: + return false; + case X86::reloc_pcrel_1byte: + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + return true; + } +} + +static bool isFixupKindRIPRel(unsigned Kind) { + return Kind == X86::reloc_riprel_4byte || + Kind == X86::reloc_riprel_4byte_movq_load; +} + +namespace { + +class MachObjectWriterImpl { + // See <mach-o/loader.h>. + enum { + Header_Magic32 = 0xFEEDFACE, + Header_Magic64 = 0xFEEDFACF + }; + + enum { + Header32Size = 28, + Header64Size = 32, + SegmentLoadCommand32Size = 56, + SegmentLoadCommand64Size = 72, + Section32Size = 68, + Section64Size = 80, + SymtabLoadCommandSize = 24, + DysymtabLoadCommandSize = 80, + Nlist32Size = 12, + Nlist64Size = 16, + RelocationInfoSize = 8 + }; + + enum HeaderFileType { + HFT_Object = 0x1 + }; + + enum HeaderFlags { + HF_SubsectionsViaSymbols = 0x2000 + }; + + enum LoadCommandType { + LCT_Segment = 0x1, + LCT_Symtab = 0x2, + LCT_Dysymtab = 0xb, + LCT_Segment64 = 0x19 + }; + + // See <mach-o/nlist.h>. + enum SymbolTypeType { + STT_Undefined = 0x00, + STT_Absolute = 0x02, + STT_Section = 0x0e + }; + + enum SymbolTypeFlags { + // If any of these bits are set, then the entry is a stab entry number (see + // <mach-o/stab.h>. Otherwise the other masks apply. + STF_StabsEntryMask = 0xe0, + + STF_TypeMask = 0x0e, + STF_External = 0x01, + STF_PrivateExtern = 0x10 + }; + + /// IndirectSymbolFlags - Flags for encoding special values in the indirect + /// symbol entry. + enum IndirectSymbolFlags { + ISF_Local = 0x80000000, + ISF_Absolute = 0x40000000 + }; + + /// RelocationFlags - Special flags for addresses. + enum RelocationFlags { + RF_Scattered = 0x80000000 + }; + + enum RelocationInfoType { + RIT_Vanilla = 0, + RIT_Pair = 1, + RIT_Difference = 2, + RIT_PreboundLazyPointer = 3, + RIT_LocalDifference = 4 + }; + + /// X86_64 uses its own relocation types. + enum RelocationInfoTypeX86_64 { + RIT_X86_64_Unsigned = 0, + RIT_X86_64_Signed = 1, + RIT_X86_64_Branch = 2, + RIT_X86_64_GOTLoad = 3, + RIT_X86_64_GOT = 4, + RIT_X86_64_Subtractor = 5, + RIT_X86_64_Signed1 = 6, + RIT_X86_64_Signed2 = 7, + RIT_X86_64_Signed4 = 8 + }; + + /// MachSymbolData - Helper struct for containing some precomputed information + /// on symbols. + struct MachSymbolData { + MCSymbolData *SymbolData; + uint64_t StringIndex; + uint8_t SectionIndex; + + // Support lexicographic sorting. + bool operator<(const MachSymbolData &RHS) const { + const std::string &Name = SymbolData->getSymbol().getName(); + return Name < RHS.SymbolData->getSymbol().getName(); + } + }; + + /// @name Relocation Data + /// @{ + + struct MachRelocationEntry { + uint32_t Word0; + uint32_t Word1; + }; + + llvm::DenseMap<const MCSectionData*, + std::vector<MachRelocationEntry> > Relocations; + + /// @} + /// @name Symbol Table Data + /// @{ + + SmallString<256> StringTable; + std::vector<MachSymbolData> LocalSymbolData; + std::vector<MachSymbolData> ExternalSymbolData; + std::vector<MachSymbolData> UndefinedSymbolData; + + /// @} + + MachObjectWriter *Writer; + + raw_ostream &OS; + + unsigned Is64Bit : 1; + +public: + MachObjectWriterImpl(MachObjectWriter *_Writer, bool _Is64Bit) + : Writer(_Writer), OS(Writer->getStream()), Is64Bit(_Is64Bit) { + } + + void Write8(uint8_t Value) { Writer->Write8(Value); } + void Write16(uint16_t Value) { Writer->Write16(Value); } + void Write32(uint32_t Value) { Writer->Write32(Value); } + void Write64(uint64_t Value) { Writer->Write64(Value); } + void WriteZeros(unsigned N) { Writer->WriteZeros(N); } + void WriteBytes(StringRef Str, unsigned ZeroFillSize = 0) { + Writer->WriteBytes(Str, ZeroFillSize); + } + + void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize, + bool SubsectionsViaSymbols) { + uint32_t Flags = 0; + + if (SubsectionsViaSymbols) + Flags |= HF_SubsectionsViaSymbols; + + // struct mach_header (28 bytes) or + // struct mach_header_64 (32 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(Is64Bit ? Header_Magic64 : Header_Magic32); + + // FIXME: Support cputype. + Write32(Is64Bit ? MachO::CPUTypeX86_64 : MachO::CPUTypeI386); + // FIXME: Support cpusubtype. + Write32(MachO::CPUSubType_I386_ALL); + Write32(HFT_Object); + Write32(NumLoadCommands); // Object files have a single load command, the + // segment. + Write32(LoadCommandsSize); + Write32(Flags); + if (Is64Bit) + Write32(0); // reserved + + assert(OS.tell() - Start == Is64Bit ? Header64Size : Header32Size); + } + + /// WriteSegmentLoadCommand - Write a segment load command. + /// + /// \arg NumSections - The number of sections in this segment. + /// \arg SectionDataSize - The total size of the sections. + void WriteSegmentLoadCommand(unsigned NumSections, + uint64_t VMSize, + uint64_t SectionDataStartOffset, + uint64_t SectionDataSize) { + // struct segment_command (56 bytes) or + // struct segment_command_64 (72 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + unsigned SegmentLoadCommandSize = Is64Bit ? SegmentLoadCommand64Size : + SegmentLoadCommand32Size; + Write32(Is64Bit ? LCT_Segment64 : LCT_Segment); + Write32(SegmentLoadCommandSize + + NumSections * (Is64Bit ? Section64Size : Section32Size)); + + WriteBytes("", 16); + if (Is64Bit) { + Write64(0); // vmaddr + Write64(VMSize); // vmsize + Write64(SectionDataStartOffset); // file offset + Write64(SectionDataSize); // file size + } else { + Write32(0); // vmaddr + Write32(VMSize); // vmsize + Write32(SectionDataStartOffset); // file offset + Write32(SectionDataSize); // file size + } + Write32(0x7); // maxprot + Write32(0x7); // initprot + Write32(NumSections); + Write32(0); // flags + + assert(OS.tell() - Start == SegmentLoadCommandSize); + } + + void WriteSection(const MCAssembler &Asm, const MCSectionData &SD, + uint64_t FileOffset, uint64_t RelocationsStart, + unsigned NumRelocations) { + // The offset is unused for virtual sections. + if (Asm.getBackend().isVirtualSection(SD.getSection())) { + assert(SD.getFileSize() == 0 && "Invalid file size!"); + FileOffset = 0; + } + + // struct section (68 bytes) or + // struct section_64 (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + // FIXME: cast<> support! + const MCSectionMachO &Section = + static_cast<const MCSectionMachO&>(SD.getSection()); + WriteBytes(Section.getSectionName(), 16); + WriteBytes(Section.getSegmentName(), 16); + if (Is64Bit) { + Write64(SD.getAddress()); // address + Write64(SD.getSize()); // size + } else { + Write32(SD.getAddress()); // address + Write32(SD.getSize()); // size + } + Write32(FileOffset); + + unsigned Flags = Section.getTypeAndAttributes(); + if (SD.hasInstructions()) + Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS; + + assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!"); + Write32(Log2_32(SD.getAlignment())); + Write32(NumRelocations ? RelocationsStart : 0); + Write32(NumRelocations); + Write32(Flags); + Write32(0); // reserved1 + Write32(Section.getStubSize()); // reserved2 + if (Is64Bit) + Write32(0); // reserved3 + + assert(OS.tell() - Start == Is64Bit ? Section64Size : Section32Size); + } + + void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols, + uint32_t StringTableOffset, + uint32_t StringTableSize) { + // struct symtab_command (24 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(LCT_Symtab); + Write32(SymtabLoadCommandSize); + Write32(SymbolOffset); + Write32(NumSymbols); + Write32(StringTableOffset); + Write32(StringTableSize); + + assert(OS.tell() - Start == SymtabLoadCommandSize); + } + + void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, + uint32_t NumLocalSymbols, + uint32_t FirstExternalSymbol, + uint32_t NumExternalSymbols, + uint32_t FirstUndefinedSymbol, + uint32_t NumUndefinedSymbols, + uint32_t IndirectSymbolOffset, + uint32_t NumIndirectSymbols) { + // struct dysymtab_command (80 bytes) + + uint64_t Start = OS.tell(); + (void) Start; + + Write32(LCT_Dysymtab); + Write32(DysymtabLoadCommandSize); + Write32(FirstLocalSymbol); + Write32(NumLocalSymbols); + Write32(FirstExternalSymbol); + Write32(NumExternalSymbols); + Write32(FirstUndefinedSymbol); + Write32(NumUndefinedSymbols); + Write32(0); // tocoff + Write32(0); // ntoc + Write32(0); // modtaboff + Write32(0); // nmodtab + Write32(0); // extrefsymoff + Write32(0); // nextrefsyms + Write32(IndirectSymbolOffset); + Write32(NumIndirectSymbols); + Write32(0); // extreloff + Write32(0); // nextrel + Write32(0); // locreloff + Write32(0); // nlocrel + + assert(OS.tell() - Start == DysymtabLoadCommandSize); + } + + void WriteNlist(MachSymbolData &MSD) { + MCSymbolData &Data = *MSD.SymbolData; + const MCSymbol &Symbol = Data.getSymbol(); + uint8_t Type = 0; + uint16_t Flags = Data.getFlags(); + uint32_t Address = 0; + + // Set the N_TYPE bits. See <mach-o/nlist.h>. + // + // FIXME: Are the prebound or indirect fields possible here? + if (Symbol.isUndefined()) + Type = STT_Undefined; + else if (Symbol.isAbsolute()) + Type = STT_Absolute; + else + Type = STT_Section; + + // FIXME: Set STAB bits. + + if (Data.isPrivateExtern()) + Type |= STF_PrivateExtern; + + // Set external bit. + if (Data.isExternal() || Symbol.isUndefined()) + Type |= STF_External; + + // Compute the symbol address. + if (Symbol.isDefined()) { + if (Symbol.isAbsolute()) { + llvm_unreachable("FIXME: Not yet implemented!"); + } else { + Address = Data.getAddress(); + } + } else if (Data.isCommon()) { + // Common symbols are encoded with the size in the address + // field, and their alignment in the flags. + Address = Data.getCommonSize(); + + // Common alignment is packed into the 'desc' bits. + if (unsigned Align = Data.getCommonAlignment()) { + unsigned Log2Size = Log2_32(Align); + assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); + if (Log2Size > 15) + llvm_report_error("invalid 'common' alignment '" + + Twine(Align) + "'"); + // FIXME: Keep this mask with the SymbolFlags enumeration. + Flags = (Flags & 0xF0FF) | (Log2Size << 8); + } + } + + // struct nlist (12 bytes) + + Write32(MSD.StringIndex); + Write8(Type); + Write8(MSD.SectionIndex); + + // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' + // value. + Write16(Flags); + if (Is64Bit) + Write64(Address); + else + Write32(Address); + } + + void RecordX86_64Relocation(const MCAssembler &Asm, + const MCDataFragment &Fragment, + const MCAsmFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); + unsigned IsRIPRel = isFixupKindRIPRel(Fixup.Kind); + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); + + // See <reloc.h>. + uint32_t Address = Fragment.getOffset() + Fixup.Offset; + int64_t Value = 0; + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + Value = Target.getConstant(); + + if (IsPCRel) { + // Compensate for the relocation offset, Darwin x86_64 relocations only + // have the addend and appear to have attempted to define it to be the + // actual expression addend without the PCrel bias. However, instructions + // with data following the relocation are not accomodated for (see comment + // below regarding SIGNED{1,2,4}), so it isn't exactly that either. + Value += 1 << Log2Size; + } + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + Type = RIT_X86_64_Unsigned; + Index = 0; + + // FIXME: I believe this is broken, I don't think the linker can + // understand it. I think it would require a local relocation, but I'm not + // sure if that would work either. The official way to get an absolute + // PCrel relocation is to use an absolute symbol (which we don't support + // yet). + if (IsPCRel) { + IsExtern = 1; + Type = RIT_X86_64_Branch; + } + } else if (Target.getSymB()) { // A - B + constant + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData &A_SD = Asm.getSymbolData(*A); + const MCSymbolData *A_Base = Asm.getAtom(&A_SD); + + const MCSymbol *B = &Target.getSymB()->getSymbol(); + MCSymbolData &B_SD = Asm.getSymbolData(*B); + const MCSymbolData *B_Base = Asm.getAtom(&B_SD); + + // Neither symbol can be modified. + if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) + llvm_report_error("unsupported relocation of modified symbol"); + + // We don't support PCrel relocations of differences. Darwin 'as' doesn't + // implement most of these correctly. + if (IsPCRel) + llvm_report_error("unsupported pc-relative relocation of difference"); + + // We don't currently support any situation where one or both of the + // symbols would require a local relocation. This is almost certainly + // unused and may not be possible to encode correctly. + if (!A_Base || !B_Base) + llvm_report_error("unsupported local relocations in difference"); + + // Darwin 'as' doesn't emit correct relocations for this (it ends up with + // a single SIGNED relocation); reject it for now. + if (A_Base == B_Base) + llvm_report_error("unsupported relocation with identical base"); + + Value += A_SD.getAddress() - A_Base->getAddress(); + Value -= B_SD.getAddress() - B_Base->getAddress(); + + Index = A_Base->getIndex(); + IsExtern = 1; + Type = RIT_X86_64_Unsigned; + + MachRelocationEntry MRE; + MRE.Word0 = Address; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment.getParent()].push_back(MRE); + + Index = B_Base->getIndex(); + IsExtern = 1; + Type = RIT_X86_64_Subtractor; + } else { + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + MCSymbolData &SD = Asm.getSymbolData(*Symbol); + const MCSymbolData *Base = Asm.getAtom(&SD); + + // x86_64 almost always uses external relocations, except when there is no + // symbol to use as a base address (a local symbol with no preceeding + // non-local symbol). + if (Base) { + Index = Base->getIndex(); + IsExtern = 1; + + // Add the local offset, if needed. + if (Base != &SD) + Value += SD.getAddress() - Base->getAddress(); + } else { + // The index is the section ordinal. + // + // FIXME: O(N) + Index = 1; + MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); + for (; it != ie; ++it, ++Index) + if (&*it == SD.getFragment()->getParent()) + break; + assert(it != ie && "Unable to find section index!"); + IsExtern = 0; + Value += SD.getAddress(); + + if (IsPCRel) + Value -= Address + (1 << Log2Size); + } + + MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); + if (IsPCRel) { + if (IsRIPRel) { + if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) { + // x86_64 distinguishes movq foo@GOTPCREL so that the linker can + // rewrite the movq to an leaq at link time if the symbol ends up in + // the same linkage unit. + if (unsigned(Fixup.Kind) == X86::reloc_riprel_4byte_movq_load) + Type = RIT_X86_64_GOTLoad; + else + Type = RIT_X86_64_GOT; + } else if (Modifier != MCSymbolRefExpr::VK_None) + llvm_report_error("unsupported symbol modifier in relocation"); + else + Type = RIT_X86_64_Signed; + } else { + if (Modifier != MCSymbolRefExpr::VK_None) + llvm_report_error("unsupported symbol modifier in branch " + "relocation"); + + Type = RIT_X86_64_Branch; + } + + // The Darwin x86_64 relocation format has a problem where it cannot + // encode an address (L<foo> + <constant>) which is outside the atom + // containing L<foo>. Generally, this shouldn't occur but it does happen + // when we have a RIPrel instruction with data following the relocation + // entry (e.g., movb $012, L0(%rip)). Even with the PCrel adjustment + // Darwin x86_64 uses, the offset is still negative and the linker has + // no way to recognize this. + // + // To work around this, Darwin uses several special relocation types to + // indicate the offsets. However, the specification or implementation of + // these seems to also be incomplete; they should adjust the addend as + // well based on the actual encoded instruction (the additional bias), + // but instead appear to just look at the final offset. + if (IsRIPRel) { + switch (-(Target.getConstant() + (1 << Log2Size))) { + case 1: Type = RIT_X86_64_Signed1; break; + case 2: Type = RIT_X86_64_Signed2; break; + case 4: Type = RIT_X86_64_Signed4; break; + } + } + } else { + if (Modifier == MCSymbolRefExpr::VK_GOT) + Type = RIT_X86_64_GOT; + else if (Modifier != MCSymbolRefExpr::VK_None) + llvm_report_error("unsupported symbol modifier in relocation"); + else + Type = RIT_X86_64_Unsigned; + } + } + + // x86_64 always writes custom values into the fixups. + FixedValue = Value; + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = Address; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment.getParent()].push_back(MRE); + } + + void RecordScatteredRelocation(const MCAssembler &Asm, + const MCFragment &Fragment, + const MCAsmFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + uint32_t Address = Fragment.getOffset() + Fixup.Offset; + unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); + unsigned Type = RIT_Vanilla; + + // See <reloc.h>. + const MCSymbol *A = &Target.getSymA()->getSymbol(); + MCSymbolData *A_SD = &Asm.getSymbolData(*A); + + if (!A_SD->getFragment()) + llvm_report_error("symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + + uint32_t Value = A_SD->getAddress(); + uint32_t Value2 = 0; + + if (const MCSymbolRefExpr *B = Target.getSymB()) { + MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); + + if (!B_SD->getFragment()) + llvm_report_error("symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + + // Select the appropriate difference relocation type. + // + // Note that there is no longer any semantic difference between these two + // relocation types from the linkers point of view, this is done solely + // for pedantic compatibility with 'as'. + Type = A_SD->isExternal() ? RIT_Difference : RIT_LocalDifference; + Value2 = B_SD->getAddress(); + } + + // Relocations are written out in reverse order, so the PAIR comes first. + if (Type == RIT_Difference || Type == RIT_LocalDifference) { + MachRelocationEntry MRE; + MRE.Word0 = ((0 << 0) | + (RIT_Pair << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + RF_Scattered); + MRE.Word1 = Value2; + Relocations[Fragment.getParent()].push_back(MRE); + } + + MachRelocationEntry MRE; + MRE.Word0 = ((Address << 0) | + (Type << 24) | + (Log2Size << 28) | + (IsPCRel << 30) | + RF_Scattered); + MRE.Word1 = Value; + Relocations[Fragment.getParent()].push_back(MRE); + } + + void RecordRelocation(const MCAssembler &Asm, const MCDataFragment &Fragment, + const MCAsmFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + if (Is64Bit) { + RecordX86_64Relocation(Asm, Fragment, Fixup, Target, FixedValue); + return; + } + + unsigned IsPCRel = isFixupKindPCRel(Fixup.Kind); + unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind); + + // If this is a difference or a defined symbol plus an offset, then we need + // a scattered relocation entry. + uint32_t Offset = Target.getConstant(); + if (IsPCRel) + Offset += 1 << Log2Size; + if (Target.getSymB() || + (Target.getSymA() && !Target.getSymA()->getSymbol().isUndefined() && + Offset)) { + RecordScatteredRelocation(Asm, Fragment, Fixup, Target, FixedValue); + return; + } + + // See <reloc.h>. + uint32_t Address = Fragment.getOffset() + Fixup.Offset; + uint32_t Value = 0; + unsigned Index = 0; + unsigned IsExtern = 0; + unsigned Type = 0; + + if (Target.isAbsolute()) { // constant + // SymbolNum of 0 indicates the absolute section. + // + // FIXME: Currently, these are never generated (see code below). I cannot + // find a case where they are actually emitted. + Type = RIT_Vanilla; + Value = 0; + } else { + const MCSymbol *Symbol = &Target.getSymA()->getSymbol(); + MCSymbolData *SD = &Asm.getSymbolData(*Symbol); + + if (Symbol->isUndefined()) { + IsExtern = 1; + Index = SD->getIndex(); + Value = 0; + } else { + // The index is the section ordinal. + // + // FIXME: O(N) + Index = 1; + MCAssembler::const_iterator it = Asm.begin(), ie = Asm.end(); + for (; it != ie; ++it, ++Index) + if (&*it == SD->getFragment()->getParent()) + break; + assert(it != ie && "Unable to find section index!"); + Value = SD->getAddress(); + } + + Type = RIT_Vanilla; + } + + // struct relocation_info (8 bytes) + MachRelocationEntry MRE; + MRE.Word0 = Address; + MRE.Word1 = ((Index << 0) | + (IsPCRel << 24) | + (Log2Size << 25) | + (IsExtern << 27) | + (Type << 28)); + Relocations[Fragment.getParent()].push_back(MRE); + } + + void BindIndirectSymbols(MCAssembler &Asm) { + // This is the point where 'as' creates actual symbols for indirect symbols + // (in the following two passes). It would be easier for us to do this + // sooner when we see the attribute, but that makes getting the order in the + // symbol table much more complicated than it is worth. + // + // FIXME: Revisit this when the dust settles. + + // Bind non lazy symbol pointers first. + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // FIXME: cast<> support! + const MCSectionMachO &Section = + static_cast<const MCSectionMachO&>(it->SectionData->getSection()); + + if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) + continue; + + Asm.getOrCreateSymbolData(*it->Symbol); + } + + // Then lazy symbol pointers and symbol stubs. + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // FIXME: cast<> support! + const MCSectionMachO &Section = + static_cast<const MCSectionMachO&>(it->SectionData->getSection()); + + if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && + Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) + continue; + + // Set the symbol type to undefined lazy, but only on construction. + // + // FIXME: Do not hardcode. + bool Created; + MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created); + if (Created) + Entry.setFlags(Entry.getFlags() | 0x0001); + } + } + + /// ComputeSymbolTable - Compute the symbol table data + /// + /// \param StringTable [out] - The string table data. + /// \param StringIndexMap [out] - Map from symbol names to offsets in the + /// string table. + void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable, + std::vector<MachSymbolData> &LocalSymbolData, + std::vector<MachSymbolData> &ExternalSymbolData, + std::vector<MachSymbolData> &UndefinedSymbolData) { + // Build section lookup table. + DenseMap<const MCSection*, uint8_t> SectionIndexMap; + unsigned Index = 1; + for (MCAssembler::iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it, ++Index) + SectionIndexMap[&it->getSection()] = Index; + assert(Index <= 256 && "Too many sections!"); + + // Index 0 is always the empty string. + StringMap<uint64_t> StringIndexMap; + StringTable += '\x00'; + + // Build the symbol arrays and the string table, but only for non-local + // symbols. + // + // The particular order that we collect the symbols and create the string + // table, then sort the symbols is chosen to match 'as'. Even though it + // doesn't matter for correctness, this is important for letting us diff .o + // files. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(it)) + continue; + + if (!it->isExternal() && !Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isUndefined()) { + MSD.SectionIndex = 0; + UndefinedSymbolData.push_back(MSD); + } else if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + ExternalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + ExternalSymbolData.push_back(MSD); + } + } + + // Now add the data for local symbols. + for (MCAssembler::symbol_iterator it = Asm.symbol_begin(), + ie = Asm.symbol_end(); it != ie; ++it) { + const MCSymbol &Symbol = it->getSymbol(); + + // Ignore non-linker visible symbols. + if (!Asm.isSymbolLinkerVisible(it)) + continue; + + if (it->isExternal() || Symbol.isUndefined()) + continue; + + uint64_t &Entry = StringIndexMap[Symbol.getName()]; + if (!Entry) { + Entry = StringTable.size(); + StringTable += Symbol.getName(); + StringTable += '\x00'; + } + + MachSymbolData MSD; + MSD.SymbolData = it; + MSD.StringIndex = Entry; + + if (Symbol.isAbsolute()) { + MSD.SectionIndex = 0; + LocalSymbolData.push_back(MSD); + } else { + MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection()); + assert(MSD.SectionIndex && "Invalid section index!"); + LocalSymbolData.push_back(MSD); + } + } + + // External and undefined symbols are required to be in lexicographic order. + std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end()); + std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end()); + + // Set the symbol indices. + Index = 0; + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + LocalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + ExternalSymbolData[i].SymbolData->setIndex(Index++); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + UndefinedSymbolData[i].SymbolData->setIndex(Index++); + + // The string table is padded to a multiple of 4. + while (StringTable.size() % 4) + StringTable += '\x00'; + } + + void ExecutePostLayoutBinding(MCAssembler &Asm) { + // Create symbol data for any indirect symbols. + BindIndirectSymbols(Asm); + + // Compute symbol table information and bind symbol indices. + ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData, + UndefinedSymbolData); + } + + void WriteObject(const MCAssembler &Asm) { + unsigned NumSections = Asm.size(); + + // The section data starts after the header, the segment load command (and + // section headers) and the symbol table. + unsigned NumLoadCommands = 1; + uint64_t LoadCommandsSize = Is64Bit ? + SegmentLoadCommand64Size + NumSections * Section64Size : + SegmentLoadCommand32Size + NumSections * Section32Size; + + // Add the symbol table load command sizes, if used. + unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() + + UndefinedSymbolData.size(); + if (NumSymbols) { + NumLoadCommands += 2; + LoadCommandsSize += SymtabLoadCommandSize + DysymtabLoadCommandSize; + } + + // Compute the total size of the section data, as well as its file size and + // vm size. + uint64_t SectionDataStart = (Is64Bit ? Header64Size : Header32Size) + + LoadCommandsSize; + uint64_t SectionDataSize = 0; + uint64_t SectionDataFileSize = 0; + uint64_t VMSize = 0; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + const MCSectionData &SD = *it; + + VMSize = std::max(VMSize, SD.getAddress() + SD.getSize()); + + if (Asm.getBackend().isVirtualSection(SD.getSection())) + continue; + + SectionDataSize = std::max(SectionDataSize, + SD.getAddress() + SD.getSize()); + SectionDataFileSize = std::max(SectionDataFileSize, + SD.getAddress() + SD.getFileSize()); + } + + // The section data is padded to 4 bytes. + // + // FIXME: Is this machine dependent? + unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4); + SectionDataFileSize += SectionDataPadding; + + // Write the prolog, starting with the header and load command... + WriteHeader(NumLoadCommands, LoadCommandsSize, + Asm.getSubsectionsViaSymbols()); + WriteSegmentLoadCommand(NumSections, VMSize, + SectionDataStart, SectionDataSize); + + // ... and then the section headers. + uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize; + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + std::vector<MachRelocationEntry> &Relocs = Relocations[it]; + unsigned NumRelocs = Relocs.size(); + uint64_t SectionStart = SectionDataStart + it->getAddress(); + WriteSection(Asm, *it, SectionStart, RelocTableEnd, NumRelocs); + RelocTableEnd += NumRelocs * RelocationInfoSize; + } + + // Write the symbol table load command, if used. + if (NumSymbols) { + unsigned FirstLocalSymbol = 0; + unsigned NumLocalSymbols = LocalSymbolData.size(); + unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols; + unsigned NumExternalSymbols = ExternalSymbolData.size(); + unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols; + unsigned NumUndefinedSymbols = UndefinedSymbolData.size(); + unsigned NumIndirectSymbols = Asm.indirect_symbol_size(); + unsigned NumSymTabSymbols = + NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols; + uint64_t IndirectSymbolSize = NumIndirectSymbols * 4; + uint64_t IndirectSymbolOffset = 0; + + // If used, the indirect symbols are written after the section data. + if (NumIndirectSymbols) + IndirectSymbolOffset = RelocTableEnd; + + // The symbol table is written after the indirect symbol data. + uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize; + + // The string table is written after symbol table. + uint64_t StringTableOffset = + SymbolTableOffset + NumSymTabSymbols * (Is64Bit ? Nlist64Size : + Nlist32Size); + WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols, + StringTableOffset, StringTable.size()); + + WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols, + FirstExternalSymbol, NumExternalSymbols, + FirstUndefinedSymbol, NumUndefinedSymbols, + IndirectSymbolOffset, NumIndirectSymbols); + } + + // Write the actual section data. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) + Asm.WriteSectionData(it, Writer); + + // Write the extra padding. + WriteZeros(SectionDataPadding); + + // Write the relocation entries. + for (MCAssembler::const_iterator it = Asm.begin(), + ie = Asm.end(); it != ie; ++it) { + // Write the section relocation entries, in reverse order to match 'as' + // (approximately, the exact algorithm is more complicated than this). + std::vector<MachRelocationEntry> &Relocs = Relocations[it]; + for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { + Write32(Relocs[e - i - 1].Word0); + Write32(Relocs[e - i - 1].Word1); + } + } + + // Write the symbol table data, if used. + if (NumSymbols) { + // Write the indirect symbol entries. + for (MCAssembler::const_indirect_symbol_iterator + it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + // Indirect symbols in the non lazy symbol pointer section have some + // special handling. + const MCSectionMachO &Section = + static_cast<const MCSectionMachO&>(it->SectionData->getSection()); + if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) { + // If this symbol is defined and internal, mark it as such. + if (it->Symbol->isDefined() && + !Asm.getSymbolData(*it->Symbol).isExternal()) { + uint32_t Flags = ISF_Local; + if (it->Symbol->isAbsolute()) + Flags |= ISF_Absolute; + Write32(Flags); + continue; + } + } + + Write32(Asm.getSymbolData(*it->Symbol).getIndex()); + } + + // FIXME: Check that offsets match computed ones. + + // Write the symbol table entries. + for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) + WriteNlist(LocalSymbolData[i]); + for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i) + WriteNlist(ExternalSymbolData[i]); + for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i) + WriteNlist(UndefinedSymbolData[i]); + + // Write the string table. + OS << StringTable.str(); + } + } +}; + +} + +MachObjectWriter::MachObjectWriter(raw_ostream &OS, + bool Is64Bit, + bool IsLittleEndian) + : MCObjectWriter(OS, IsLittleEndian) +{ + Impl = new MachObjectWriterImpl(this, Is64Bit); +} + +MachObjectWriter::~MachObjectWriter() { + delete (MachObjectWriterImpl*) Impl; +} + +void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm) { + ((MachObjectWriterImpl*) Impl)->ExecutePostLayoutBinding(Asm); +} + +void MachObjectWriter::RecordRelocation(const MCAssembler &Asm, + const MCDataFragment &Fragment, + const MCAsmFixup &Fixup, MCValue Target, + uint64_t &FixedValue) { + ((MachObjectWriterImpl*) Impl)->RecordRelocation(Asm, Fragment, Fixup, + Target, FixedValue); +} + +void MachObjectWriter::WriteObject(const MCAssembler &Asm) { + ((MachObjectWriterImpl*) Impl)->WriteObject(Asm); +} diff --git a/lib/MC/TargetAsmBackend.cpp b/lib/MC/TargetAsmBackend.cpp index 918d272..bbfddbe 100644 --- a/lib/MC/TargetAsmBackend.cpp +++ b/lib/MC/TargetAsmBackend.cpp @@ -11,7 +11,10 @@ using namespace llvm; TargetAsmBackend::TargetAsmBackend(const Target &T) - : TheTarget(T) + : TheTarget(T), + HasAbsolutizedSet(false), + HasReliableSymbolDifference(false), + HasScatteredSymbols(false) { } diff --git a/lib/Support/FoldingSet.cpp b/lib/Support/FoldingSet.cpp index 954dc77..3f467fe 100644 --- a/lib/Support/FoldingSet.cpp +++ b/lib/Support/FoldingSet.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/FoldingSet.h" +#include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include <cassert> @@ -130,6 +131,15 @@ bool FoldingSetNodeID::operator==(const FoldingSetNodeID &RHS)const{ return memcmp(&Bits[0], &RHS.Bits[0], Bits.size()*sizeof(Bits[0])) == 0; } +/// Intern - Copy this node's data to a memory region allocated from the +/// given allocator and return a FoldingSetNodeIDRef describing the +/// interned data. +FoldingSetNodeIDRef +FoldingSetNodeID::Intern(BumpPtrAllocator &Allocator) const { + unsigned *New = Allocator.Allocate<unsigned>(Bits.size()); + std::uninitialized_copy(Bits.begin(), Bits.end(), New); + return FoldingSetNodeIDRef(New, Bits.size()); +} //===----------------------------------------------------------------------===// /// Helper functions for FoldingSetImpl. diff --git a/lib/System/Unix/Signals.inc b/lib/System/Unix/Signals.inc index c8ec68a..56bf9e7 100644 --- a/lib/System/Unix/Signals.inc +++ b/lib/System/Unix/Signals.inc @@ -39,8 +39,8 @@ static SmartMutex<true> SignalsMutex; /// InterruptFunction - The function to call if ctrl-c is pressed. static void (*InterruptFunction)() = 0; -static std::vector<sys::Path> *FilesToRemove = 0; -static std::vector<std::pair<void(*)(void*), void*> > *CallBacksToRun = 0; +static std::vector<sys::Path> FilesToRemove; +static std::vector<std::pair<void(*)(void*), void*> > CallBacksToRun; // IntSigs - Signals that may interrupt the program at any time. static const int IntSigs[] = { @@ -126,11 +126,10 @@ static RETSIGTYPE SignalHandler(int Sig) { sigprocmask(SIG_UNBLOCK, &SigMask, 0); SignalsMutex.acquire(); - if (FilesToRemove != 0) - while (!FilesToRemove->empty()) { - FilesToRemove->back().eraseFromDisk(true); - FilesToRemove->pop_back(); - } + while (!FilesToRemove.empty()) { + FilesToRemove.back().eraseFromDisk(true); + FilesToRemove.pop_back(); + } if (std::find(IntSigs, IntSigsEnd, Sig) != IntSigsEnd) { if (InterruptFunction) { @@ -149,9 +148,8 @@ static RETSIGTYPE SignalHandler(int Sig) { SignalsMutex.release(); // Otherwise if it is a fault (like SEGV) run any handler. - if (CallBacksToRun) - for (unsigned i = 0, e = CallBacksToRun->size(); i != e; ++i) - (*CallBacksToRun)[i].first((*CallBacksToRun)[i].second); + for (unsigned i = 0, e = CallBacksToRun.size(); i != e; ++i) + CallBacksToRun[i].first(CallBacksToRun[i].second); } @@ -167,10 +165,7 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) { bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) { SignalsMutex.acquire(); - if (FilesToRemove == 0) - FilesToRemove = new std::vector<sys::Path>(); - - FilesToRemove->push_back(Filename); + FilesToRemove.push_back(Filename); SignalsMutex.release(); @@ -182,9 +177,7 @@ bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, /// to the process. The handler can have a cookie passed to it to identify /// what instance of the handler it is. void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) { - if (CallBacksToRun == 0) - CallBacksToRun = new std::vector<std::pair<void(*)(void*), void*> >(); - CallBacksToRun->push_back(std::make_pair(FnPtr, Cookie)); + CallBacksToRun.push_back(std::make_pair(FnPtr, Cookie)); RegisterHandlers(); } diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index ddeb1b9..ea62c33 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -35,6 +35,10 @@ namespace ARM_AM { add = '+', sub = '-' }; + static inline const char *getAddrOpcStr(AddrOpc Op) { + return Op == sub ? "-" : ""; + } + static inline const char *getShiftOpcStr(ShiftOpc Op) { switch (Op) { default: assert(0 && "Unknown shift opc!"); @@ -78,16 +82,6 @@ namespace ARM_AM { } } - static inline const char *getAMSubModeAltStr(AMSubMode Mode, bool isLD) { - switch (Mode) { - default: assert(0 && "Unknown addressing sub-mode!"); - case ARM_AM::ia: return isLD ? "fd" : "ea"; - case ARM_AM::ib: return isLD ? "ed" : "fa"; - case ARM_AM::da: return isLD ? "fa" : "ed"; - case ARM_AM::db: return isLD ? "ea" : "fd"; - } - } - /// rotr32 - Rotate a 32-bit unsigned value right by a specified # bits. /// static inline unsigned rotr32(unsigned Val, unsigned Amt) { @@ -473,20 +467,13 @@ namespace ARM_AM { // IB - Increment before // DA - Decrement after // DB - Decrement before - // - // If the 4th bit (writeback)is set, then the base register is updated after - // the memory transfer. static inline AMSubMode getAM4SubMode(unsigned Mode) { return (AMSubMode)(Mode & 0x7); } - static inline unsigned getAM4ModeImm(AMSubMode SubMode, bool WB = false) { - return (int)SubMode | ((int)WB << 3); - } - - static inline bool getAM4WBFlag(unsigned Mode) { - return (Mode >> 3) & 1; + static inline unsigned getAM4ModeImm(AMSubMode SubMode) { + return (int)SubMode; } //===--------------------------------------------------------------------===// @@ -501,9 +488,9 @@ namespace ARM_AM { // operation in bit 8 and the immediate in bits 0-7. // // This is also used for FP load/store multiple ops. The second operand - // encodes the writeback mode in bit 8 and the number of registers (or 2 - // times the number of registers for DPR ops) in bits 0-7. In addition, - // bits 9-11 encode one of the following two sub-modes: + // encodes the number of registers (or 2 times the number of registers + // for DPR ops) in bits 0-7. In addition, bits 8-10 encode one of the + // following two sub-modes: // // IA - Increment after // DB - Decrement before @@ -522,17 +509,13 @@ namespace ARM_AM { /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and /// VSTM instructions. - static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB, - unsigned char Offset) { + static inline unsigned getAM5Opc(AMSubMode SubMode, unsigned char Offset) { assert((SubMode == ia || SubMode == db) && "Illegal addressing mode 5 sub-mode!"); - return ((int)SubMode << 9) | ((int)WB << 8) | Offset; + return ((int)SubMode << 8) | Offset; } static inline AMSubMode getAM5SubMode(unsigned AM5Opc) { - return (AMSubMode)((AM5Opc >> 9) & 0x7); - } - static inline bool getAM5WBFlag(unsigned AM5Opc) { - return ((AM5Opc >> 8) & 1); + return (AMSubMode)((AM5Opc >> 8) & 0x7); } //===--------------------------------------------------------------------===// @@ -541,23 +524,11 @@ namespace ARM_AM { // // This is used for NEON load / store instructions. // - // addrmode6 := reg with optional writeback and alignment + // addrmode6 := reg with optional alignment // - // This is stored in four operands [regaddr, regupdate, opc, align]. The - // first is the address register. The second register holds the value of - // a post-access increment for writeback or reg0 if no writeback or if the - // writeback increment is the size of the memory access. The third - // operand encodes whether there is writeback to the address register. The - // fourth operand is the value of the alignment specifier to use or zero if - // no explicit alignment. - - static inline unsigned getAM6Opc(bool WB = false) { - return (int)WB; - } - - static inline bool getAM6WBFlag(unsigned Mode) { - return Mode & 1; - } + // This is stored in two operands [regaddr, align]. The first is the + // address register. The second operand is the value of the alignment + // specifier to use or zero if no explicit alignment. } // end namespace ARM_AM } // end namespace llvm diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 8e537d8..e6ea03a 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -650,39 +650,49 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (SrcRC == ARM::tGPRRegisterClass) SrcRC = ARM::GPRRegisterClass; - if (DestRC != SrcRC) { - if (DestRC->getSize() != SrcRC->getSize()) - return false; + // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. + if (DestRC == ARM::DPR_8RegisterClass) + DestRC = ARM::DPR_VFP2RegisterClass; + if (SrcRC == ARM::DPR_8RegisterClass) + SrcRC = ARM::DPR_VFP2RegisterClass; + + // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. + if (DestRC == ARM::QPR_VFP2RegisterClass || + DestRC == ARM::QPR_8RegisterClass) + DestRC = ARM::QPRRegisterClass; + if (SrcRC == ARM::QPR_VFP2RegisterClass || + SrcRC == ARM::QPR_8RegisterClass) + SrcRC = ARM::QPRRegisterClass; + + // Disallow copies of unequal sizes. + if (DestRC != SrcRC && DestRC->getSize() != SrcRC->getSize()) + return false; - // Allow DPR / DPR_VFP2 / DPR_8 cross-class copies. - // Allow QPR / QPR_VFP2 / QPR_8 cross-class copies. - if (DestRC->getSize() != 8 && DestRC->getSize() != 16) + if (DestRC == ARM::GPRRegisterClass) { + if (SrcRC == ARM::SPRRegisterClass) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVRS), DestReg) + .addReg(SrcReg)); + else + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), + DestReg).addReg(SrcReg))); + } else { + unsigned Opc; + + if (DestRC == ARM::SPRRegisterClass) + Opc = (SrcRC == ARM::GPRRegisterClass ? ARM::VMOVSR : ARM::VMOVS); + else if (DestRC == ARM::DPRRegisterClass) + Opc = ARM::VMOVD; + else if (DestRC == ARM::DPR_VFP2RegisterClass || + SrcRC == ARM::DPR_VFP2RegisterClass) + // Always use neon reg-reg move if source or dest is NEON-only regclass. + Opc = ARM::VMOVDneon; + else if (DestRC == ARM::QPRRegisterClass) + Opc = ARM::VMOVQ; + else return false; - } - if (DestRC == ARM::GPRRegisterClass) { - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), - DestReg).addReg(SrcReg))); - } else if (DestRC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg) - .addReg(SrcReg)); - } else if (DestRC == ARM::DPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(Opc), DestReg) .addReg(SrcReg)); - } else if (DestRC == ARM::DPR_VFP2RegisterClass || - DestRC == ARM::DPR_8RegisterClass || - SrcRC == ARM::DPR_VFP2RegisterClass || - SrcRC == ARM::DPR_8RegisterClass) { - // Always use neon reg-reg move if source or dest is NEON-only regclass. - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVDneon), - DestReg).addReg(SrcReg)); - } else if (DestRC == ARM::QPRRegisterClass || - DestRC == ARM::QPR_VFP2RegisterClass || - DestRC == ARM::QPR_8RegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVQ), - DestReg).addReg(SrcReg)); - } else { - return false; } return true; @@ -727,10 +737,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - if (Align >= 16 - && (getRegisterInfo().canRealignStack(MF))) { + if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64)) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addFrameIndex(FI).addImm(128) .addMemOperand(MMO) .addReg(SrcReg, getKillRegState(isKill))); } else { @@ -780,7 +789,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (Align >= 16 && (getRegisterInfo().canRealignStack(MF))) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) - .addFrameIndex(FI).addImm(0).addImm(0).addImm(128) + .addFrameIndex(FI).addImm(128) .addMemOperand(MMO)); } else { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 767d5ec..292c498 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -93,34 +93,34 @@ namespace ARMII { StMiscFrm = 9 << FormShift, LdStMulFrm = 10 << FormShift, - LdStExFrm = 28 << FormShift, + LdStExFrm = 11 << FormShift, // Miscellaneous arithmetic instructions - ArithMiscFrm = 11 << FormShift, + ArithMiscFrm = 12 << FormShift, // Extend instructions - ExtFrm = 12 << FormShift, + ExtFrm = 13 << FormShift, // VFP formats - VFPUnaryFrm = 13 << FormShift, - VFPBinaryFrm = 14 << FormShift, - VFPConv1Frm = 15 << FormShift, - VFPConv2Frm = 16 << FormShift, - VFPConv3Frm = 17 << FormShift, - VFPConv4Frm = 18 << FormShift, - VFPConv5Frm = 19 << FormShift, - VFPLdStFrm = 20 << FormShift, - VFPLdStMulFrm = 21 << FormShift, - VFPMiscFrm = 22 << FormShift, + VFPUnaryFrm = 14 << FormShift, + VFPBinaryFrm = 15 << FormShift, + VFPConv1Frm = 16 << FormShift, + VFPConv2Frm = 17 << FormShift, + VFPConv3Frm = 18 << FormShift, + VFPConv4Frm = 19 << FormShift, + VFPConv5Frm = 20 << FormShift, + VFPLdStFrm = 21 << FormShift, + VFPLdStMulFrm = 22 << FormShift, + VFPMiscFrm = 23 << FormShift, // Thumb format - ThumbFrm = 23 << FormShift, + ThumbFrm = 24 << FormShift, // NEON format - NEONFrm = 24 << FormShift, - NEONGetLnFrm = 25 << FormShift, - NEONSetLnFrm = 26 << FormShift, - NEONDupFrm = 27 << FormShift, + NEONFrm = 25 << FormShift, + NEONGetLnFrm = 26 << FormShift, + NEONSetLnFrm = 27 << FormShift, + NEONDupFrm = 28 << FormShift, //===------------------------------------------------------------------===// // Misc flags. diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 11e1c48..b380c95 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -80,7 +80,7 @@ unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, case D23: return 23; case D24: return 24; case D25: return 25; - case D26: return 27; + case D26: return 26; case D27: return 27; case D28: return 28; case D29: return 29; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 334c820..e7aa0c8 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -55,12 +55,12 @@ namespace { const std::vector<MachineConstantPoolEntry> *MCPEs; const std::vector<MachineJumpTableEntry> *MJTEs; bool IsPIC; - + void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<MachineModuleInfo>(); MachineFunctionPass::getAnalysisUsage(AU); } - + static char ID; public: ARMCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) @@ -68,7 +68,7 @@ namespace { TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), IsPIC(TM.getRelocationModel() == Reloc::PIC_) {} - + /// getBinaryCodeForInstr - This function, generated by the /// CodeEmitterGenerator using TableGen, produces the binary encoding for /// machine instructions. @@ -163,7 +163,7 @@ namespace { char ARMCodeEmitter::ID = 0; -/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM +/// createARMJITCodeEmitterPass - Return a pass that emits the collected ARM /// code to the specified MCE object. FunctionPass *llvm::createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM, JITCodeEmitter &JCE) { @@ -617,8 +617,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) { } } -unsigned ARMCodeEmitter::getMachineSoRegOpValue( - const MachineInstr &MI, +unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI, const TargetInstrDesc &TID, const MachineOperand &MO, unsigned OpIdx) { @@ -690,7 +689,7 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) { } unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, - const TargetInstrDesc &TID) const { + const TargetInstrDesc &TID) const { for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i != e; --i){ const MachineOperand &MO = MI.getOperand(i-1); if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) @@ -699,8 +698,7 @@ unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI, return 0; } -void ARMCodeEmitter::emitDataProcessingInstruction( - const MachineInstr &MI, +void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); @@ -765,8 +763,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction( emitWordLE(Binary); } -void ARMCodeEmitter::emitLoadStoreInstruction( - const MachineInstr &MI, +void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI, unsigned ImplicitRd, unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); @@ -841,7 +838,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction( } void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, - unsigned ImplicitRn) { + unsigned ImplicitRn) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0; @@ -950,7 +947,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) { Binary |= getAddrModeUPBits(ARM_AM::getAM4SubMode(MO.getImm())); // Set bit W(21) - if (ARM_AM::getAM4WBFlag(MO.getImm())) + if (IsUpdating) Binary |= 0x1 << ARMII::W_BitShift; // Set registers @@ -1238,8 +1235,7 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -void ARMCodeEmitter::emitVFPConversionInstruction( - const MachineInstr &MI) { +void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); unsigned Form = TID.TSFlags & ARMII::FormMask; @@ -1329,8 +1325,8 @@ void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) { emitWordLE(Binary); } -void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction( - const MachineInstr &MI) { +void +ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) { const TargetInstrDesc &TID = MI.getDesc(); bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0; @@ -1353,7 +1349,7 @@ void ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction( Binary |= getAddrModeUPBits(ARM_AM::getAM5SubMode(MO.getImm())); // Set bit W(21) - if (ARM_AM::getAM5WBFlag(MO.getImm())) + if (IsUpdating) Binary |= 0x1 << ARMII::W_BitShift; // First register is encoded in Dd. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 013e00a..71207c8 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -80,8 +80,7 @@ public: SDValue &Mode); bool SelectAddrMode5(SDNode *Op, SDValue N, SDValue &Base, SDValue &Offset); - bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Update, - SDValue &Opc, SDValue &Align); + bool SelectAddrMode6(SDNode *Op, SDValue N, SDValue &Addr, SDValue &Align); bool SelectAddrModePC(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Label); @@ -502,12 +501,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDNode *Op, SDValue N, } bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Op, SDValue N, - SDValue &Addr, SDValue &Update, - SDValue &Opc, SDValue &Align) { + SDValue &Addr, SDValue &Align) { Addr = N; - // Default to no writeback. - Update = CurDAG->getRegister(0, MVT::i32); - Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); // Default to no alignment. Align = CurDAG->getTargetConstant(0, MVT::i32); return true; @@ -1030,8 +1025,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VLD NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1055,14 +1050,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector<EVT> ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1070,11 +1064,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Quad registers are directly supported for VLD2, // loading 2 pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; + const SDValue Ops[] = { MemAddr, Align, Pred, Reg0, Chain }; std::vector<EVT> ResTys(4, VT); ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 7); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); Chain = SDValue(VLd, 4); // Combine the even and odd subregs to produce the result. @@ -1086,25 +1079,21 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, unsigned NumVecs, // Otherwise, quad registers are loaded with two separate instructions, // where one loads the even registers and the other loads the odd registers. - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); - std::vector<EVT> ResTys(NumVecs, RegVT); ResTys.push_back(MemAddr.getValueType()); ResTys.push_back(MVT::Other); // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, - Pred, PredReg, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 7); + const SDValue OpsA[] = { MemAddr, Align, Reg0, Pred, Reg0, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 6); Chain = SDValue(VLdA, NumVecs+1); // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, - Align, Pred, PredReg, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 7); + const SDValue OpsB[] = { SDValue(VLdA, NumVecs), + Align, Reg0, Pred, Reg0, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 6); Chain = SDValue(VLdB, NumVecs+1); // Combine the even and odd subregs to produce the result. @@ -1123,8 +1112,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1148,12 +1137,10 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 8> Ops; + SmallVector<SDValue, 10> Ops; Ops.push_back(MemAddr); - Ops.push_back(MemUpdate); - Ops.push_back(MemOpc); Ops.push_back(Align); if (is64BitVector) { @@ -1161,9 +1148,9 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(N->getOperand(Vec+3)); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+7); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1178,40 +1165,37 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, unsigned NumVecs, N->getOperand(Vec+3))); } Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 11); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); } // Otherwise, quad registers are stored with two separate instructions, // where one stores the even registers and the other stores the odd registers. - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + Ops.push_back(Reg0); // post-access address offset // Store the even subregs. for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, N->getOperand(Vec+3))); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); // predicate register Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+7); + MVT::Other, Ops.data(), NumVecs+6); Chain = SDValue(VStA, 1); // Store the odd subregs. Ops[0] = SDValue(VStA, 0); // MemAddr for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3)); - Ops[NumVecs+4] = Pred; - Ops[NumVecs+5] = PredReg; - Ops[NumVecs+6] = Chain; + Ops[NumVecs+5] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+7); + MVT::Other, Ops.data(), NumVecs+6); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1224,8 +1208,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc, Align; - if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) + SDValue MemAddr, Align; + if (!SelectAddrMode6(N, N->getOperand(2), MemAddr, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1259,12 +1243,10 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } SDValue Pred = CurDAG->getTargetConstant(14, MVT::i32); - SDValue PredReg = CurDAG->getRegister(0, MVT::i32); + SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); - SmallVector<SDValue, 9> Ops; + SmallVector<SDValue, 10> Ops; Ops.push_back(MemAddr); - Ops.push_back(MemUpdate); - Ops.push_back(MemOpc); Ops.push_back(Align); unsigned Opc = 0; @@ -1287,16 +1269,16 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, } Ops.push_back(getI32Imm(Lane)); Ops.push_back(Pred); - Ops.push_back(PredReg); + Ops.push_back(Reg0); Ops.push_back(Chain); if (!IsLoad) - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+8); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+6); std::vector<EVT> ResTys(NumVecs, RegVT); ResTys.push_back(MVT::Other); SDNode *VLdLn = - CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+8); + CurDAG->getMachineNode(Opc, dl, ResTys, Ops.data(), NumVecs+6); // For a 64-bit vector load to D registers, nothing more needs to be done. if (is64BitVector) return VLdLn; @@ -1859,37 +1841,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vld3: { unsigned DOpcodes[] = { ARM::VLD3d8, ARM::VLD3d16, ARM::VLD3d32, ARM::VLD3d64 }; - unsigned QOpcodes0[] = { ARM::VLD3q8a, ARM::VLD3q16a, ARM::VLD3q32a }; - unsigned QOpcodes1[] = { ARM::VLD3q8b, ARM::VLD3q16b, ARM::VLD3q32b }; + unsigned QOpcodes0[] = { ARM::VLD3q8_UPD, + ARM::VLD3q16_UPD, + ARM::VLD3q32_UPD }; + unsigned QOpcodes1[] = { ARM::VLD3q8odd_UPD, + ARM::VLD3q16odd_UPD, + ARM::VLD3q32odd_UPD }; return SelectVLD(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4: { unsigned DOpcodes[] = { ARM::VLD4d8, ARM::VLD4d16, ARM::VLD4d32, ARM::VLD4d64 }; - unsigned QOpcodes0[] = { ARM::VLD4q8a, ARM::VLD4q16a, ARM::VLD4q32a }; - unsigned QOpcodes1[] = { ARM::VLD4q8b, ARM::VLD4q16b, ARM::VLD4q32b }; + unsigned QOpcodes0[] = { ARM::VLD4q8_UPD, + ARM::VLD4q16_UPD, + ARM::VLD4q32_UPD }; + unsigned QOpcodes1[] = { ARM::VLD4q8odd_UPD, + ARM::VLD4q16odd_UPD, + ARM::VLD4q32odd_UPD }; return SelectVLD(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld2lane: { unsigned DOpcodes[] = { ARM::VLD2LNd8, ARM::VLD2LNd16, ARM::VLD2LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD2LNq16a, ARM::VLD2LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD2LNq16b, ARM::VLD2LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD2LNq16, ARM::VLD2LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD2LNq16odd, ARM::VLD2LNq32odd }; return SelectVLDSTLane(N, true, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld3lane: { unsigned DOpcodes[] = { ARM::VLD3LNd8, ARM::VLD3LNd16, ARM::VLD3LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD3LNq16a, ARM::VLD3LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD3LNq16b, ARM::VLD3LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD3LNq16, ARM::VLD3LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD3LNq16odd, ARM::VLD3LNq32odd }; return SelectVLDSTLane(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vld4lane: { unsigned DOpcodes[] = { ARM::VLD4LNd8, ARM::VLD4LNd16, ARM::VLD4LNd32 }; - unsigned QOpcodes0[] = { ARM::VLD4LNq16a, ARM::VLD4LNq32a }; - unsigned QOpcodes1[] = { ARM::VLD4LNq16b, ARM::VLD4LNq32b }; + unsigned QOpcodes0[] = { ARM::VLD4LNq16, ARM::VLD4LNq32 }; + unsigned QOpcodes1[] = { ARM::VLD4LNq16odd, ARM::VLD4LNq32odd }; return SelectVLDSTLane(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); } @@ -1903,37 +1893,45 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_neon_vst3: { unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16, ARM::VST3d32, ARM::VST3d64 }; - unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a }; - unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b }; + unsigned QOpcodes0[] = { ARM::VST3q8_UPD, + ARM::VST3q16_UPD, + ARM::VST3q32_UPD }; + unsigned QOpcodes1[] = { ARM::VST3q8odd_UPD, + ARM::VST3q16odd_UPD, + ARM::VST3q32odd_UPD }; return SelectVST(N, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16, ARM::VST4d32, ARM::VST4d64 }; - unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a }; - unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b }; + unsigned QOpcodes0[] = { ARM::VST4q8_UPD, + ARM::VST4q16_UPD, + ARM::VST4q32_UPD }; + unsigned QOpcodes1[] = { ARM::VST4q8odd_UPD, + ARM::VST4q16odd_UPD, + ARM::VST4q32odd_UPD }; return SelectVST(N, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst2lane: { unsigned DOpcodes[] = { ARM::VST2LNd8, ARM::VST2LNd16, ARM::VST2LNd32 }; - unsigned QOpcodes0[] = { ARM::VST2LNq16a, ARM::VST2LNq32a }; - unsigned QOpcodes1[] = { ARM::VST2LNq16b, ARM::VST2LNq32b }; + unsigned QOpcodes0[] = { ARM::VST2LNq16, ARM::VST2LNq32 }; + unsigned QOpcodes1[] = { ARM::VST2LNq16odd, ARM::VST2LNq32odd }; return SelectVLDSTLane(N, false, 2, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst3lane: { unsigned DOpcodes[] = { ARM::VST3LNd8, ARM::VST3LNd16, ARM::VST3LNd32 }; - unsigned QOpcodes0[] = { ARM::VST3LNq16a, ARM::VST3LNq32a }; - unsigned QOpcodes1[] = { ARM::VST3LNq16b, ARM::VST3LNq32b }; + unsigned QOpcodes0[] = { ARM::VST3LNq16, ARM::VST3LNq32 }; + unsigned QOpcodes1[] = { ARM::VST3LNq16odd, ARM::VST3LNq32odd }; return SelectVLDSTLane(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4lane: { unsigned DOpcodes[] = { ARM::VST4LNd8, ARM::VST4LNd16, ARM::VST4LNd32 }; - unsigned QOpcodes0[] = { ARM::VST4LNq16a, ARM::VST4LNq32a }; - unsigned QOpcodes1[] = { ARM::VST4LNq16b, ARM::VST4LNq32b }; + unsigned QOpcodes0[] = { ARM::VST4LNq16, ARM::VST4LNq32 }; + unsigned QOpcodes1[] = { ARM::VST4LNq16odd, ARM::VST4LNq32odd }; return SelectVLDSTLane(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); } } diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 8f20843..0d0a004 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -436,9 +436,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); } // Special handling for half-precision FP. - if (Subtarget->hasVFP3() && Subtarget->hasFP16()) { - setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Custom); - setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Custom); + if (!Subtarget->hasFP16()) { + setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); + setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); } } @@ -499,8 +499,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::FTOUI: return "ARMISD::FTOUI"; case ARMISD::SITOF: return "ARMISD::SITOF"; case ARMISD::UITOF: return "ARMISD::UITOF"; - case ARMISD::F16_TO_F32: return "ARMISD::F16_TO_F32"; - case ARMISD::F32_TO_F16: return "ARMISD::F32_TO_F16"; case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; @@ -1987,9 +1985,6 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Invalid opcode!"); - case ISD::FP32_TO_FP16: - Opc = ARMISD::F32_TO_F16; - break; case ISD::FP_TO_SINT: Opc = ARMISD::FTOSI; break; @@ -2009,9 +2004,6 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { switch (Op.getOpcode()) { default: assert(0 && "Invalid opcode!"); - case ISD::FP16_TO_FP32: - Opc = ARMISD::F16_TO_F32; - break; case ISD::SINT_TO_FP: Opc = ARMISD::SITOF; break; @@ -3078,10 +3070,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); - case ISD::FP16_TO_FP32: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); - case ISD::FP32_TO_FP16: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index d7b2ba3..f8f8adc 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -59,8 +59,6 @@ namespace llvm { FTOUI, // FP to uint within a FP register. SITOF, // sint to FP within a FP register. UITOF, // uint to FP within a FP register. - F16_TO_F32, // Half FP to single FP within a FP register. - F32_TO_F16, // Single FP to half FP within a FP register. SRL_FLAG, // V,Flag = srl_flag X -> srl X, 1 + save carry out. SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 258a96b..4f6f05d 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -15,8 +15,8 @@ // Format specifies the encoding used by the instruction. This is part of the // ad-hoc solution used to emit machine instruction encodings by our machine // code emitter. -class Format<bits<5> val> { - bits<5> Value = val; +class Format<bits<6> val> { + bits<6> Value = val; } def Pseudo : Format<0>; @@ -33,32 +33,34 @@ def LdMiscFrm : Format<8>; def StMiscFrm : Format<9>; def LdStMulFrm : Format<10>; -def LdStExFrm : Format<28>; +def LdStExFrm : Format<11>; -def ArithMiscFrm : Format<11>; -def ExtFrm : Format<12>; +def ArithMiscFrm : Format<12>; +def ExtFrm : Format<13>; -def VFPUnaryFrm : Format<13>; -def VFPBinaryFrm : Format<14>; -def VFPConv1Frm : Format<15>; -def VFPConv2Frm : Format<16>; -def VFPConv3Frm : Format<17>; -def VFPConv4Frm : Format<18>; -def VFPConv5Frm : Format<19>; -def VFPLdStFrm : Format<20>; -def VFPLdStMulFrm : Format<21>; -def VFPMiscFrm : Format<22>; +def VFPUnaryFrm : Format<14>; +def VFPBinaryFrm : Format<15>; +def VFPConv1Frm : Format<16>; +def VFPConv2Frm : Format<17>; +def VFPConv3Frm : Format<18>; +def VFPConv4Frm : Format<19>; +def VFPConv5Frm : Format<20>; +def VFPLdStFrm : Format<21>; +def VFPLdStMulFrm : Format<22>; +def VFPMiscFrm : Format<23>; -def ThumbFrm : Format<23>; +def ThumbFrm : Format<24>; -def NEONFrm : Format<24>; -def NEONGetLnFrm : Format<25>; -def NEONSetLnFrm : Format<26>; -def NEONDupFrm : Format<27>; +def NEONFrm : Format<25>; +def NEONGetLnFrm : Format<26>; +def NEONSetLnFrm : Format<27>; +def NEONDupFrm : Format<28>; def MiscFrm : Format<29>; def ThumbMiscFrm : Format<30>; +def NLdStFrm : Format<31>; + // Misc flags. // the instruction has a Rn register operand. @@ -71,7 +73,7 @@ class UnaryDP { bit isUnaryDataProc = 1; } class Xform16Bit { bit canXformTo16Bit = 1; } //===----------------------------------------------------------------------===// -// ARM Instruction flags. These need to match ARMInstrInfo.h. +// ARM Instruction flags. These need to match ARMBaseInstrInfo.h. // // Addressing mode. @@ -183,7 +185,7 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im, bits<2> IndexModeBits = IM.Value; Format F = f; - bits<5> Form = F.Value; + bits<6> Form = F.Value; Domain D = d; bits<2> Dom = D.Value; @@ -229,7 +231,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<dag> pattern> : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; @@ -257,7 +259,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, list<dag> pattern> : InstARM<am, sz, im, f, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${p}${s}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsARM]; @@ -1007,8 +1009,8 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { - let OutOperandList = !con(oops, (ops s_cc_out:$s)); - let InOperandList = !con(iops, (ops pred:$p)); + let OutOperandList = !con(oops, (outs s_cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; @@ -1030,7 +1032,7 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb1Only]; @@ -1109,7 +1111,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; @@ -1125,7 +1127,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p, cc_out:$s)); + let InOperandList = !con(iops, (ins pred:$p, cc_out:$s)); let AsmString = !strconcat(opc, !strconcat("${s}${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; @@ -1209,7 +1211,7 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [IsThumb2]; @@ -1265,7 +1267,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, sz, im, f, VFPDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(opc, !strconcat("${p}", asm)); let Pattern = pattern; list<Predicate> Predicates = [HasVFP2]; @@ -1464,11 +1466,12 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops, // ARM NEON Instruction templates. // -class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, - string opc, string dt, string asm, string cstr, list<dag> pattern> - : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> { +class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f, + InstrItinClass itin, string opc, string dt, string asm, string cstr, + list<dag> pattern> + : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat( !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), !strconcat("\t", asm)); @@ -1481,7 +1484,7 @@ class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, InstrItinClass itin, string opc, string asm, string cstr, list<dag> pattern> : InstARM<am, Size4Bytes, im, NEONFrm, NeonDomain, cstr, itin> { let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat(!strconcat(opc, "${p}"), !strconcat("\t", asm)); let Pattern = pattern; list<Predicate> Predicates = [HasNEON]; @@ -1502,8 +1505,8 @@ class NI4<dag oops, dag iops, InstrItinClass itin, string opc, class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> - : NeonI<oops, iops, AddrMode6, IndexModeNone, itin, opc, dt, asm, cstr, - pattern> { + : NeonI<oops, iops, AddrMode6, IndexModeNone, NLdStFrm, itin, opc, dt, asm, + cstr, pattern> { let Inst{31-24} = 0b11110100; let Inst{23} = op23; let Inst{21-20} = op21_20; @@ -1513,7 +1516,7 @@ class NLdSt<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4, class NDataI<dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list<dag> pattern> - : NeonI<oops, iops, AddrModeNone, IndexModeNone, itin, opc, dt, asm, + : NeonI<oops, iops, AddrModeNone, IndexModeNone, NEONFrm, itin, opc, dt, asm, cstr, pattern> { let Inst{31-25} = 0b1111001; } @@ -1621,7 +1624,7 @@ class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3, let Inst{4} = 1; let OutOperandList = oops; - let InOperandList = !con(iops, (ops pred:$p)); + let InOperandList = !con(iops, (ins pred:$p)); let AsmString = !strconcat( !strconcat(!strconcat(opc, "${p}"), !strconcat(".", dt)), !strconcat("\t", asm)); diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 3fc37da..26a2806 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -62,11 +62,14 @@ def ARMcallseq_end : SDNode<"ISD::CALLSEQ_END", SDT_ARMCallSeqEnd, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def ARMcall : SDNode<"ARMISD::CALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMcall_pred : SDNode<"ARMISD::CALL_PRED", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; @@ -282,7 +285,7 @@ def pclabel : Operand<i32> { // shifter_operand operands: so_reg and so_imm. def so_reg : Operand<i32>, // reg reg imm - ComplexPattern<i32, 3, "SelectShifterOperandReg", + ComplexPattern<i32, 3, "SelectShifterOperandReg", [shl,srl,sra,rotr]> { let PrintMethod = "printSORegOperand"; let MIOperandInfo = (ops GPR, GPR, i32imm); @@ -392,9 +395,14 @@ def addrmode5 : Operand<i32>, // addrmode6 := reg with optional writeback // def addrmode6 : Operand<i32>, - ComplexPattern<i32, 4, "SelectAddrMode6", []> { + ComplexPattern<i32, 2, "SelectAddrMode6", []> { let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm); + let MIOperandInfo = (ops GPR:$addr, i32imm); +} + +def am6offset : Operand<i32> { + let PrintMethod = "printAddrMode6OffsetOperand"; + let MIOperandInfo = (ops GPR); } // addrmodepc := pc + reg @@ -909,7 +917,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, def LDM_RET : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, LdStMulFrm, IIC_Br, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>; // On non-Darwin platforms R9 is callee-saved. @@ -1354,7 +1362,7 @@ def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, def LDM_UPD : AXI4ld<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, LdStMulFrm, IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>; } // mayLoad, hasExtraDefRegAllocReq @@ -1367,7 +1375,7 @@ def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, def STM_UPD : AXI4st<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, LdStMulFrm, IIC_iStorem, - "stm${addr:submode}${p}\t$addr, $srcs", + "stm${addr:submode}${p}\t$addr!, $srcs", "$addr.addr = $wb", []>; } // mayStore, hasExtraSrcRegAllocReq diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 8fee6fa..c977cc3 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -138,214 +138,360 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, } // VLD1 : Vector Load (multiple single elements) -class VLD1D<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> +class VLD1D<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - OpcodeStr, Dt, "\\{$dst\\}, $addr", "", - [(set DPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; -class VLD1Q<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> + "vld1", Dt, "\\{$dst\\}, $addr", "", + [(set DPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; +class VLD1Q<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst), (ins addrmode6:$addr), IIC_VLD1, - OpcodeStr, Dt, "${dst:dregpair}, $addr", "", - [(set QPR:$dst, (Ty (IntOp addrmode6:$addr)))]>; + "vld1", Dt, "${dst:dregpair}, $addr", "", + [(set QPR:$dst, (Ty (int_arm_neon_vld1 addrmode6:$addr)))]>; + +def VLD1d8 : VLD1D<0b0000, "8", v8i8>; +def VLD1d16 : VLD1D<0b0100, "16", v4i16>; +def VLD1d32 : VLD1D<0b1000, "32", v2i32>; +def VLD1df : VLD1D<0b1000, "32", v2f32>; +def VLD1d64 : VLD1D<0b1100, "64", v1i64>; + +def VLD1q8 : VLD1Q<0b0000, "8", v16i8>; +def VLD1q16 : VLD1Q<0b0100, "16", v8i16>; +def VLD1q32 : VLD1Q<0b1000, "32", v4i32>; +def VLD1qf : VLD1Q<0b1000, "32", v4f32>; +def VLD1q64 : VLD1Q<0b1100, "64", v2i64>; + +let mayLoad = 1 in { + +// ...with address register writeback: +class VLD1DWB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", Dt, "\\{$dst\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VLD1QWB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b1010,op7_4, (outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", Dt, "${dst:dregpair}, $addr$offset", + "$addr.addr = $wb", []>; -def VLD1d8 : VLD1D<0b0000, "vld1", "8", v8i8, int_arm_neon_vld1>; -def VLD1d16 : VLD1D<0b0100, "vld1", "16", v4i16, int_arm_neon_vld1>; -def VLD1d32 : VLD1D<0b1000, "vld1", "32", v2i32, int_arm_neon_vld1>; -def VLD1df : VLD1D<0b1000, "vld1", "32", v2f32, int_arm_neon_vld1>; -def VLD1d64 : VLD1D<0b1100, "vld1", "64", v1i64, int_arm_neon_vld1>; +def VLD1d8_UPD : VLD1DWB<0b0000, "8">; +def VLD1d16_UPD : VLD1DWB<0b0100, "16">; +def VLD1d32_UPD : VLD1DWB<0b1000, "32">; +def VLD1d64_UPD : VLD1DWB<0b1100, "64">; -def VLD1q8 : VLD1Q<0b0000, "vld1", "8", v16i8, int_arm_neon_vld1>; -def VLD1q16 : VLD1Q<0b0100, "vld1", "16", v8i16, int_arm_neon_vld1>; -def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>; -def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>; -def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>; +def VLD1q8_UPD : VLD1QWB<0b0000, "8">; +def VLD1q16_UPD : VLD1QWB<0b0100, "16">; +def VLD1q32_UPD : VLD1QWB<0b1000, "32">; +def VLD1q64_UPD : VLD1QWB<0b1100, "64">; +} // mayLoad = 1 + +let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // These (dreg triple/quadruple) are for disassembly only. -class VLD1D3<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0, 0b10, 0b0110, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, +class VLD1D3<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -class VLD1D4<bits<4> op7_4, string OpcodeStr, string Dt> +class VLD1D4<bits<4> op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt, + (ins addrmode6:$addr), IIC_VLD1, "vld1", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -def VLD1d8T : VLD1D3<0b0000, "vld1", "8">; -def VLD1d16T : VLD1D3<0b0100, "vld1", "16">; -def VLD1d32T : VLD1D3<0b1000, "vld1", "32">; -//def VLD1d64T : VLD1D3<0b1100, "vld1", "64">; - -def VLD1d8Q : VLD1D4<0b0000, "vld1", "8">; -def VLD1d16Q : VLD1D4<0b0100, "vld1", "16">; -def VLD1d32Q : VLD1D4<0b1000, "vld1", "32">; -//def VLD1d64Q : VLD1D4<0b1100, "vld1", "64">; +def VLD1d8T : VLD1D3<0b0000, "8">; +def VLD1d16T : VLD1D3<0b0100, "16">; +def VLD1d32T : VLD1D3<0b1000, "32">; +// VLD1d64T : implemented as VLD3d64 + +def VLD1d8Q : VLD1D4<0b0000, "8">; +def VLD1d16Q : VLD1D4<0b0100, "16">; +def VLD1d32Q : VLD1D4<0b1000, "32">; +// VLD1d64Q : implemented as VLD4d64 + +// ...with address register writeback: +class VLD1D3WB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3\\}, $addr$offset", "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +class VLD1D4WB<bits<4> op7_4, string Dt> + : NLdSt<0,0b10,0b0010,op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, "vld1", Dt, + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +def VLD1d8T_UPD : VLD1D3WB<0b0000, "8">; +def VLD1d16T_UPD : VLD1D3WB<0b0100, "16">; +def VLD1d32T_UPD : VLD1D3WB<0b1000, "32">; +// VLD1d64T_UPD : implemented as VLD3d64_UPD -let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { +def VLD1d8Q_UPD : VLD1D4WB<0b0000, "8">; +def VLD1d16Q_UPD : VLD1D4WB<0b0100, "16">; +def VLD1d32Q_UPD : VLD1D4WB<0b1000, "32">; +// VLD1d64Q_UPD : implemented as VLD4d64_UPD // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b1000,op7_4, (outs DPR:$dst1, DPR:$dst2), +class VLD2D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; -class VLD2Q<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0011,op7_4, + "vld2", Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; +class VLD2Q<bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, 0b0011, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "", []>; + "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; -def VLD2d8 : VLD2D<0b0000, "vld2", "8">; -def VLD2d16 : VLD2D<0b0100, "vld2", "16">; -def VLD2d32 : VLD2D<0b1000, "vld2", "32">; +def VLD2d8 : VLD2D<0b1000, 0b0000, "8">; +def VLD2d16 : VLD2D<0b1000, 0b0100, "16">; +def VLD2d32 : VLD2D<0b1000, 0b1000, "32">; def VLD2d64 : NLdSt<0,0b10,0b1010,0b1100, (outs DPR:$dst1, DPR:$dst2), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2\\}, $addr", "", []>; -def VLD2q8 : VLD2Q<0b0000, "vld2", "8">; -def VLD2q16 : VLD2Q<0b0100, "vld2", "16">; -def VLD2q32 : VLD2Q<0b1000, "vld2", "32">; +def VLD2q8 : VLD2Q<0b0000, "8">; +def VLD2q16 : VLD2Q<0b0100, "16">; +def VLD2q32 : VLD2Q<0b1000, "32">; -// These (double-spaced dreg pair) are for disassembly only. -class VLD2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b1001,op7_4, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr), IIC_VLD2, - OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>; +// ...with address register writeback: +class VLD2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, + "vld2", Dt, "\\{$dst1, $dst2\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VLD2QWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, 0b0011, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD2, + "vld2", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", + "$addr.addr = $wb", []>; -def VLD2d8D : VLD2Ddbl<0b0000, "vld2", "8">; -def VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">; -def VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">; +def VLD2d8_UPD : VLD2DWB<0b1000, 0b0000, "8">; +def VLD2d16_UPD : VLD2DWB<0b1000, 0b0100, "16">; +def VLD2d32_UPD : VLD2DWB<0b1000, 0b1000, "32">; +def VLD2d64_UPD : NLdSt<0,0b10,0b1010,0b1100, + (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", "\\{$dst1, $dst2\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VLD2q8_UPD : VLD2QWB<0b0000, "8">; +def VLD2q16_UPD : VLD2QWB<0b0100, "16">; +def VLD2q32_UPD : VLD2QWB<0b1000, "32">; + +// ...with double-spaced registers (for disassembly only): +def VLD2b8 : VLD2D<0b1001, 0b0000, "8">; +def VLD2b16 : VLD2D<0b1001, 0b0100, "16">; +def VLD2b32 : VLD2D<0b1001, 0b1000, "32">; +def VLD2b8_UPD : VLD2DWB<0b1001, 0b0000, "8">; +def VLD2b16_UPD : VLD2DWB<0b1001, 0b0100, "16">; +def VLD2b32_UPD : VLD2DWB<0b1001, 0b1000, "32">; // VLD3 : Vector Load (multiple 3-element structures) -class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), +class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD3, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -class VLD3WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0101,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD3, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", - "$addr.addr = $wb", []>; + "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -def VLD3d8 : VLD3D<0b0000, "vld3", "8">; -def VLD3d16 : VLD3D<0b0100, "vld3", "16">; -def VLD3d32 : VLD3D<0b1000, "vld3", "32">; +def VLD3d8 : VLD3D<0b0100, 0b0000, "8">; +def VLD3d16 : VLD3D<0b0100, 0b0100, "16">; +def VLD3d32 : VLD3D<0b0100, 0b1000, "32">; def VLD3d64 : NLdSt<0,0b10,0b0110,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr", "", []>; -// vld3 to double-spaced even registers. -def VLD3q8a : VLD3WB<0b0000, "vld3", "8">; -def VLD3q16a : VLD3WB<0b0100, "vld3", "16">; -def VLD3q32a : VLD3WB<0b1000, "vld3", "32">; +// ...with address register writeback: +class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD3, + "vld3", Dt, "\\{$dst1, $dst2, $dst3\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vld3 to double-spaced odd registers. -def VLD3q8b : VLD3WB<0b0000, "vld3", "8">; -def VLD3q16b : VLD3WB<0b0100, "vld3", "16">; -def VLD3q32b : VLD3WB<0b1000, "vld3", "32">; +def VLD3d8_UPD : VLD3DWB<0b0100, 0b0000, "8">; +def VLD3d16_UPD : VLD3DWB<0b0100, 0b0100, "16">; +def VLD3d32_UPD : VLD3DWB<0b0100, 0b1000, "32">; +def VLD3d64_UPD : NLdSt<0,0b10,0b0110,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", "\\{$dst1, $dst2, $dst3\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VLD3q8 : VLD3D<0b0101, 0b0000, "8">; +def VLD3q16 : VLD3D<0b0101, 0b0100, "16">; +def VLD3q32 : VLD3D<0b0101, 0b1000, "32">; +def VLD3q8_UPD : VLD3DWB<0b0101, 0b0000, "8">; +def VLD3q16_UPD : VLD3DWB<0b0101, 0b0100, "16">; +def VLD3q32_UPD : VLD3DWB<0b0101, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VLD3q8odd_UPD : VLD3DWB<0b0101, 0b0000, "8">; +def VLD3q16odd_UPD : VLD3DWB<0b0101, 0b0100, "16">; +def VLD3q32odd_UPD : VLD3DWB<0b0101, 0b1000, "32">; // VLD4 : Vector Load (multiple 4-element structures) -class VLD4D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0000,op7_4, +class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD4, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "", []>; -class VLD4WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b10,0b0001,op7_4, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), - (ins addrmode6:$addr), IIC_VLD4, - OpcodeStr, Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", - "$addr.addr = $wb", []>; + "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; -def VLD4d8 : VLD4D<0b0000, "vld4", "8">; -def VLD4d16 : VLD4D<0b0100, "vld4", "16">; -def VLD4d32 : VLD4D<0b1000, "vld4", "32">; +def VLD4d8 : VLD4D<0b0000, 0b0000, "8">; +def VLD4d16 : VLD4D<0b0000, 0b0100, "16">; +def VLD4d32 : VLD4D<0b0000, 0b1000, "32">; def VLD4d64 : NLdSt<0,0b10,0b0010,0b1100, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$addr), IIC_VLD1, "vld1", "64", "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "", []>; -// vld4 to double-spaced even registers. -def VLD4q8a : VLD4WB<0b0000, "vld4", "8">; -def VLD4q16a : VLD4WB<0b0100, "vld4", "16">; -def VLD4q32a : VLD4WB<0b1000, "vld4", "32">; +// ...with address register writeback: +class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b10, op11_8, op7_4, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD4, + "vld4", Dt, "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vld4 to double-spaced odd registers. -def VLD4q8b : VLD4WB<0b0000, "vld4", "8">; -def VLD4q16b : VLD4WB<0b0100, "vld4", "16">; -def VLD4q32b : VLD4WB<0b1000, "vld4", "32">; +def VLD4d8_UPD : VLD4DWB<0b0000, 0b0000, "8">; +def VLD4d16_UPD : VLD4DWB<0b0000, 0b0100, "16">; +def VLD4d32_UPD : VLD4DWB<0b0000, 0b1000, "32">; +def VLD4d64_UPD : NLdSt<0,0b10,0b0010,0b1100, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, + GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset), IIC_VLD1, + "vld1", "64", + "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VLD4q8 : VLD4D<0b0001, 0b0000, "8">; +def VLD4q16 : VLD4D<0b0001, 0b0100, "16">; +def VLD4q32 : VLD4D<0b0001, 0b1000, "32">; +def VLD4q8_UPD : VLD4DWB<0b0001, 0b0000, "8">; +def VLD4q16_UPD : VLD4DWB<0b0001, 0b0100, "16">; +def VLD4q32_UPD : VLD4DWB<0b0001, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VLD4q8odd_UPD : VLD4DWB<0b0001, 0b0000, "8">; +def VLD4q16odd_UPD : VLD4DWB<0b0001, 0b0100, "16">; +def VLD4q32odd_UPD : VLD4DWB<0b0001, 0b1000, "32">; // VLD1LN : Vector Load (single element to one lane) // FIXME: Not yet implemented. // VLD2LN : Vector Load (single 2-element structure to one lane) -class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VLD2, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2", []>; +class VLD2LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), + IIC_VLD2, "vld2", Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2", []>; + +def VLD2LNd8 : VLD2LN<0b0001, "8">; +def VLD2LNd16 : VLD2LN<0b0101, "16"> { let Inst{5} = 0; } +def VLD2LNd32 : VLD2LN<0b1001, "32"> { let Inst{6} = 0; } -// vld2 to single-spaced registers. -def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">; -def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; } -def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; } +// ...with double-spaced registers: +def VLD2LNq16 : VLD2LN<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32 : VLD2LN<0b1001, "32"> { let Inst{6} = 1; } -// vld2 to double-spaced even registers. -def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } -def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VLD2LNq16odd : VLD2LN<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32odd : VLD2LN<0b1001, "32"> { let Inst{6} = 1; } -// vld2 to double-spaced odd registers. -def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; } -def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; } +// ...with address register writeback: +class VLD2LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VLD2, "vld2", Dt, + "\\{$dst1[$lane], $dst2[$lane]\\}, $addr$offset", + "$src1 = $dst1, $src2 = $dst2, $addr.addr = $wb", []>; + +def VLD2LNd8_UPD : VLD2LNWB<0b0001, "8">; +def VLD2LNd16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 0; } +def VLD2LNd32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 0; } + +def VLD2LNq16_UPD : VLD2LNWB<0b0101, "16"> { let Inst{5} = 1; } +def VLD2LNq32_UPD : VLD2LNWB<0b1001, "32"> { let Inst{6} = 1; } // VLD3LN : Vector Load (single 3-element structure to one lane) -class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VLD3, OpcodeStr, Dt, - "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; - -// vld3 to single-spaced registers. -def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; } -def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; } -def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; } - -// vld3 to double-spaced even registers. -def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } - -// vld3 to double-spaced odd registers. -def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; } -def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; } +class VLD3LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, + nohash_imm:$lane), IIC_VLD3, "vld3", Dt, + "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>; + +def VLD3LNd8 : VLD3LN<0b0010, "8"> { let Inst{4} = 0; } +def VLD3LNd16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VLD3LNd32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b000; } + +// ...with double-spaced registers: +def VLD3LNq16 : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32 : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...alternate versions to be allocated odd register numbers: +def VLD3LNq16odd : VLD3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32odd : VLD3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...with address register writeback: +class VLD3LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), + IIC_VLD3, "vld3", Dt, + "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr$offset", + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $addr.addr = $wb", + []>; + +def VLD3LNd8_UPD : VLD3LNWB<0b0010, "8"> { let Inst{4} = 0; } +def VLD3LNd16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VLD3LNd32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; } + +def VLD3LNq16_UPD : VLD3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VLD3LNq32_UPD : VLD3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; } // VLD4LN : Vector Load (single 4-element structure to one lane) -class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b10,op11_8,{?,?,?,?}, - (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VLD4, OpcodeStr, Dt, +class VLD4LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, + nohash_imm:$lane), IIC_VLD4, "vld4", Dt, "\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr", - "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; + "$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>; -// vld4 to single-spaced registers. -def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">; -def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; } -def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; } +def VLD4LNd8 : VLD4LN<0b0011, "8">; +def VLD4LNd16 : VLD4LN<0b0111, "16"> { let Inst{5} = 0; } +def VLD4LNd32 : VLD4LN<0b1011, "32"> { let Inst{6} = 0; } -// vld4 to double-spaced even registers. -def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } -def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } +// ...with double-spaced registers: +def VLD4LNq16 : VLD4LN<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32 : VLD4LN<0b1011, "32"> { let Inst{6} = 1; } -// vld4 to double-spaced odd registers. -def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; } -def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VLD4LNq16odd : VLD4LN<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32odd : VLD4LN<0b1011, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VLD4LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b10, op11_8, {?,?,?,?}, + (outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + IIC_VLD4, "vld4", Dt, +"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr$offset", +"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4, $addr.addr = $wb", + []>; + +def VLD4LNd8_UPD : VLD4LNWB<0b0011, "8">; +def VLD4LNd16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 0; } +def VLD4LNd32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 0; } + +def VLD4LNq16_UPD : VLD4LNWB<0b0111, "16"> { let Inst{5} = 1; } +def VLD4LNq32_UPD : VLD4LNWB<0b1011, "32"> { let Inst{6} = 1; } // VLD1DUP : Vector Load (single element to all lanes) // VLD2DUP : Vector Load (single 2-element structure to all lanes) @@ -355,213 +501,353 @@ def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; } } // mayLoad = 1, hasExtraDefRegAllocReq = 1 // VST1 : Vector Store (multiple single elements) -class VST1D<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> +class VST1D<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$addr, DPR:$src), IIC_VST, - OpcodeStr, Dt, "\\{$src\\}, $addr", "", - [(IntOp addrmode6:$addr, (Ty DPR:$src))]>; -class VST1Q<bits<4> op7_4, string OpcodeStr, string Dt, - ValueType Ty, Intrinsic IntOp> + "vst1", Dt, "\\{$src\\}, $addr", "", + [(int_arm_neon_vst1 addrmode6:$addr, (Ty DPR:$src))]>; +class VST1Q<bits<4> op7_4, string Dt, ValueType Ty> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$addr, QPR:$src), IIC_VST, - OpcodeStr, Dt, "${src:dregpair}, $addr", "", - [(IntOp addrmode6:$addr, (Ty QPR:$src))]>; + "vst1", Dt, "${src:dregpair}, $addr", "", + [(int_arm_neon_vst1 addrmode6:$addr, (Ty QPR:$src))]>; let hasExtraSrcRegAllocReq = 1 in { -def VST1d8 : VST1D<0b0000, "vst1", "8", v8i8, int_arm_neon_vst1>; -def VST1d16 : VST1D<0b0100, "vst1", "16", v4i16, int_arm_neon_vst1>; -def VST1d32 : VST1D<0b1000, "vst1", "32", v2i32, int_arm_neon_vst1>; -def VST1df : VST1D<0b1000, "vst1", "32", v2f32, int_arm_neon_vst1>; -def VST1d64 : VST1D<0b1100, "vst1", "64", v1i64, int_arm_neon_vst1>; - -def VST1q8 : VST1Q<0b0000, "vst1", "8", v16i8, int_arm_neon_vst1>; -def VST1q16 : VST1Q<0b0100, "vst1", "16", v8i16, int_arm_neon_vst1>; -def VST1q32 : VST1Q<0b1000, "vst1", "32", v4i32, int_arm_neon_vst1>; -def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>; -def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>; +def VST1d8 : VST1D<0b0000, "8", v8i8>; +def VST1d16 : VST1D<0b0100, "16", v4i16>; +def VST1d32 : VST1D<0b1000, "32", v2i32>; +def VST1df : VST1D<0b1000, "32", v2f32>; +def VST1d64 : VST1D<0b1100, "64", v1i64>; + +def VST1q8 : VST1Q<0b0000, "8", v16i8>; +def VST1q16 : VST1Q<0b0100, "16", v8i16>; +def VST1q32 : VST1Q<0b1000, "32", v4i32>; +def VST1qf : VST1Q<0b1000, "32", v4f32>; +def VST1q64 : VST1Q<0b1100, "64", v2i64>; } // hasExtraSrcRegAllocReq +let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { + +// ...with address register writeback: +class VST1DWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, DPR:$src), IIC_VST, + "vst1", Dt, "\\{$src\\}, $addr$offset", "$addr.addr = $wb", []>; +class VST1QWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, QPR:$src), IIC_VST, + "vst1", Dt, "${src:dregpair}, $addr$offset", "$addr.addr = $wb", []>; + +def VST1d8_UPD : VST1DWB<0b0000, "8">; +def VST1d16_UPD : VST1DWB<0b0100, "16">; +def VST1d32_UPD : VST1DWB<0b1000, "32">; +def VST1d64_UPD : VST1DWB<0b1100, "64">; + +def VST1q8_UPD : VST1QWB<0b0000, "8">; +def VST1q16_UPD : VST1QWB<0b0100, "16">; +def VST1q32_UPD : VST1QWB<0b1000, "32">; +def VST1q64_UPD : VST1QWB<0b1100, "64">; + // These (dreg triple/quadruple) are for disassembly only. -class VST1D3<bits<4> op7_4, string OpcodeStr, string Dt> +class VST1D3<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, - "\\{$src1, $src2, $src3\\}, $addr", "", + (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -class VST1D4<bits<4> op7_4, string OpcodeStr, string Dt> +class VST1D4<bits<4> op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, - "\\{$src1, $src2, $src3, $src4\\}, $addr", "", + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", [/* For disassembly only; pattern left blank */]>; -def VST1d8T : VST1D3<0b0000, "vst1", "8">; -def VST1d16T : VST1D3<0b0100, "vst1", "16">; -def VST1d32T : VST1D3<0b1000, "vst1", "32">; -//def VST1d64T : VST1D3<0b1100, "vst1", "64">; - -def VST1d8Q : VST1D4<0b0000, "vst1", "8">; -def VST1d16Q : VST1D4<0b0100, "vst1", "16">; -def VST1d32Q : VST1D4<0b1000, "vst1", "32">; -//def VST1d64Q : VST1D4<0b1100, "vst1", "64">; +def VST1d8T : VST1D3<0b0000, "8">; +def VST1d16T : VST1D3<0b0100, "16">; +def VST1d32T : VST1D3<0b1000, "32">; +// VST1d64T : implemented as VST3d64 + +def VST1d8Q : VST1D4<0b0000, "8">; +def VST1d16Q : VST1D4<0b0100, "16">; +def VST1d32Q : VST1D4<0b1000, "32">; +// VST1d64Q : implemented as VST4d64 + +// ...with address register writeback: +class VST1D3WB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0110, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", + "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +class VST1D4WB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0010, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst1", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", + [/* For disassembly only; pattern left blank */]>; +def VST1d8T_UPD : VST1D3WB<0b0000, "8">; +def VST1d16T_UPD : VST1D3WB<0b0100, "16">; +def VST1d32T_UPD : VST1D3WB<0b1000, "32">; +// VST1d64T_UPD : implemented as VST3d64_UPD -let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { +def VST1d8Q_UPD : VST1D4WB<0b0000, "8">; +def VST1d16Q_UPD : VST1D4WB<0b0100, "16">; +def VST1d32Q_UPD : VST1D4WB<0b1000, "32">; +// VST1d64Q_UPD : implemented as VST4d64_UPD // VST2 : Vector Store (multiple 2-element structures) -class VST2D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b1000,op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>; -class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0011,op7_4, (outs), +class VST2D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), + (ins addrmode6:$addr, DPR:$src1, DPR:$src2), + IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr", "", []>; +class VST2Q<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0011, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; -def VST2d8 : VST2D<0b0000, "vst2", "8">; -def VST2d16 : VST2D<0b0100, "vst2", "16">; -def VST2d32 : VST2D<0b1000, "vst2", "32">; +def VST2d8 : VST2D<0b1000, 0b0000, "8">; +def VST2d16 : VST2D<0b1000, 0b0100, "16">; +def VST2d32 : VST2D<0b1000, 0b1000, "32">; def VST2d64 : NLdSt<0,0b00,0b1010,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, "vst1", "64", "\\{$src1, $src2\\}, $addr", "", []>; -def VST2q8 : VST2Q<0b0000, "vst2", "8">; -def VST2q16 : VST2Q<0b0100, "vst2", "16">; -def VST2q32 : VST2Q<0b1000, "vst2", "32">; +def VST2q8 : VST2Q<0b0000, "8">; +def VST2q16 : VST2Q<0b0100, "16">; +def VST2q32 : VST2Q<0b1000, "32">; -// These (double-spaced dreg pair) are for disassembly only. -class VST2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0, 0b00, 0b1001, op7_4, (outs), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>; +// ...with address register writeback: +class VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, DPR:$src1, DPR:$src2), + IIC_VST, "vst2", Dt, "\\{$src1, $src2\\}, $addr$offset", + "$addr.addr = $wb", []>; +class VST2QWB<bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), + IIC_VST, "vst2", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", []>; -def VST2d8D : VST2Ddbl<0b0000, "vst2", "8">; -def VST2d16D : VST2Ddbl<0b0100, "vst2", "16">; -def VST2d32D : VST2Ddbl<0b1000, "vst2", "32">; +def VST2d8_UPD : VST2DWB<0b1000, 0b0000, "8">; +def VST2d16_UPD : VST2DWB<0b1000, 0b0100, "16">; +def VST2d32_UPD : VST2DWB<0b1000, 0b1000, "32">; +def VST2d64_UPD : NLdSt<0,0b00,0b1010,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2), IIC_VST, + "vst1", "64", "\\{$src1, $src2\\}, $addr$offset", + "$addr.addr = $wb", []>; + +def VST2q8_UPD : VST2QWB<0b0000, "8">; +def VST2q16_UPD : VST2QWB<0b0100, "16">; +def VST2q32_UPD : VST2QWB<0b1000, "32">; + +// ...with double-spaced registers (for disassembly only): +def VST2b8 : VST2D<0b1001, 0b0000, "8">; +def VST2b16 : VST2D<0b1001, 0b0100, "16">; +def VST2b32 : VST2D<0b1001, 0b1000, "32">; +def VST2b8_UPD : VST2DWB<0b1001, 0b0000, "8">; +def VST2b16_UPD : VST2DWB<0b1001, 0b0100, "16">; +def VST2b32_UPD : VST2DWB<0b1001, 0b1000, "32">; // VST3 : Vector Store (multiple 3-element structures) -class VST3D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0100,op7_4, (outs), +class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; -class VST3WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0101,op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, - OpcodeStr, Dt, "\\{$src1, $src2, $src3\\}, $addr", - "$addr.addr = $wb", []>; + "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr", "", []>; -def VST3d8 : VST3D<0b0000, "vst3", "8">; -def VST3d16 : VST3D<0b0100, "vst3", "16">; -def VST3d32 : VST3D<0b1000, "vst3", "32">; +def VST3d8 : VST3D<0b0100, 0b0000, "8">; +def VST3d16 : VST3D<0b0100, 0b0100, "16">; +def VST3d32 : VST3D<0b0100, 0b1000, "32">; def VST3d64 : NLdSt<0,0b00,0b0110,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr", "", []>; -// vst3 to double-spaced even registers. -def VST3q8a : VST3WB<0b0000, "vst3", "8">; -def VST3q16a : VST3WB<0b0100, "vst3", "16">; -def VST3q32a : VST3WB<0b1000, "vst3", "32">; +// ...with address register writeback: +class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + "vst3", Dt, "\\{$src1, $src2, $src3\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst3 to double-spaced odd registers. -def VST3q8b : VST3WB<0b0000, "vst3", "8">; -def VST3q16b : VST3WB<0b0100, "vst3", "16">; -def VST3q32b : VST3WB<0b1000, "vst3", "32">; +def VST3d8_UPD : VST3DWB<0b0100, 0b0000, "8">; +def VST3d16_UPD : VST3DWB<0b0100, 0b0100, "16">; +def VST3d32_UPD : VST3DWB<0b0100, 0b1000, "32">; +def VST3d64_UPD : NLdSt<0,0b00,0b0110,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST, + "vst1", "64", "\\{$src1, $src2, $src3\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VST3q8 : VST3D<0b0101, 0b0000, "8">; +def VST3q16 : VST3D<0b0101, 0b0100, "16">; +def VST3q32 : VST3D<0b0101, 0b1000, "32">; +def VST3q8_UPD : VST3DWB<0b0101, 0b0000, "8">; +def VST3q16_UPD : VST3DWB<0b0101, 0b0100, "16">; +def VST3q32_UPD : VST3DWB<0b0101, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VST3q8odd_UPD : VST3DWB<0b0101, 0b0000, "8">; +def VST3q16odd_UPD : VST3DWB<0b0101, 0b0100, "16">; +def VST3q32odd_UPD : VST3DWB<0b0101, 0b1000, "32">; // VST4 : Vector Store (multiple 4-element structures) -class VST4D<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0000,op7_4, (outs), +class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", + IIC_VST, "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; -class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt> - : NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb), - (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), - IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr", - "$addr.addr = $wb", []>; -def VST4d8 : VST4D<0b0000, "vst4", "8">; -def VST4d16 : VST4D<0b0100, "vst4", "16">; -def VST4d32 : VST4D<0b1000, "vst4", "32">; +def VST4d8 : VST4D<0b0000, 0b0000, "8">; +def VST4d16 : VST4D<0b0000, 0b0100, "16">; +def VST4d32 : VST4D<0b0000, 0b1000, "32">; def VST4d64 : NLdSt<0,0b00,0b0010,0b1100, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, "vst1", "64", "\\{$src1, $src2, $src3, $src4\\}, $addr", "", []>; -// vst4 to double-spaced even registers. -def VST4q8a : VST4WB<0b0000, "vst4", "8">; -def VST4q16a : VST4WB<0b0100, "vst4", "16">; -def VST4q32a : VST4WB<0b1000, "vst4", "32">; +// ...with address register writeback: +class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> + : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, + "vst4", Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst4 to double-spaced odd registers. -def VST4q8b : VST4WB<0b0000, "vst4", "8">; -def VST4q16b : VST4WB<0b0100, "vst4", "16">; -def VST4q32b : VST4WB<0b1000, "vst4", "32">; +def VST4d8_UPD : VST4DWB<0b0000, 0b0000, "8">; +def VST4d16_UPD : VST4DWB<0b0000, 0b0100, "16">; +def VST4d32_UPD : VST4DWB<0b0000, 0b1000, "32">; +def VST4d64_UPD : NLdSt<0,0b00,0b0010,0b1100, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4), IIC_VST, + "vst1", "64", + "\\{$src1, $src2, $src3, $src4\\}, $addr$offset", + "$addr.addr = $wb", []>; + +// ...with double-spaced registers (non-updating versions for disassembly only): +def VST4q8 : VST4D<0b0001, 0b0000, "8">; +def VST4q16 : VST4D<0b0001, 0b0100, "16">; +def VST4q32 : VST4D<0b0001, 0b1000, "32">; +def VST4q8_UPD : VST4DWB<0b0001, 0b0000, "8">; +def VST4q16_UPD : VST4DWB<0b0001, 0b0100, "16">; +def VST4q32_UPD : VST4DWB<0b0001, 0b1000, "32">; + +// ...alternate versions to be allocated odd register numbers: +def VST4q8odd_UPD : VST4DWB<0b0001, 0b0000, "8">; +def VST4q16odd_UPD : VST4DWB<0b0001, 0b0100, "16">; +def VST4q32odd_UPD : VST4DWB<0b0001, 0b1000, "32">; // VST1LN : Vector Store (single element from one lane) // FIXME: Not yet implemented. // VST2LN : Vector Store (single 2-element structure from one lane) -class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST2LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane), - IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", + IIC_VST, "vst2", Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr", "", []>; -// vst2 to single-spaced registers. -def VST2LNd8 : VST2LN<0b0001, "vst2", "8">; -def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; } -def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; } +def VST2LNd8 : VST2LN<0b0001, "8">; +def VST2LNd16 : VST2LN<0b0101, "16"> { let Inst{5} = 0; } +def VST2LNd32 : VST2LN<0b1001, "32"> { let Inst{6} = 0; } + +// ...with double-spaced registers: +def VST2LNq16 : VST2LN<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32 : VST2LN<0b1001, "32"> { let Inst{6} = 1; } -// vst2 to double-spaced even registers. -def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } -def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } +// ...alternate versions to be allocated odd register numbers: +def VST2LNq16odd : VST2LN<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32odd : VST2LN<0b1001, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VST2LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, nohash_imm:$lane), IIC_VST, "vst2", Dt, + "\\{$src1[$lane], $src2[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst2 to double-spaced odd registers. -def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; } -def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; } +def VST2LNd8_UPD : VST2LNWB<0b0001, "8">; +def VST2LNd16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 0; } +def VST2LNd32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 0; } + +def VST2LNq16_UPD : VST2LNWB<0b0101, "16"> { let Inst{5} = 1; } +def VST2LNq32_UPD : VST2LNWB<0b1001, "32"> { let Inst{6} = 1; } // VST3LN : Vector Store (single 3-element structure from one lane) -class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST3LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, - nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, + nohash_imm:$lane), IIC_VST, "vst3", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>; -// vst3 to single-spaced registers. -def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; } -def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; } -def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; } +def VST3LNd8 : VST3LN<0b0010, "8"> { let Inst{4} = 0; } +def VST3LNd16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VST3LNd32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b000; } + +// ...with double-spaced registers: +def VST3LNq16 : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32 : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...alternate versions to be allocated odd register numbers: +def VST3LNq16odd : VST3LN<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32odd : VST3LN<0b1010, "32"> { let Inst{6-4} = 0b100; } + +// ...with address register writeback: +class VST3LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, nohash_imm:$lane), + IIC_VST, "vst3", Dt, + "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst3 to double-spaced even registers. -def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } +def VST3LNd8_UPD : VST3LNWB<0b0010, "8"> { let Inst{4} = 0; } +def VST3LNd16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b00; } +def VST3LNd32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b000; } -// vst3 to double-spaced odd registers. -def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; } -def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; } +def VST3LNq16_UPD : VST3LNWB<0b0110, "16"> { let Inst{5-4} = 0b10; } +def VST3LNq32_UPD : VST3LNWB<0b1010, "32"> { let Inst{6-4} = 0b100; } // VST4LN : Vector Store (single 4-element structure from one lane) -class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt> - : NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs), +class VST4LN<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs), (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, - nohash_imm:$lane), IIC_VST, OpcodeStr, Dt, + nohash_imm:$lane), IIC_VST, "vst4", Dt, "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr", "", []>; -// vst4 to single-spaced registers. -def VST4LNd8 : VST4LN<0b0011, "vst4", "8">; -def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; } -def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; } +def VST4LNd8 : VST4LN<0b0011, "8">; +def VST4LNd16 : VST4LN<0b0111, "16"> { let Inst{5} = 0; } +def VST4LNd32 : VST4LN<0b1011, "32"> { let Inst{6} = 0; } + +// ...with double-spaced registers: +def VST4LNq16 : VST4LN<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32 : VST4LN<0b1011, "32"> { let Inst{6} = 1; } + +// ...alternate versions to be allocated odd register numbers: +def VST4LNq16odd : VST4LN<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32odd : VST4LN<0b1011, "32"> { let Inst{6} = 1; } + +// ...with address register writeback: +class VST4LNWB<bits<4> op11_8, string Dt> + : NLdSt<1, 0b00, op11_8, {?,?,?,?}, (outs GPR:$wb), + (ins addrmode6:$addr, am6offset:$offset, + DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4, nohash_imm:$lane), + IIC_VST, "vst4", Dt, + "\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr$offset", + "$addr.addr = $wb", []>; -// vst4 to double-spaced even registers. -def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } -def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } +def VST4LNd8_UPD : VST4LNWB<0b0011, "8">; +def VST4LNd16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 0; } +def VST4LNd32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 0; } -// vst4 to double-spaced odd registers. -def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; } -def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; } +def VST4LNq16_UPD : VST4LNWB<0b0111, "16"> { let Inst{5} = 1; } +def VST4LNq32_UPD : VST4LNWB<0b1011, "32"> { let Inst{6} = 1; } } // mayStore = 1, hasExtraSrcRegAllocReq = 1 diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 37c9fc5..e3ca536 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -16,7 +16,8 @@ // def ARMtcall : SDNode<"ARMISD::tCALL", SDT_ARMcall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def imm_neg_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(-(int)N->getZExtValue(), MVT::i32); @@ -549,7 +550,7 @@ def tLDM : T1I<(outs), def tLDM_UPD : T1It<(outs tGPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}\t$addr, $dsts", + "ldm${addr:submode}${p}\t$addr!, $dsts", "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,1,?}>; // A6.2 & A8.6.53 } // mayLoad, hasExtraDefRegAllocReq @@ -558,7 +559,7 @@ let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def tSTM_UPD : T1It<(outs tGPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, - "stm${addr:submode}${p}\t$addr, $srcs", + "stm${addr:submode}${p}\t$addr!, $srcs", "$addr.addr = $wb", []>, T1Encoding<{1,1,0,0,0,?}>; // A6.2 & A8.6.189 diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index ab9e926..262aae4 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1218,7 +1218,7 @@ def t2LDM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, def t2LDM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$dsts, variable_ops), IIC_iLoadm, - "ldm${addr:submode}${p}${addr:wide}\t$addr, $dsts", + "ldm${addr:submode}${p}${addr:wide}\t$addr!, $dsts", "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; @@ -1244,7 +1244,7 @@ def t2STM : T2XI<(outs), (ins addrmode4:$addr, pred:$p, def t2STM_UPD : T2XIt<(outs GPR:$wb), (ins addrmode4:$addr, pred:$p, reglist:$srcs, variable_ops), IIC_iStorem, - "stm${addr:submode}${p}${addr:wide}\t$addr, $srcs", + "stm${addr:submode}${p}${addr:wide}\t$addr!, $srcs", "$addr.addr = $wb", []> { let Inst{31-27} = 0b11101; let Inst{26-25} = 0b00; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 4d1d48a..aca8230 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -25,8 +25,6 @@ def arm_ftoui : SDNode<"ARMISD::FTOUI", SDT_FTOI>; def arm_ftosi : SDNode<"ARMISD::FTOSI", SDT_FTOI>; def arm_sitof : SDNode<"ARMISD::SITOF", SDT_ITOF>; def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; -def arm_f16tof32 : SDNode<"ARMISD::F16_TO_F32", SDT_ITOF>; -def arm_f32tof16 : SDNode<"ARMISD::F32_TO_F16", SDT_FTOI>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; @@ -94,7 +92,7 @@ def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$dsts, def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", + "vldm${addr:submode}${p}\t${addr:base}!, $dsts", "$addr.base = $wb", []> { let Inst{20} = 1; } @@ -102,7 +100,7 @@ def VLDMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, def VLDMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$dsts, variable_ops), IndexModeUpd, IIC_fpLoadm, - "vldm${addr:submode}${p}\t${addr:base}, $dsts", + "vldm${addr:submode}${p}\t${addr:base}!, $dsts", "$addr.base = $wb", []> { let Inst{20} = 1; } @@ -124,7 +122,7 @@ def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$srcs, def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", + "vstm${addr:submode}${p}\t${addr:base}!, $srcs", "$addr.base = $wb", []> { let Inst{20} = 0; } @@ -132,7 +130,7 @@ def VSTMD_UPD : AXDI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, def VSTMS_UPD : AXSI5<(outs GPR:$wb), (ins addrmode5:$addr, pred:$p, reglist:$srcs, variable_ops), IndexModeUpd, IIC_fpStorem, - "vstm${addr:submode}${p}\t${addr:base}, $srcs", + "vstm${addr:submode}${p}\t${addr:base}!, $srcs", "$addr.base = $wb", []> { let Inst{20} = 0; } @@ -259,11 +257,17 @@ def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, def VCVTBSH : ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f32.f16\t$dst, $a", - [(set SPR:$dst, (f32 (arm_f32tof16 SPR:$a)))]>; + [/* For disassembly only; pattern left blank */]>; + +def : ARMPat<(f32_to_f16 SPR:$a), + (i32 (COPY_TO_REGCLASS (VCVTBSH SPR:$a), GPR))>; def VCVTBHS : ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtb", ".f16.f32\t$dst, $a", - [(set SPR:$dst, (arm_f16tof32 SPR:$a))]>; + [/* For disassembly only; pattern left blank */]>; + +def : ARMPat<(f16_to_f32 GPR:$a), + (VCVTBHS (COPY_TO_REGCLASS GPR:$a, SPR))>; def VCVTTSH : ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$dst), (ins SPR:$a), /* FIXME */ IIC_fpCVTDS, "vcvtt", ".f32.f16\t$dst, $a", diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 8fbcf45..bdbec30 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -253,7 +253,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, .addImm(ARM_AM::getAM4ModeImm(Mode)).addImm(Pred).addReg(PredReg) : BuildMI(MBB, MBBI, dl, TII->get(Opcode)) .addReg(Base, getKillRegState(BaseKill)) - .addImm(ARM_AM::getAM5Opc(Mode, false, isDPR ? NumRegs<<1 : NumRegs)) + .addImm(ARM_AM::getAM5Opc(Mode, isDPR ? NumRegs<<1 : NumRegs)) .addImm(Pred).addReg(PredReg); for (unsigned i = 0; i != NumRegs; ++i) MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef) @@ -505,11 +505,9 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, if (MI->getOperand(i).getReg() == Base) return false; } - assert(!ARM_AM::getAM4WBFlag(MI->getOperand(1).getImm())); Mode = ARM_AM::getAM4SubMode(MI->getOperand(1).getImm()); } else { // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. - assert(!ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())); Mode = ARM_AM::getAM5SubMode(MI->getOperand(1).getImm()); Offset = ARM_AM::getAM5Offset(MI->getOperand(1).getImm()); } @@ -573,11 +571,11 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, .addReg(Base, getKillRegState(BaseKill)); if (isAM4) { // [t2]LDM_UPD, [t2]STM_UPD - MIB.addImm(ARM_AM::getAM4ModeImm(Mode, true)) + MIB.addImm(ARM_AM::getAM4ModeImm(Mode)) .addImm(Pred).addReg(PredReg); } else { // VLDM[SD}_UPD, VSTM[SD]_UPD - MIB.addImm(ARM_AM::getAM5Opc(Mode, true, Offset)) + MIB.addImm(ARM_AM::getAM5Opc(Mode, Offset)) .addImm(Pred).addReg(PredReg); } // Transfer the rest of operands. @@ -709,7 +707,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned Offset = 0; if (isAM5) Offset = ARM_AM::getAM5Opc(AddSub == ARM_AM::sub ? ARM_AM::db : ARM_AM::ia, - true, (isDPR ? 2 : 1)); + (isDPR ? 2 : 1)); else if (isAM2) Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift); else @@ -1157,19 +1155,24 @@ namespace { }; } -/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op -/// (bx lr) into the preceeding stack restore so it directly restore the value -/// of LR into pc. -/// ldmfd sp!, {r7, lr} +/// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops +/// ("bx lr" and "mov pc, lr") into the preceeding stack restore so it +/// directly restore the value of LR into pc. +/// ldmfd sp!, {..., lr} /// bx lr +/// or +/// ldmfd sp!, {..., lr} +/// mov pc, lr /// => -/// ldmfd sp!, {r7, pc} +/// ldmfd sp!, {..., pc} bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) { if (MBB.empty()) return false; MachineBasicBlock::iterator MBBI = prior(MBB.end()); if (MBBI != MBB.begin() && - (MBBI->getOpcode() == ARM::BX_RET || MBBI->getOpcode() == ARM::tBX_RET)) { + (MBBI->getOpcode() == ARM::BX_RET || + MBBI->getOpcode() == ARM::tBX_RET || + MBBI->getOpcode() == ARM::MOVPCLR)) { MachineInstr *PrevMI = prior(MBBI); if (PrevMI->getOpcode() == ARM::LDM_UPD || PrevMI->getOpcode() == ARM::t2LDM_UPD) { diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 7233f5c..95f57b7 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -21,7 +21,7 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 88e67e3..c32f16c 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -23,6 +23,7 @@ #include "ARMISelLowering.h" #include "Thumb1InstrInfo.h" #include "Thumb2InstrInfo.h" +#include "llvm/ADT/OwningPtr.h" namespace llvm { @@ -83,7 +84,8 @@ public: /// Thumb-1 and Thumb-2. /// class ThumbTargetMachine : public ARMBaseTargetMachine { - ARMBaseInstrInfo *InstrInfo; // either Thumb1InstrInfo or Thumb2InstrInfo + // Either Thumb1InstrInfo or Thumb2InstrInfo. + OwningPtr<ARMBaseInstrInfo> InstrInfo; const TargetData DataLayout; // Calculates type size & alignment ARMTargetLowering TLInfo; public: @@ -100,7 +102,9 @@ public: } /// returns either Thumb1InstrInfo or Thumb2InstrInfo - virtual const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo; } + virtual const ARMBaseInstrInfo *getInstrInfo() const { + return InstrInfo.get(); + } virtual const TargetData *getTargetData() const { return &DataLayout; } }; diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 4db14a3..4a7a1e4 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -101,6 +101,7 @@ namespace { void printAddrMode5Operand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); void printAddrMode6Operand(const MachineInstr *MI, int OpNum); + void printAddrMode6OffsetOperand(const MachineInstr *MI, int OpNum); void printAddrModePCOperand(const MachineInstr *MI, int OpNum, const char *Modifier = 0); void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNum); @@ -431,16 +432,16 @@ void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) { O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { - if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. O << ", #" - << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) << ARM_AM::getAM2Offset(MO3.getImm()); O << "]"; return; } O << ", " - << (char)ARM_AM::getAM2Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) << getRegisterName(MO2.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) @@ -458,12 +459,12 @@ void ARMAsmPrinter::printAddrMode2OffsetOperand(const MachineInstr *MI, int Op){ unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); O << "#" - << (char)ARM_AM::getAM2Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << ImmOffs; return; } - O << (char)ARM_AM::getAM2Op(MO2.getImm()) + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) << getRegisterName(MO1.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) @@ -490,7 +491,7 @@ void ARMAsmPrinter::printAddrMode3Operand(const MachineInstr *MI, int Op) { if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) O << ", #" - << (char)ARM_AM::getAM3Op(MO3.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) << ImmOffs; O << "]"; } @@ -508,35 +509,22 @@ void ARMAsmPrinter::printAddrMode3OffsetOperand(const MachineInstr *MI, int Op){ unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); O << "#" - << (char)ARM_AM::getAM3Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) << ImmOffs; } void ARMAsmPrinter::printAddrMode4Operand(const MachineInstr *MI, int Op, const char *Modifier) { - const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Modifier && strcmp(Modifier, "submode") == 0) { - if (MO1.getReg() == ARM::SP) { - // FIXME - bool isLDM = (MI->getOpcode() == ARM::LDM || - MI->getOpcode() == ARM::LDM_UPD || - MI->getOpcode() == ARM::LDM_RET || - MI->getOpcode() == ARM::t2LDM || - MI->getOpcode() == ARM::t2LDM_UPD || - MI->getOpcode() == ARM::t2LDM_RET); - O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); } else if (Modifier && strcmp(Modifier, "wide") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Mode == ARM_AM::ia) O << ".w"; } else { printOperand(MI, Op); - if (ARM_AM::getAM4WBFlag(MO2.getImm())) - O << "!"; } } @@ -559,8 +547,6 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. O << getRegisterName(MO1.getReg()); - if (ARM_AM::getAM5WBFlag(MO2.getImm())) - O << "!"; return; } @@ -568,7 +554,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { O << ", #" - << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) << ImmOffs*4; } O << "]"; @@ -577,22 +563,21 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); - const MachineOperand &MO3 = MI->getOperand(Op+2); - const MachineOperand &MO4 = MI->getOperand(Op+3); O << "[" << getRegisterName(MO1.getReg()); - if (MO4.getImm()) { + if (MO2.getImm()) { // FIXME: Both darwin as and GNU as violate ARM docs here. - O << ", :" << MO4.getImm(); + O << ", :" << MO2.getImm(); } O << "]"; +} - if (ARM_AM::getAM6WBFlag(MO3.getImm())) { - if (MO2.getReg() == 0) - O << "!"; - else - O << ", " << getRegisterName(MO2.getReg()); - } +void ARMAsmPrinter::printAddrMode6OffsetOperand(const MachineInstr *MI, int Op){ + const MachineOperand &MO = MI->getOperand(Op); + if (MO.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO.getReg()); } void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, @@ -604,7 +589,7 @@ void ARMAsmPrinter::printAddrModePCOperand(const MachineInstr *MI, int Op, const MachineOperand &MO1 = MI->getOperand(Op); assert(TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); - O << "[pc, +" << getRegisterName(MO1.getReg()) << "]"; + O << "[pc, " << getRegisterName(MO1.getReg()) << "]"; } void @@ -627,10 +612,11 @@ void ARMAsmPrinter::printThumbITMask(const MachineInstr *MI, int Op) { // (3 - the number of trailing zeros) is the number of then / else. unsigned Mask = MI->getOperand(Op).getImm(); + unsigned CondBit0 = Mask >> 4 & 1; unsigned NumTZ = CountTrailingZeros_32(Mask); assert(NumTZ <= 3 && "Invalid IT mask!"); for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { - bool T = (Mask & (1 << Pos)) == 0; + bool T = ((Mask >> Pos) & 1) == CondBit0; if (T) O << 't'; else @@ -662,7 +648,7 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, if (MO3.getReg()) O << ", " << getRegisterName(MO3.getReg()); else if (unsigned ImmOffs = MO2.getImm()) - O << ", #+" << ImmOffs * Scale; + O << ", #" << ImmOffs * Scale; O << "]"; } @@ -684,7 +670,7 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) { const MachineOperand &MO2 = MI->getOperand(Op+1); O << "[" << getRegisterName(MO1.getReg()); if (unsigned ImmOffs = MO2.getImm()) - O << ", #+" << ImmOffs*4; + O << ", #" << ImmOffs*4; O << "]"; } @@ -720,7 +706,7 @@ void ARMAsmPrinter::printT2AddrModeImm12Operand(const MachineInstr *MI, unsigned OffImm = MO2.getImm(); if (OffImm) // Don't print +0. - O << ", #+" << OffImm; + O << ", #" << OffImm; O << "]"; } @@ -736,7 +722,7 @@ void ARMAsmPrinter::printT2AddrModeImm8Operand(const MachineInstr *MI, if (OffImm < 0) O << ", #-" << -OffImm; else if (OffImm > 0) - O << ", #+" << OffImm; + O << ", #" << OffImm; O << "]"; } @@ -752,7 +738,7 @@ void ARMAsmPrinter::printT2AddrModeImm8s4Operand(const MachineInstr *MI, if (OffImm < 0) O << ", #-" << -OffImm * 4; else if (OffImm > 0) - O << ", #+" << OffImm * 4; + O << ", #" << OffImm * 4; O << "]"; } @@ -764,7 +750,7 @@ void ARMAsmPrinter::printT2AddrModeImm8OffsetOperand(const MachineInstr *MI, if (OffImm < 0) O << "#-" << -OffImm; else if (OffImm > 0) - O << "#+" << OffImm; + O << "#" << OffImm; } void ARMAsmPrinter::printT2AddrModeSoRegOperand(const MachineInstr *MI, diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index a2084b0..30763a9 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -28,7 +28,159 @@ using namespace llvm; #undef MachineInstr #undef ARMAsmPrinter -void ARMInstPrinter::printInst(const MCInst *MI) { printInstruction(MI); } +static unsigned NextReg(unsigned Reg) { + switch (Reg) { + default: + assert(0 && "Unexpected register enum"); + + case ARM::D0: + return ARM::D1; + case ARM::D1: + return ARM::D2; + case ARM::D2: + return ARM::D3; + case ARM::D3: + return ARM::D4; + case ARM::D4: + return ARM::D5; + case ARM::D5: + return ARM::D6; + case ARM::D6: + return ARM::D7; + case ARM::D7: + return ARM::D8; + case ARM::D8: + return ARM::D9; + case ARM::D9: + return ARM::D10; + case ARM::D10: + return ARM::D11; + case ARM::D11: + return ARM::D12; + case ARM::D12: + return ARM::D13; + case ARM::D13: + return ARM::D14; + case ARM::D14: + return ARM::D15; + case ARM::D15: + return ARM::D16; + case ARM::D16: + return ARM::D17; + case ARM::D17: + return ARM::D18; + case ARM::D18: + return ARM::D19; + case ARM::D19: + return ARM::D20; + case ARM::D20: + return ARM::D21; + case ARM::D21: + return ARM::D22; + case ARM::D22: + return ARM::D23; + case ARM::D23: + return ARM::D24; + case ARM::D24: + return ARM::D25; + case ARM::D25: + return ARM::D26; + case ARM::D26: + return ARM::D27; + case ARM::D27: + return ARM::D28; + case ARM::D28: + return ARM::D29; + case ARM::D29: + return ARM::D30; + case ARM::D30: + return ARM::D31; + } +} + +void ARMInstPrinter::printInst(const MCInst *MI) { + // Check for MOVs and print canonical forms, instead. + if (MI->getOpcode() == ARM::MOVs) { + const MCOperand &Dst = MI->getOperand(0); + const MCOperand &MO1 = MI->getOperand(1); + const MCOperand &MO2 = MI->getOperand(2); + const MCOperand &MO3 = MI->getOperand(3); + + O << '\t' << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO3.getImm())); + printSBitModifierOperand(MI, 6); + printPredicateOperand(MI, 4); + + O << '\t' << getRegisterName(Dst.getReg()) + << ", " << getRegisterName(MO1.getReg()); + + if (ARM_AM::getSORegShOp(MO3.getImm()) == ARM_AM::rrx) + return; + + O << ", "; + + if (MO2.getReg()) { + O << getRegisterName(MO2.getReg()); + assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); + } else { + O << "#" << ARM_AM::getSORegOffset(MO3.getImm()); + } + return; + } + + // A8.6.123 PUSH + if ((MI->getOpcode() == ARM::STM_UPD || MI->getOpcode() == ARM::t2STM_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::db) { + O << '\t' << "push"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.122 POP + if ((MI->getOpcode() == ARM::LDM_UPD || MI->getOpcode() == ARM::t2LDM_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM4SubMode(MO1.getImm()) == ARM_AM::ia) { + O << '\t' << "pop"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.355 VPUSH + if ((MI->getOpcode() == ARM::VSTMS_UPD || MI->getOpcode() ==ARM::VSTMD_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::db) { + O << '\t' << "vpush"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + // A8.6.354 VPOP + if ((MI->getOpcode() == ARM::VLDMS_UPD || MI->getOpcode() ==ARM::VLDMD_UPD) && + MI->getOperand(0).getReg() == ARM::SP) { + const MCOperand &MO1 = MI->getOperand(2); + if (ARM_AM::getAM5SubMode(MO1.getImm()) == ARM_AM::ia) { + O << '\t' << "vpop"; + printPredicateOperand(MI, 3); + O << '\t'; + printRegisterList(MI, 5); + return; + } + } + + printInstruction(MI); + } void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, const char *Modifier) { @@ -36,6 +188,9 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, if (Op.isReg()) { unsigned Reg = Op.getReg(); if (Modifier && strcmp(Modifier, "dregpair") == 0) { + O << '{' << getRegisterName(Reg) << ", " + << getRegisterName(NextReg(Reg)) << '}'; +#if 0 // FIXME: Breaks e.g. ARM/vmul.ll. assert(0); /* @@ -44,6 +199,7 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << '{' << getRegisterName(DRegLo) << ',' << getRegisterName(DRegHi) << '}';*/ +#endif } else if (Modifier && strcmp(Modifier, "lane") == 0) { assert(0); /* @@ -56,7 +212,8 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << getRegisterName(Reg); } } else if (Op.isImm()) { - assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); + assert((Modifier && !strcmp(Modifier, "call")) || + ((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported")); O << '#' << Op.getImm(); } else { assert((Modifier == 0 || Modifier[0] == 0) && "No modifiers supported"); @@ -142,17 +299,17 @@ void ARMInstPrinter::printAddrMode2Operand(const MCInst *MI, unsigned Op) { O << "[" << getRegisterName(MO1.getReg()); if (!MO2.getReg()) { - if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. + if (ARM_AM::getAM2Offset(MO3.getImm())) // Don't print +0. O << ", #" - << (char)ARM_AM::getAM2Op(MO3.getImm()) - << ARM_AM::getAM2Offset(MO3.getImm()); + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << ARM_AM::getAM2Offset(MO3.getImm()); O << "]"; return; } O << ", " - << (char)ARM_AM::getAM2Op(MO3.getImm()) - << getRegisterName(MO2.getReg()); + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO3.getImm())) + << getRegisterName(MO2.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO3.getImm())) O << ", " @@ -169,11 +326,14 @@ void ARMInstPrinter::printAddrMode2OffsetOperand(const MCInst *MI, if (!MO1.getReg()) { unsigned ImmOffs = ARM_AM::getAM2Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); - O << '#' << (char)ARM_AM::getAM2Op(MO2.getImm()) << ImmOffs; + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << ImmOffs; return; } - O << (char)ARM_AM::getAM2Op(MO2.getImm()) << getRegisterName(MO1.getReg()); + O << ARM_AM::getAddrOpcStr(ARM_AM::getAM2Op(MO2.getImm())) + << getRegisterName(MO1.getReg()); if (unsigned ShImm = ARM_AM::getAM2Offset(MO2.getImm())) O << ", " @@ -196,8 +356,8 @@ void ARMInstPrinter::printAddrMode3Operand(const MCInst *MI, unsigned OpNum) { if (unsigned ImmOffs = ARM_AM::getAM3Offset(MO3.getImm())) O << ", #" - << (char)ARM_AM::getAM3Op(MO3.getImm()) - << ImmOffs; + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO3.getImm())) + << ImmOffs; O << ']'; } @@ -214,35 +374,24 @@ void ARMInstPrinter::printAddrMode3OffsetOperand(const MCInst *MI, unsigned ImmOffs = ARM_AM::getAM3Offset(MO2.getImm()); assert(ImmOffs && "Malformed indexed load / store!"); - O << "#" - << (char)ARM_AM::getAM3Op(MO2.getImm()) - << ImmOffs; + O << '#' + << ARM_AM::getAddrOpcStr(ARM_AM::getAM3Op(MO2.getImm())) + << ImmOffs; } void ARMInstPrinter::printAddrMode4Operand(const MCInst *MI, unsigned OpNum, const char *Modifier) { - const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Modifier && strcmp(Modifier, "submode") == 0) { - if (MO1.getReg() == ARM::SP) { - // FIXME - bool isLDM = (MI->getOpcode() == ARM::LDM || - MI->getOpcode() == ARM::LDM_RET || - MI->getOpcode() == ARM::t2LDM || - MI->getOpcode() == ARM::t2LDM_RET); - O << ARM_AM::getAMSubModeAltStr(Mode, isLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); } else if (Modifier && strcmp(Modifier, "wide") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM4SubMode(MO2.getImm()); if (Mode == ARM_AM::ia) O << ".w"; } else { printOperand(MI, OpNum); - if (ARM_AM::getAM4WBFlag(MO2.getImm())) - O << "!"; } } @@ -263,8 +412,6 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. O << getRegisterName(MO1.getReg()); - if (ARM_AM::getAM5WBFlag(MO2.getImm())) - O << "!"; return; } @@ -272,7 +419,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, if (unsigned ImmOffs = ARM_AM::getAM5Offset(MO2.getImm())) { O << ", #" - << (char)ARM_AM::getAM5Op(MO2.getImm()) + << ARM_AM::getAddrOpcStr(ARM_AM::getAM5Op(MO2.getImm())) << ImmOffs*4; } O << "]"; @@ -281,17 +428,22 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, void ARMInstPrinter::printAddrMode6Operand(const MCInst *MI, unsigned OpNum) { const MCOperand &MO1 = MI->getOperand(OpNum); const MCOperand &MO2 = MI->getOperand(OpNum+1); - const MCOperand &MO3 = MI->getOperand(OpNum+2); - // FIXME: No support yet for specifying alignment. - O << '[' << getRegisterName(MO1.getReg()) << ']'; - - if (ARM_AM::getAM6WBFlag(MO3.getImm())) { - if (MO2.getReg() == 0) - O << '!'; - else - O << ", " << getRegisterName(MO2.getReg()); + O << "[" << getRegisterName(MO1.getReg()); + if (MO2.getImm()) { + // FIXME: Both darwin as and GNU as violate ARM docs here. + O << ", :" << MO2.getImm(); } + O << "]"; +} + +void ARMInstPrinter::printAddrMode6OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO = MI->getOperand(OpNum); + if (MO.getReg() == 0) + O << "!"; + else + O << ", " << getRegisterName(MO.getReg()); } void ARMInstPrinter::printAddrModePCOperand(const MCInst *MI, unsigned OpNum, @@ -311,14 +463,56 @@ void ARMInstPrinter::printBitfieldInvMaskImmOperand (const MCInst *MI, void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum) { O << "{"; - // Always skip the first operand, it's the optional (and implicit writeback). - for (unsigned i = OpNum+1, e = MI->getNumOperands(); i != e; ++i) { - if (i != OpNum+1) O << ", "; + for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) { + if (i != OpNum) O << ", "; O << getRegisterName(MI->getOperand(i).getReg()); } O << "}"; } +void ARMInstPrinter::printCPSOptionOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &Op = MI->getOperand(OpNum); + unsigned option = Op.getImm(); + unsigned mode = option & 31; + bool changemode = option >> 5 & 1; + unsigned AIF = option >> 6 & 7; + unsigned imod = option >> 9 & 3; + if (imod == 2) + O << "ie"; + else if (imod == 3) + O << "id"; + O << '\t'; + if (imod > 1) { + if (AIF & 4) O << 'a'; + if (AIF & 2) O << 'i'; + if (AIF & 1) O << 'f'; + if (AIF > 0 && changemode) O << ", "; + } + if (changemode) + O << '#' << mode; +} + +void ARMInstPrinter::printMSRMaskOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &Op = MI->getOperand(OpNum); + unsigned Mask = Op.getImm(); + if (Mask) { + O << '_'; + if (Mask & 8) O << 'f'; + if (Mask & 4) O << 's'; + if (Mask & 2) O << 'x'; + if (Mask & 1) O << 'c'; + } +} + +void ARMInstPrinter::printNegZeroOperand(const MCInst *MI, unsigned OpNum){ + const MCOperand &Op = MI->getOperand(OpNum); + O << '#'; + if (Op.getImm() < 0) + O << '-' << (-Op.getImm() - 1); + else + O << Op.getImm(); +} + void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) { ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm(); if (CC != ARMCC::AL) @@ -360,3 +554,191 @@ void ARMInstPrinter::printPCLabel(const MCInst *MI, unsigned OpNum) { void ARMInstPrinter::printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum) { O << "#" << MI->getOperand(OpNum).getImm() * 4; } + +void ARMInstPrinter::printThumbITMask(const MCInst *MI, unsigned OpNum) { + // (3 - the number of trailing zeros) is the number of then / else. + unsigned Mask = MI->getOperand(OpNum).getImm(); + unsigned CondBit0 = Mask >> 4 & 1; + unsigned NumTZ = CountTrailingZeros_32(Mask); + assert(NumTZ <= 3 && "Invalid IT mask!"); + for (unsigned Pos = 3, e = NumTZ; Pos > e; --Pos) { + bool T = ((Mask >> Pos) & 1) == CondBit0; + if (T) + O << 't'; + else + O << 'e'; + } +} + +void ARMInstPrinter::printThumbAddrModeRROperand(const MCInst *MI, unsigned Op) +{ + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()); + O << ", " << getRegisterName(MO2.getReg()) << "]"; +} + +void ARMInstPrinter::printThumbAddrModeRI5Operand(const MCInst *MI, unsigned Op, + unsigned Scale) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + const MCOperand &MO3 = MI->getOperand(Op+2); + + if (!MO1.isReg()) { // FIXME: This is for CP entries, but isn't right. + printOperand(MI, Op); + return; + } + + O << "[" << getRegisterName(MO1.getReg()); + if (MO3.getReg()) + O << ", " << getRegisterName(MO3.getReg()); + else if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs * Scale; + O << "]"; +} + +void ARMInstPrinter::printThumbAddrModeS1Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 1); +} + +void ARMInstPrinter::printThumbAddrModeS2Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 2); +} + +void ARMInstPrinter::printThumbAddrModeS4Operand(const MCInst *MI, unsigned Op) +{ + printThumbAddrModeRI5Operand(MI, Op, 4); +} + +void ARMInstPrinter::printThumbAddrModeSPOperand(const MCInst *MI,unsigned Op) { + const MCOperand &MO1 = MI->getOperand(Op); + const MCOperand &MO2 = MI->getOperand(Op+1); + O << "[" << getRegisterName(MO1.getReg()); + if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs*4; + O << "]"; +} + +void ARMInstPrinter::printTBAddrMode(const MCInst *MI, unsigned OpNum) { + O << "[pc, " << getRegisterName(MI->getOperand(OpNum).getReg()); + if (MI->getOpcode() == ARM::t2TBH) + O << ", lsl #1"; + O << ']'; +} + +// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2 +// register with shift forms. +// REG 0 0 - e.g. R5 +// REG IMM, SH_OPC - e.g. R5, LSL #3 +void ARMInstPrinter::printT2SOOperand(const MCInst *MI, unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + unsigned Reg = MO1.getReg(); + O << getRegisterName(Reg); + + // Print the shift opc. + O << ", " + << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm())) + << " "; + + assert(MO2.isImm() && "Not a valid t2_so_reg value!"); + O << "#" << ARM_AM::getSORegOffset(MO2.getImm()); +} + +void ARMInstPrinter::printT2AddrModeImm12Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + unsigned OffImm = MO2.getImm(); + if (OffImm) // Don't print +0. + O << ", #" << OffImm; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm(); + // Don't print +0. + if (OffImm < 0) + O << ", #-" << -OffImm; + else if (OffImm > 0) + O << ", #" << OffImm; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8s4Operand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + + O << "[" << getRegisterName(MO1.getReg()); + + int32_t OffImm = (int32_t)MO2.getImm() / 4; + // Don't print +0. + if (OffImm < 0) + O << ", #-" << -OffImm * 4; + else if (OffImm > 0) + O << ", #" << OffImm * 4; + O << "]"; +} + +void ARMInstPrinter::printT2AddrModeImm8OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm(); + // Don't print +0. + if (OffImm < 0) + O << "#-" << -OffImm; + else if (OffImm > 0) + O << "#" << OffImm; +} + +void ARMInstPrinter::printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + int32_t OffImm = (int32_t)MO1.getImm() / 4; + // Don't print +0. + if (OffImm < 0) + O << "#-" << -OffImm * 4; + else if (OffImm > 0) + O << "#" << OffImm * 4; +} + +void ARMInstPrinter::printT2AddrModeSoRegOperand(const MCInst *MI, + unsigned OpNum) { + const MCOperand &MO1 = MI->getOperand(OpNum); + const MCOperand &MO2 = MI->getOperand(OpNum+1); + const MCOperand &MO3 = MI->getOperand(OpNum+2); + + O << "[" << getRegisterName(MO1.getReg()); + + assert(MO2.getReg() && "Invalid so_reg load / store address!"); + O << ", " << getRegisterName(MO2.getReg()); + + unsigned ShAmt = MO3.getImm(); + if (ShAmt) { + assert(ShAmt <= 3 && "Not a valid Thumb2 addressing mode!"); + O << ", lsl #" << ShAmt; + } + O << "]"; +} + +void ARMInstPrinter::printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum) { + O << '#' << MI->getOperand(OpNum).getImm(); +} + +void ARMInstPrinter::printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum) { + O << '#' << MI->getOperand(OpNum).getImm(); +} + diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 9a3cbc3..d41b5df 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -48,32 +48,33 @@ public: void printAddrMode5Operand(const MCInst *MI, unsigned OpNum, const char *Modifier = 0); void printAddrMode6Operand(const MCInst *MI, unsigned OpNum); + void printAddrMode6OffsetOperand(const MCInst *MI, unsigned OpNum); void printAddrModePCOperand(const MCInst *MI, unsigned OpNum, const char *Modifier = 0); void printBitfieldInvMaskImmOperand(const MCInst *MI, unsigned OpNum); void printThumbS4ImmOperand(const MCInst *MI, unsigned OpNum); - void printThumbITMask(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum) {} + void printThumbITMask(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeRROperand(const MCInst *MI, unsigned OpNum); void printThumbAddrModeRI5Operand(const MCInst *MI, unsigned OpNum, - unsigned Scale) {} - void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum) {} - void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum) {} + unsigned Scale); + void printThumbAddrModeS1Operand(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeS2Operand(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeS4Operand(const MCInst *MI, unsigned OpNum); + void printThumbAddrModeSPOperand(const MCInst *MI, unsigned OpNum); - void printT2SOOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum) {} - void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {} + void printT2SOOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm12Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8s4Operand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8OffsetOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeImm8s4OffsetOperand(const MCInst *MI, unsigned OpNum); + void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum); - void printCPSOptionOperand(const MCInst *MI, unsigned OpNum) {} - void printMSRMaskOperand(const MCInst *MI, unsigned OpNum) {} - void printNegZeroOperand(const MCInst *MI, unsigned OpNum) {} + void printCPSOptionOperand(const MCInst *MI, unsigned OpNum); + void printMSRMaskOperand(const MCInst *MI, unsigned OpNum); + void printNegZeroOperand(const MCInst *MI, unsigned OpNum); void printPredicateOperand(const MCInst *MI, unsigned OpNum); void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum); void printSBitModifierOperand(const MCInst *MI, unsigned OpNum); @@ -82,10 +83,10 @@ public: const char *Modifier); void printJTBlockOperand(const MCInst *MI, unsigned OpNum) {} void printJT2BlockOperand(const MCInst *MI, unsigned OpNum) {} - void printTBAddrMode(const MCInst *MI, unsigned OpNum) {} + void printTBAddrMode(const MCInst *MI, unsigned OpNum); void printNoHashImmediate(const MCInst *MI, unsigned OpNum); - void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {} - void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {} + void printVFPf32ImmOperand(const MCInst *MI, unsigned OpNum); + void printVFPf64ImmOperand(const MCInst *MI, unsigned OpNum); void printHex8ImmOperand(const MCInst *MI, int OpNum) {} void printHex16ImmOperand(const MCInst *MI, int OpNum) {} void printHex32ImmOperand(const MCInst *MI, int OpNum) {} diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index d9942c8..c36fe63 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -64,16 +64,16 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD2LNq16a: - case ARM::VLD2LNq32a: + case ARM::VLD2LNq16: + case ARM::VLD2LNq32: FirstOpnd = 0; NumRegs = 2; Offset = 0; Stride = 2; return true; - case ARM::VLD2LNq16b: - case ARM::VLD2LNq32b: + case ARM::VLD2LNq16odd: + case ARM::VLD2LNq32odd: FirstOpnd = 0; NumRegs = 2; Offset = 1; @@ -91,34 +91,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 3; return true; - case ARM::VLD3q8a: - case ARM::VLD3q16a: - case ARM::VLD3q32a: + case ARM::VLD3q8_UPD: + case ARM::VLD3q16_UPD: + case ARM::VLD3q32_UPD: FirstOpnd = 0; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VLD3q8b: - case ARM::VLD3q16b: - case ARM::VLD3q32b: + case ARM::VLD3q8odd_UPD: + case ARM::VLD3q16odd_UPD: + case ARM::VLD3q32odd_UPD: FirstOpnd = 0; NumRegs = 3; Offset = 1; Stride = 2; return true; - case ARM::VLD3LNq16a: - case ARM::VLD3LNq32a: + case ARM::VLD3LNq16: + case ARM::VLD3LNq32: FirstOpnd = 0; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VLD3LNq16b: - case ARM::VLD3LNq32b: + case ARM::VLD3LNq16odd: + case ARM::VLD3LNq32odd: FirstOpnd = 0; NumRegs = 3; Offset = 1; @@ -136,34 +136,34 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, NumRegs = 4; return true; - case ARM::VLD4q8a: - case ARM::VLD4q16a: - case ARM::VLD4q32a: + case ARM::VLD4q8_UPD: + case ARM::VLD4q16_UPD: + case ARM::VLD4q32_UPD: FirstOpnd = 0; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VLD4q8b: - case ARM::VLD4q16b: - case ARM::VLD4q32b: + case ARM::VLD4q8odd_UPD: + case ARM::VLD4q16odd_UPD: + case ARM::VLD4q32odd_UPD: FirstOpnd = 0; NumRegs = 4; Offset = 1; Stride = 2; return true; - case ARM::VLD4LNq16a: - case ARM::VLD4LNq32a: + case ARM::VLD4LNq16: + case ARM::VLD4LNq32: FirstOpnd = 0; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VLD4LNq16b: - case ARM::VLD4LNq32b: + case ARM::VLD4LNq16odd: + case ARM::VLD4LNq32odd: FirstOpnd = 0; NumRegs = 4; Offset = 1; @@ -177,28 +177,28 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 2; return true; case ARM::VST2q8: case ARM::VST2q16: case ARM::VST2q32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; return true; - case ARM::VST2LNq16a: - case ARM::VST2LNq32a: - FirstOpnd = 4; + case ARM::VST2LNq16: + case ARM::VST2LNq32: + FirstOpnd = 2; NumRegs = 2; Offset = 0; Stride = 2; return true; - case ARM::VST2LNq16b: - case ARM::VST2LNq32b: - FirstOpnd = 4; + case ARM::VST2LNq16odd: + case ARM::VST2LNq32odd: + FirstOpnd = 2; NumRegs = 2; Offset = 1; Stride = 2; @@ -211,39 +211,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 3; return true; - case ARM::VST3q8a: - case ARM::VST3q16a: - case ARM::VST3q32a: - FirstOpnd = 5; + case ARM::VST3q8_UPD: + case ARM::VST3q16_UPD: + case ARM::VST3q32_UPD: + FirstOpnd = 4; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VST3q8b: - case ARM::VST3q16b: - case ARM::VST3q32b: - FirstOpnd = 5; + case ARM::VST3q8odd_UPD: + case ARM::VST3q16odd_UPD: + case ARM::VST3q32odd_UPD: + FirstOpnd = 4; NumRegs = 3; Offset = 1; Stride = 2; return true; - case ARM::VST3LNq16a: - case ARM::VST3LNq32a: - FirstOpnd = 4; + case ARM::VST3LNq16: + case ARM::VST3LNq32: + FirstOpnd = 2; NumRegs = 3; Offset = 0; Stride = 2; return true; - case ARM::VST3LNq16b: - case ARM::VST3LNq32b: - FirstOpnd = 4; + case ARM::VST3LNq16odd: + case ARM::VST3LNq32odd: + FirstOpnd = 2; NumRegs = 3; Offset = 1; Stride = 2; @@ -256,39 +256,39 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: - FirstOpnd = 4; + FirstOpnd = 2; NumRegs = 4; return true; - case ARM::VST4q8a: - case ARM::VST4q16a: - case ARM::VST4q32a: - FirstOpnd = 5; + case ARM::VST4q8_UPD: + case ARM::VST4q16_UPD: + case ARM::VST4q32_UPD: + FirstOpnd = 4; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VST4q8b: - case ARM::VST4q16b: - case ARM::VST4q32b: - FirstOpnd = 5; + case ARM::VST4q8odd_UPD: + case ARM::VST4q16odd_UPD: + case ARM::VST4q32odd_UPD: + FirstOpnd = 4; NumRegs = 4; Offset = 1; Stride = 2; return true; - case ARM::VST4LNq16a: - case ARM::VST4LNq32a: - FirstOpnd = 4; + case ARM::VST4LNq16: + case ARM::VST4LNq32: + FirstOpnd = 2; NumRegs = 4; Offset = 0; Stride = 2; return true; - case ARM::VST4LNq16b: - case ARM::VST4LNq32b: - FirstOpnd = 4; + case ARM::VST4LNq16odd: + case ARM::VST4LNq32odd: + FirstOpnd = 2; NumRegs = 4; Offset = 1; Stride = 2; diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index f5ba155..f36d4ef 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -78,14 +78,16 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { DebugLoc ndl = NMI->getDebugLoc(); unsigned NPredReg = 0; ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); - if (NCC == OCC) { - Mask |= (1 << Pos); - } else if (NCC != CC) + if (NCC == CC || NCC == OCC) + Mask |= (NCC & 1) << Pos; + else break; --Pos; ++MBBI; } Mask |= (1 << Pos); + // Tag along (firstcond[0] << 4) with the mask. + Mask |= (CC & 1) << 4; MIB.addImm(Mask); Modified = true; ++NumITs; diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td index 6d82875..d984556 100644 --- a/lib/Target/Alpha/AlphaInstrFormats.td +++ b/lib/Target/Alpha/AlphaInstrFormats.td @@ -56,16 +56,16 @@ class MfcForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin> : InstAlpha<opcode, asmstr, itin> { bits<5> Ra; - let OutOperandList = (ops GPRC:$RA); - let InOperandList = (ops); + let OutOperandList = (outs GPRC:$RA); + let InOperandList = (ins); let Inst{25-21} = Ra; let Inst{20-16} = 0; let Inst{15-0} = fc; } class MfcPForm<bits<6> opcode, bits<16> fc, string asmstr, InstrItinClass itin> : InstAlpha<opcode, asmstr, itin> { - let OutOperandList = (ops); - let InOperandList = (ops); + let OutOperandList = (outs); + let InOperandList = (ins); let Inst{25-21} = 0; let Inst{20-16} = 0; let Inst{15-0} = fc; @@ -77,7 +77,7 @@ class MbrForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass bits<5> Rb; bits<14> disp; - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; let Inst{25-21} = Ra; @@ -92,7 +92,7 @@ class MbrpForm<bits<6> opcode, bits<2> TB, dag OL, string asmstr, list<dag> patt bits<5> Rb; bits<14> disp; - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; let Inst{25-21} = Ra; @@ -107,7 +107,7 @@ def target : Operand<OtherVT> {} let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { class BFormN<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin> : InstAlpha<opcode, asmstr, itin> { - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; bits<64> Opc; //dummy bits<5> Ra; @@ -122,8 +122,8 @@ let isBranch = 1, isTerminator = 1 in class BFormD<bits<6> opcode, string asmstr, list<dag> pattern, InstrItinClass itin> : InstAlpha<opcode, asmstr, itin> { let Pattern = pattern; - let OutOperandList = (ops); - let InOperandList = (ops target:$DISP); + let OutOperandList = (outs); + let InOperandList = (ins target:$DISP); bits<5> Ra; bits<21> disp; @@ -250,7 +250,7 @@ class FPForm<bits<6> opcode, bits<11> fun, string asmstr, list<dag> pattern, Ins //3.3.5 class PALForm<bits<6> opcode, dag OL, string asmstr, InstrItinClass itin> : InstAlpha<opcode, asmstr, itin> { - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = OL; bits<26> Function; diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index 91e58ce..d5d5e02 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -392,12 +392,12 @@ def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0 let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in { - def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine - def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine + def RETDAG : MbrForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", s_jsr>; //Return from subroutine + def RETDAGp : MbrpForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine } let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in -def JMP : MbrpForm< 0x1A, 0x00, (ops GPRC:$RS), "jmp $$31,($RS),0", +def JMP : MbrpForm< 0x1A, 0x00, (ins GPRC:$RS), "jmp $$31,($RS),0", [(brind GPRC:$RS)], s_jsr>; //Jump let isCall = 1, Ra = 26, @@ -414,18 +414,18 @@ let isCall = 1, Ra = 26, Rb = 27, disp = 0, F0, F1, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in { - def JSR : MbrForm< 0x1A, 0x01, (ops ), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine + def JSR : MbrForm< 0x1A, 0x01, (ins), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine } let isCall = 1, Ra = 23, Rb = 27, disp = 0, Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in - def JSRs : MbrForm< 0x1A, 0x01, (ops ), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem + def JSRs : MbrForm< 0x1A, 0x01, (ins), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem -def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ops GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return +def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ins GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return -let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)", [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>; def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow", @@ -445,7 +445,7 @@ def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow", } -let OutOperandList = (ops), InOperandList = (ops GPRC:$RA, s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs), InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB) in { def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)", [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>; def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow", @@ -465,7 +465,7 @@ def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow", } //Load address -let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)", [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>; def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow", @@ -476,25 +476,25 @@ def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh", [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high } -let OutOperandList = (ops), InOperandList = (ops F4RC:$RA, s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs), InOperandList = (ins F4RC:$RA, s64imm:$DISP, GPRC:$RB) in { def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)", [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>; def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow", [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>; } -let OutOperandList = (ops F4RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs F4RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)", [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>; def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow", [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>; } -let OutOperandList = (ops), InOperandList = (ops F8RC:$RA, s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs), InOperandList = (ins F8RC:$RA, s64imm:$DISP, GPRC:$RB) in { def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)", [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>; def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow", [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>; } -let OutOperandList = (ops F8RC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in { +let OutOperandList = (outs F8RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)", [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>; def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow", @@ -570,15 +570,15 @@ def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr), //load address, rellocated gpdist form -let OutOperandList = (ops GPRC:$RA), - InOperandList = (ops s16imm:$DISP, GPRC:$RB, s16imm:$NUM), +let OutOperandList = (outs GPRC:$RA), + InOperandList = (ins s16imm:$DISP, GPRC:$RB, s16imm:$NUM), mayLoad = 1 in { def LDAg : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address } //Load quad, rellocated literal form -let OutOperandList = (ops GPRC:$RA), InOperandList = (ops s64imm:$DISP, GPRC:$RB) in +let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal", [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>; def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB), @@ -591,8 +591,8 @@ let OutOperandList = (outs GPRC:$RR), def STQ_C : MForm<0x2F, 0, "stq_l $RA,$DISP($RB)", [], s_ist>; def STL_C : MForm<0x2E, 0, "stl_l $RA,$DISP($RB)", [], s_ist>; } -let OutOperandList = (ops GPRC:$RA), - InOperandList = (ops s64imm:$DISP, GPRC:$RB), +let OutOperandList = (outs GPRC:$RA), + InOperandList = (ins s64imm:$DISP, GPRC:$RB), mayLoad = 1 in { def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>; def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>; @@ -611,11 +611,11 @@ def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)), //Floats -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC", [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>; -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F4RC:$RA, F4RC:$RB) in { +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RA, F4RC:$RB) in { def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC", [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>; def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC", @@ -634,11 +634,11 @@ def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", //Doubles -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC", [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RA, F8RC:$RB) in { +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RA, F8RC:$RB) in { def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC", [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>; def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC", @@ -665,13 +665,13 @@ def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>; } //More CPYS forms: -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RA, F8RC:$RB) in { +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RA, F8RC:$RB) in { def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>; def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>; } -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RA, F4RC:$RB) in { +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RA, F4RC:$RB) in { def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", [(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>; def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent @@ -680,7 +680,7 @@ def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", } //conditional moves, floats -let OutOperandList = (ops F4RC:$RDEST), InOperandList = (ops F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), +let OutOperandList = (outs F4RC:$RDEST), InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), isTwoAddress = 1 in { def FCMOVEQS : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if = zero def FCMOVGES : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if >= zero @@ -690,7 +690,7 @@ def FCMOVLTS : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST",[], s_fcmov>; def FCMOVNES : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST",[], s_fcmov>; //FCMOVE if != zero } //conditional moves, doubles -let OutOperandList = (ops F8RC:$RDEST), InOperandList = (ops F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), +let OutOperandList = (outs F8RC:$RDEST), InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), isTwoAddress = 1 in { def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>; def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>; @@ -790,33 +790,33 @@ def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), -let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F4RC:$RA), Fb = 31 in +let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F4RC:$RA), Fb = 31 in def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC", [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating -let OutOperandList = (ops GPRC:$RC), InOperandList = (ops F8RC:$RA), Fb = 31 in +let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F8RC:$RA), Fb = 31 in def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC", [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC", [(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, S_floating -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops GPRC:$RA), Fb = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC", [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC", [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC", [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC", [(set F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>; -let OutOperandList = (ops F8RC:$RC), InOperandList = (ops F4RC:$RB), Fa = 31 in +let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC", [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>; -let OutOperandList = (ops F4RC:$RC), InOperandList = (ops F8RC:$RB), Fa = 31 in +let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC", [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>; @@ -829,20 +829,20 @@ def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf), //Branching ///////////////////////////////////////////////////////// class br_icc<bits<6> opc, string asmstr> - : BFormN<opc, (ops u64imm:$opc, GPRC:$R, target:$dst), + : BFormN<opc, (ins u64imm:$opc, GPRC:$R, target:$dst), !strconcat(asmstr, " $R,$dst"), s_icbr>; class br_fcc<bits<6> opc, string asmstr> - : BFormN<opc, (ops u64imm:$opc, F8RC:$R, target:$dst), + : BFormN<opc, (ins u64imm:$opc, F8RC:$R, target:$dst), !strconcat(asmstr, " $R,$dst"), s_fbr>; let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { let Ra = 31 in def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>; -def COND_BRANCH_I : BFormN<0, (ops u64imm:$opc, GPRC:$R, target:$dst), +def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst), "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst", s_icbr>; -def COND_BRANCH_F : BFormN<0, (ops u64imm:$opc, F8RC:$R, target:$dst), +def COND_BRANCH_F : BFormN<0, (ins u64imm:$opc, F8RC:$R, target:$dst), "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst", s_fbr>; //Branches, int diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index 88ff85f..e3c3993 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -29,7 +29,8 @@ def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd, def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; @@ -610,7 +611,7 @@ def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb), def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc), "$dst = $cc;", - [(set D:$dst, (zext JustCC:$cc))]>; + [/*(set D:$dst, (zext JustCC:$cc))*/]>; def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc), "$dst = cc;", []>; @@ -859,10 +860,10 @@ def : Pat<(BfinCall (i32 tglobaladdr:$dst)), def : Pat<(BfinCall (i32 texternalsym:$dst)), (CALLa texternalsym:$dst)>; -def : Pat<(sext JustCC:$cc), - (NEG (MOVECC_zext JustCC:$cc))>; -def : Pat<(anyext JustCC:$cc), - (MOVECC_zext JustCC:$cc)>; +//def : Pat<(sext JustCC:$cc), +// (NEG (MOVECC_zext JustCC:$cc))>; +//def : Pat<(anyext JustCC:$cc), +// (MOVECC_zext JustCC:$cc)>; def : Pat<(i16 (zext JustCC:$cc)), (EXTRACT_SUBREG (MOVECC_zext JustCC:$cc), bfin_subreg_lo16)>; def : Pat<(i16 (sext JustCC:$cc)), diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp index ea9480d..34a8d38 100644 --- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp +++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp @@ -53,6 +53,10 @@ std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, unsigned BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const { + if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l' + || Name[2] != 'v' || Name[3] != 'm') + return 0; // All intrinsics start with 'llvm.' + #define GET_FUNCTION_RECOGNIZER #include "BlackfinGenIntrinsics.inc" #undef GET_FUNCTION_RECOGNIZER diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index b1ba0d2..0c265ad 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -111,7 +111,8 @@ namespace { static char ID; explicit CWriter(formatted_raw_ostream &o) : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0), - TheModule(0), TAsm(0), TD(0), OpaqueCounter(0), NextAnonValueNumber(0) { + TheModule(0), TAsm(0), TCtx(0), TD(0), OpaqueCounter(0), + NextAnonValueNumber(0) { FPCounter = 0; } @@ -147,6 +148,8 @@ namespace { delete IL; delete TD; delete Mang; + delete TCtx; + delete TAsm; FPConstantMap.clear(); TypeNames.clear(); ByValParams.clear(); diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index ad12604..5068f77 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -1133,16 +1133,14 @@ class XSBHInst<dag OOL, dag IOL, list<dag> pattern>: "xsbh\t$rDst, $rSrc", IntegerOp, pattern>; -class XSBHVecInst<ValueType vectype>: - XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), - [(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>; - class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>: XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc), pattern>; multiclass ExtendByteHalfword { - def v16i8: XSBHVecInst<v8i16>; + def v16i8: XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), + [ + /*(set (v8i16 VECREG:$rDst), (sext (v8i16 VECREG:$rSrc)))*/]>; def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc), [(set R16C:$rDst, (sext R8C:$rSrc))]>; def r16: XSBHInRegInst<R16C, @@ -1200,8 +1198,8 @@ class XSWDInst<dag OOL, dag IOL, list<dag> pattern>: class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>: XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc), - [(set (out_vectype VECREG:$rDst), - (sext (out_vectype VECREG:$rSrc)))]>; + [/*(set (out_vectype VECREG:$rDst), + (sext (out_vectype VECREG:$rSrc)))*/]>; class XSWDRegInst<RegisterClass in_rclass, RegisterClass out_rclass>: XSWDInst<(outs out_rclass:$rDst), (ins in_rclass:$rSrc), @@ -4146,7 +4144,7 @@ def CFSif32 : def FESDvec : RRForm_1<0b00011101110, (outs VECREG:$rT), (ins VECREG:$rA), "fesd\t$rT, $rA", SPrecFP, - [(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))]>; + [/*(set (v2f64 VECREG:$rT), (fextend (v4f32 VECREG:$rA)))*/]>; def FESDf32 : RRForm_1<0b00011101110, (outs R64FP:$rT), (ins R32FP:$rA), diff --git a/lib/Target/CellSPU/SPUNodes.td b/lib/Target/CellSPU/SPUNodes.td index 8507861..846c7ed 100644 --- a/lib/Target/CellSPU/SPUNodes.td +++ b/lib/Target/CellSPU/SPUNodes.td @@ -28,7 +28,8 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPUCallSeq, def SDT_SPUCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; def SPUcall : SDNode<"SPUISD::CALL", SDT_SPUCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; // Operand type constraints for vector shuffle/permute operations def SDT_SPUshuffle : SDTypeProfile<1, 3, [ diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp index c8faffc..4931860 100644 --- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp +++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp @@ -57,6 +57,10 @@ std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys, unsigned MBlazeIntrinsicInfo:: lookupName(const char *Name, unsigned Len) const { + if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l' + || Name[2] != 'v' || Name[3] != 'm') + return 0; // All intrinsics start with 'llvm.' + #define GET_FUNCTION_RECOGNIZER #include "MBlazeGenIntrinsics.inc" #undef GET_FUNCTION_RECOGNIZER diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index cef3697..2b9e941 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -29,7 +29,8 @@ def SDT_MipsCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>; // Call def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, + SDNPVariadic]>; // Hi and Lo nodes are used to handle global addresses. Used on // MipsISelLowering to lower stuff like GlobalAddress, ExternalSymbol diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index b6eceb3..1001d29 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -184,7 +184,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { // by any chance, as we do not link in those as .bc lib. So these calls // are always external and it is safe to emit an extern. if (PAN::isMemIntrinsic(Sym->getName())) - LibcallDecls.push_back(createESName(Sym->getName())); + LibcallDecls.insert(Sym->getName()); O << *Sym; break; @@ -199,7 +199,7 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { Printname = PAN::Rename(Sname); } // Record these decls, we need to print them in asm as extern. - LibcallDecls.push_back(createESName(Printname)); + LibcallDecls.insert(Printname); } O << Printname; @@ -221,18 +221,6 @@ void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { O << PIC16CondCodeToString((PIC16CC::CondCodes)CC); } -// This function is used to sort the decls list. -// should return true if s1 should come before s2. -static bool is_before(const char *s1, const char *s2) { - return strcmp(s1, s2) <= 0; -} - -// This is used by list::unique below. -// unique will filter out duplicates if it knows them. -static bool is_duplicate(const char *s1, const char *s2) { - return !strcmp(s1, s2); -} - /// printLibcallDecls - print the extern declarations for compiler /// intrinsics. /// @@ -241,12 +229,9 @@ void PIC16AsmPrinter::printLibcallDecls() { if (LibcallDecls.empty()) return; O << MAI->getCommentString() << "External decls for libcalls - BEGIN." <<"\n"; - // Remove duplicate entries. - LibcallDecls.sort(is_before); - LibcallDecls.unique(is_duplicate); - for (std::list<const char*>::const_iterator I = LibcallDecls.begin(); - I != LibcallDecls.end(); I++) { + for (std::set<std::string>::const_iterator I = LibcallDecls.begin(), + E = LibcallDecls.end(); I != E; I++) { O << MAI->getExternDirective() << *I << "\n"; } O << MAI->getCommentString() << "External decls for libcalls - END." <<"\n"; diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h index 519be4c..8063fcc 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.h @@ -25,6 +25,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetMachine.h" #include <list> +#include <set> #include <string> namespace llvm { @@ -80,7 +81,7 @@ namespace llvm { PIC16TargetLowering *PTLI; PIC16DbgInfo DbgInfo; const PIC16MCAsmInfo *PMAI; - std::list<const char *> LibcallDecls; // List of extern decls. + std::set<std::string> LibcallDecls; // Sorted & uniqued set of extern decls. std::vector<const GlobalVariable *> ExternalVarDecls; std::vector<const GlobalVariable *> ExternalVarDefs; }; diff --git a/lib/Target/PIC16/PIC16Section.h b/lib/Target/PIC16/PIC16Section.h index 3a8bbfb..566f920 100644 --- a/lib/Target/PIC16/PIC16Section.h +++ b/lib/Target/PIC16/PIC16Section.h @@ -45,7 +45,7 @@ namespace llvm { PIC16Section(const StringRef &name, SectionKind K, const std::string &addr, int color) - : MCSection(K), Name(name), Address(addr), Color(color) { + : MCSection(K), Name(name), Address(addr), Color(color), Size(0) { } public: diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 845cd8f..532a3ec 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -111,9 +111,11 @@ def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_PPCCallSeqEnd, def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>; def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInFlag, SDNPOutFlag]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; @@ -124,16 +126,18 @@ def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def PPCbctrl_SVR4 : SDNode<"PPCISD::BCTRL_SVR4", SDTNone, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def PPCtc_return : SDNode<"PPCISD::TC_RETURN", SDT_PPCTC_ret, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>; def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutFlag]>; diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index cac6962..c4a7408 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -20,7 +20,7 @@ #include "llvm/Support/FormattedStream.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); bool isPPC64 = TheTriple.getArch() == Triple::ppc64; if (TheTriple.getOS() == Triple::Darwin) diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index d88d508..9489580 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -527,11 +527,11 @@ let Uses = [O0, O1, O2, O3, O4, O5], def JMPLrr : F3_1<2, 0b111000, (outs), (ins MEMrr:$ptr), "call $ptr", - [(call ADDRrr:$ptr)]>; + [(call ADDRrr:$ptr)]>; def JMPLri : F3_2<2, 0b111000, (outs), (ins MEMri:$ptr), "call $ptr", - [(call ADDRri:$ptr)]>; + [(call ADDRri:$ptr)]>; } // Section B.28 - Read State Register Instructions diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index a75b85d..0d1af23 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -47,7 +47,7 @@ def SDT_Address : SDTypeProfile<1, 1, def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, SDNPVariadic]>; def SystemZcallseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart, [SDNPHasChain, SDNPOutFlag]>; diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 9a16808..643b397 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -460,6 +460,15 @@ uint64_t TargetData::getTypeSizeInBits(const Type *Ty) const { case Type::StructTyID: // Get the layout annotation... which is lazily created on demand. return getStructLayout(cast<StructType>(Ty))->getSizeInBits(); + case Type::UnionTyID: { + const UnionType *UnTy = cast<UnionType>(Ty); + uint64_t Size = 0; + for (UnionType::element_iterator i = UnTy->element_begin(), + e = UnTy->element_end(); i != e; ++i) { + Size = std::max(Size, getTypeSizeInBits(*i)); + } + return Size; + } case Type::IntegerTyID: return cast<IntegerType>(Ty)->getBitWidth(); case Type::VoidTyID: @@ -516,6 +525,17 @@ unsigned char TargetData::getAlignment(const Type *Ty, bool abi_or_pref) const { unsigned Align = getAlignmentInfo(AGGREGATE_ALIGN, 0, abi_or_pref, Ty); return std::max(Align, (unsigned)Layout->getAlignment()); } + case Type::UnionTyID: { + const UnionType *UnTy = cast<UnionType>(Ty); + unsigned Align = 1; + + // Unions need the maximum alignment of all their entries + for (UnionType::element_iterator i = UnTy->element_begin(), + e = UnTy->element_end(); i != e; ++i) { + Align = std::max(Align, (unsigned)getAlignment(*i, abi_or_pref)); + } + return Align; + } case Type::IntegerTyID: case Type::VoidTyID: AlignType = INTEGER_ALIGN; @@ -600,6 +620,11 @@ uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices, // Update Ty to refer to current element Ty = STy->getElementType(FieldNo); + } else if (const UnionType *UnTy = dyn_cast<UnionType>(*TI)) { + unsigned FieldNo = cast<ConstantInt>(Indices[CurIDX])->getZExtValue(); + + // Offset into union is canonically 0, but type changes + Ty = UnTy->getElementType(FieldNo); } else { // Update Ty to refer to current element Ty = cast<SequentialType>(Ty)->getElementType(); diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index a093e2d..44722b3 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -317,7 +317,7 @@ getExprForDwarfReference(const MCSymbol *Sym, Mangler *Mang, case dwarf::DW_EH_PE_pcrel: { // Emit a label to the streamer for the current position. This gives us // .-foo addressing. - MCSymbol *PCSym = getContext().GetOrCreateTemporarySymbol(); + MCSymbol *PCSym = getContext().CreateTempSymbol(); Streamer.EmitLabel(PCSym); const MCExpr *PC = MCSymbolRefExpr::Create(PCSym, getContext()); return MCBinaryExpr::CreateSub(Res, PC, getContext()); diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index dde86fb..47873d1 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -29,6 +29,9 @@ struct X86Operand; class X86ATTAsmParser : public TargetAsmParser { MCAsmParser &Parser; +protected: + unsigned Is64Bit : 1; + private: MCAsmParser &getParser() const { return Parser; } @@ -45,6 +48,8 @@ private: bool ParseDirectiveWord(unsigned Size, SMLoc L); + void InstructionCleanup(MCInst &Inst); + /// @name Auto-generated Match Functions /// { @@ -62,7 +67,23 @@ public: virtual bool ParseDirective(AsmToken DirectiveID); }; - + +class X86_32ATTAsmParser : public X86ATTAsmParser { +public: + X86_32ATTAsmParser(const Target &T, MCAsmParser &_Parser) + : X86ATTAsmParser(T, _Parser) { + Is64Bit = false; + } +}; + +class X86_64ATTAsmParser : public X86ATTAsmParser { +public: + X86_64ATTAsmParser(const Target &T, MCAsmParser &_Parser) + : X86ATTAsmParser(T, _Parser) { + Is64Bit = true; + } +}; + } // end anonymous namespace /// @name Auto-generated Match Functions @@ -548,8 +569,10 @@ ParseInstruction(const StringRef &Name, SMLoc NameLoc, Operands.size() == 3 && static_cast<X86Operand*>(Operands[1])->isImm() && isa<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm()) && - cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) + cast<MCConstantExpr>(static_cast<X86Operand*>(Operands[1])->getImm())->getValue() == 1) { + delete Operands[1]; Operands.erase(Operands.begin() + 1); + } return false; } @@ -586,12 +609,30 @@ bool X86ATTAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { return false; } +// FIXME: Custom X86 cleanup function to implement a temporary hack to handle +// matching INCL/DECL correctly for x86_64. This needs to be replaced by a +// proper mechanism for supporting (ambiguous) feature dependent instructions. +void X86ATTAsmParser::InstructionCleanup(MCInst &Inst) { + if (!Is64Bit) return; + + switch (Inst.getOpcode()) { + case X86::DEC16r: Inst.setOpcode(X86::DEC64_16r); break; + case X86::DEC16m: Inst.setOpcode(X86::DEC64_16m); break; + case X86::DEC32r: Inst.setOpcode(X86::DEC64_32r); break; + case X86::DEC32m: Inst.setOpcode(X86::DEC64_32m); break; + case X86::INC16r: Inst.setOpcode(X86::INC64_16r); break; + case X86::INC16m: Inst.setOpcode(X86::INC64_16m); break; + case X86::INC32r: Inst.setOpcode(X86::INC64_32r); break; + case X86::INC32m: Inst.setOpcode(X86::INC64_32m); break; + } +} + extern "C" void LLVMInitializeX86AsmLexer(); // Force static initialization. extern "C" void LLVMInitializeX86AsmParser() { - RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target); - RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target); + RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target); + RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target); LLVMInitializeX86AsmLexer(); } diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index cbfc57a..7d29d97 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -427,7 +427,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { // MYGLOBAL + (. - PICBASE) // However, we can't generate a ".", so just emit a new label here and refer // to it. - MCSymbol *DotSym = OutContext.GetOrCreateTemporarySymbol(); + MCSymbol *DotSym = OutContext.CreateTempSymbol(); OutStreamer.EmitLabel(DotSym); // Now that we have emitted the label, lower the complex operand expression. diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp index a316860..7b7b5cb 100644 --- a/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -459,11 +459,11 @@ static void translateInstruction(MCInst &mcInst, } } -static const MCDisassembler *createX86_32Disassembler(const Target &T) { +static MCDisassembler *createX86_32Disassembler(const Target &T) { return new X86Disassembler::X86_32Disassembler; } -static const MCDisassembler *createX86_64Disassembler(const Target &T) { +static MCDisassembler *createX86_64Disassembler(const Target &T) { return new X86Disassembler::X86_64Disassembler; } diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index a0a04ba..4f02ed4 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -1355,8 +1355,8 @@ int decodeInstruction(struct InternalInstruction* insn, insn->length = insn->readerCursor - insn->startLocation; - dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %llu", - startLoc, insn->readerCursor, insn->length); + dbgprintf(insn, "Read from 0x%llx to 0x%llx: length %zu", + startLoc, insn->readerCursor, insn->length); if (insn->length > 15) dbgprintf(insn, "Instruction exceeds 15-byte limit"); diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 6a4bdb5..2be51e1 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -191,6 +191,7 @@ include "X86CallingConv.td" // Currently the X86 assembly parser only supports ATT syntax. def ATTAsmParser : AsmParser { string AsmParserClassName = "ATTAsmParser"; + string AsmParserInstCleanup = "InstructionCleanup"; int Variant = 0; // Discard comments in assembly strings. diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp index a44afc6..754a200 100644 --- a/lib/Target/X86/X86AsmBackend.cpp +++ b/lib/Target/X86/X86AsmBackend.cpp @@ -9,39 +9,100 @@ #include "llvm/Target/TargetAsmBackend.h" #include "X86.h" +#include "X86FixupKinds.h" +#include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" +#include "llvm/MC/MachObjectWriter.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/Target/TargetAsmBackend.h" using namespace llvm; namespace { +static unsigned getFixupKindLog2Size(unsigned Kind) { + switch (Kind) { + default: assert(0 && "invalid fixup kind!"); + case X86::reloc_pcrel_1byte: + case FK_Data_1: return 0; + case FK_Data_2: return 1; + case X86::reloc_pcrel_4byte: + case X86::reloc_riprel_4byte: + case X86::reloc_riprel_4byte_movq_load: + case FK_Data_4: return 2; + case FK_Data_8: return 3; + } +} + class X86AsmBackend : public TargetAsmBackend { public: X86AsmBackend(const Target &T) : TargetAsmBackend(T) {} + + void ApplyFixup(const MCAsmFixup &Fixup, MCDataFragment &DF, + uint64_t Value) const { + unsigned Size = 1 << getFixupKindLog2Size(Fixup.Kind); + + assert(Fixup.Offset + Size <= DF.getContents().size() && + "Invalid fixup offset!"); + for (unsigned i = 0; i != Size; ++i) + DF.getContents()[Fixup.Offset + i] = uint8_t(Value >> (i * 8)); + } +}; + +class ELFX86AsmBackend : public X86AsmBackend { +public: + ELFX86AsmBackend(const Target &T) + : X86AsmBackend(T) { + HasAbsolutizedSet = true; + HasScatteredSymbols = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return 0; + } + + bool isVirtualSection(const MCSection &Section) const { + const MCSectionELF &SE = static_cast<const MCSectionELF&>(Section); + return SE.getType() == MCSectionELF::SHT_NOBITS;; + } }; class DarwinX86AsmBackend : public X86AsmBackend { public: DarwinX86AsmBackend(const Target &T) - : X86AsmBackend(T) {} - - virtual bool hasAbsolutizedSet() const { return true; } + : X86AsmBackend(T) { + HasAbsolutizedSet = true; + HasScatteredSymbols = true; + } - virtual bool hasScatteredSymbols() const { return true; } + bool isVirtualSection(const MCSection &Section) const { + const MCSectionMachO &SMO = static_cast<const MCSectionMachO&>(Section); + return (SMO.getType() == MCSectionMachO::S_ZEROFILL || + SMO.getType() == MCSectionMachO::S_GB_ZEROFILL); + } }; class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { public: DarwinX86_32AsmBackend(const Target &T) : DarwinX86AsmBackend(T) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return new MachObjectWriter(OS, /*Is64Bit=*/false); + } }; class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { public: DarwinX86_64AsmBackend(const Target &T) - : DarwinX86AsmBackend(T) {} + : DarwinX86AsmBackend(T) { + HasReliableSymbolDifference = true; + } + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const { + return new MachObjectWriter(OS, /*Is64Bit=*/true); + } virtual bool doesSectionRequireSymbols(const MCSection &Section) const { // Temporary labels in the string literals sections require symbols. The @@ -65,7 +126,7 @@ TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_32AsmBackend(T); default: - return new X86AsmBackend(T); + return new ELFX86AsmBackend(T); } } @@ -75,6 +136,6 @@ TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T, case Triple::Darwin: return new DarwinX86_64AsmBackend(T); default: - return new X86AsmBackend(T); + return new ELFX86AsmBackend(T); } } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 96b652d..5d3edbb 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1166,6 +1166,21 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; + case Intrinsic::stackprotector: { + // Emit code inline code to store the stack guard onto the stack. + EVT PtrTy = TLI.getPointerTy(); + + Value *Op1 = I.getOperand(1); // The guard's value. + AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); + + // Grab the frame index. + X86AddressMode AM; + if (!X86SelectAddress(Slot, AM)) return false; + + if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; + + return true; + } case Intrinsic::objectsize: { ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2)); const Type *Ty = I.getCalledFunction()->getReturnType(); diff --git a/lib/Target/X86/X86FixupKinds.h b/lib/Target/X86/X86FixupKinds.h index c8dac3c..a8117d4 100644 --- a/lib/Target/X86/X86FixupKinds.h +++ b/lib/Target/X86/X86FixupKinds.h @@ -17,7 +17,8 @@ namespace X86 { enum Fixups { reloc_pcrel_4byte = FirstTargetFixupKind, // 32-bit pcrel, e.g. a branch. reloc_pcrel_1byte, // 8-bit pcrel, e.g. branch_1 - reloc_riprel_4byte // 32-bit rip-relative + reloc_riprel_4byte, // 32-bit rip-relative + reloc_riprel_4byte_movq_load // 32-bit rip-relative in movq }; } } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 4058885..1c0ed7e 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -140,6 +140,21 @@ namespace { } namespace { + class X86ISelListener : public SelectionDAG::DAGUpdateListener { + SmallSet<SDNode*, 4> Deletes; + public: + explicit X86ISelListener() {} + virtual void NodeDeleted(SDNode *N, SDNode *E) { + Deletes.insert(N); + } + virtual void NodeUpdated(SDNode *N) { + // Ignore updates. + } + bool IsDeleted(SDNode *N) { + return Deletes.count(N); + } + }; + //===--------------------------------------------------------------------===// /// ISel - X86 specific code to select X86 machine instructions for /// SelectionDAG operations. @@ -187,6 +202,7 @@ namespace { bool MatchWrapper(SDValue N, X86ISelAddressMode &AM); bool MatchAddress(SDValue N, X86ISelAddressMode &AM); bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + X86ISelListener &DeadNodes, unsigned Depth); bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM); bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, @@ -651,7 +667,8 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) { /// returning true if it cannot be done. This just pattern matches for the /// addressing mode. bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { - if (MatchAddressRecursively(N, AM, 0)) + X86ISelListener DeadNodes; + if (MatchAddressRecursively(N, AM, DeadNodes, 0)) return true; // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has @@ -680,6 +697,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) { } bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, + X86ISelListener &DeadNodes, unsigned Depth) { bool is64Bit = Subtarget->is64Bit(); DebugLoc dl = N.getDebugLoc(); @@ -845,7 +863,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, // Test if the LHS of the sub can be folded. X86ISelAddressMode Backup = AM; - if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) { + if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1) || + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + DeadNodes.IsDeleted(N.getNode())) { AM = Backup; break; } @@ -854,6 +876,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, AM = Backup; break; } + int Cost = 0; SDValue RHS = N.getNode()->getOperand(1); // If the RHS involves a register with multiple uses, this @@ -907,13 +930,33 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, case ISD::ADD: { X86ISelAddressMode Backup = AM; - if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1) && - !MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1)) - return false; + if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1)) { + if (DeadNodes.IsDeleted(N.getNode())) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return true; + if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, + DeadNodes, Depth+1)) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return DeadNodes.IsDeleted(N.getNode()); + } + + // Try again after commuting the operands. AM = Backup; - if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, Depth+1) && - !MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) - return false; + if (!MatchAddressRecursively(N.getNode()->getOperand(1), AM, + DeadNodes, Depth+1)) { + if (DeadNodes.IsDeleted(N.getNode())) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return true; + if (!MatchAddressRecursively(N.getNode()->getOperand(0), AM, + DeadNodes, Depth+1)) + // If it is successful but the recursive update causes N to be deleted, + // then it's not safe to continue. + return DeadNodes.IsDeleted(N.getNode()); + } AM = Backup; // If we couldn't fold both operands into the address at the same time, @@ -935,16 +978,19 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { X86ISelAddressMode Backup = AM; uint64_t Offset = CN->getSExtValue(); + + // Check to see if the LHS & C is zero. + if (!CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) + break; + // Start with the LHS as an addr mode. - if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) && + if (!MatchAddressRecursively(N.getOperand(0), AM, DeadNodes, Depth+1) && // Address could not have picked a GV address for the displacement. AM.GV == NULL && // On x86-64, the resultant disp must fit in 32-bits. (!is64Bit || X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M, - AM.hasSymbolicDisplacement())) && - // Check to see if the LHS & C is zero. - CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { + AM.hasSymbolicDisplacement()))) { AM.Disp += Offset; return false; } @@ -1015,7 +1061,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, CurDAG->RepositionNode(N.getNode(), Shl.getNode()); Shl.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, Shl); + CurDAG->ReplaceAllUsesWith(N, Shl, &DeadNodes); AM.IndexReg = And; AM.Scale = (1 << ScaleLog); return false; @@ -1066,7 +1112,7 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM, NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId()); } - CurDAG->ReplaceAllUsesWith(N, NewSHIFT); + CurDAG->ReplaceAllUsesWith(N, NewSHIFT, &DeadNodes); AM.Scale = 1 << ShiftCst; AM.IndexReg = NewAND; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7d2140b..704f9c6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -2310,6 +2310,28 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isCalleeStructRet || isCallerStructRet) return false; + // If the call result is in ST0 / ST1, it needs to be popped off the x87 stack. + // Therefore if it's not used by the call it is not safe to optimize this into + // a sibcall. + bool Unused = false; + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + if (!Ins[i].Used) { + Unused = true; + break; + } + } + if (Unused) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CalleeCC, false, getTargetMachine(), + RVLocs, *DAG.getContext()); + CCInfo.AnalyzeCallResult(Ins, RetCC_X86); + for (unsigned i = 0; i != RVLocs.size(); ++i) { + CCValAssign &VA = RVLocs[i]; + if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) + return false; + } + } + // If the callee takes no arguments then go on to check the results of the // call. if (!Outs.empty()) { diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 4262c0ac..8cbb756 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -144,7 +144,7 @@ let isCall = 1 in // NOTE: this pattern doesn't match "X86call imm", because we do not know // that the offset between an arbitrary immediate and the call will fit in // the 32-bit pcrel field that we have. - def CALL64pcrel32 : Ii32<0xE8, RawFrm, + def CALL64pcrel32 : Ii32PCRel<0xE8, RawFrm, (outs), (ins i64i32imm_pcrel:$dst, variable_ops), "call{q}\t$dst", []>, Requires<[In64BitMode, NotWin64]>; @@ -511,6 +511,14 @@ def ADD64rr : RI<0x01, MRMDestReg, (outs GR64:$dst), [(set GR64:$dst, (add GR64:$src1, GR64:$src2)), (implicit EFLAGS)]>; +// These are alternate spellings for use by the disassembler, we mark them as +// code gen only to ensure they aren't matched by the assembler. +let isCodeGenOnly = 1 in { + def ADD64rr_alt : RI<0x03, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR64:$src2), + "add{l}\t{$src2, $dst|$dst, $src2}", []>; +} + // Register-Integer Addition def ADD64ri8 : RIi8<0x83, MRM0r, (outs GR64:$dst), (ins GR64:$src1, i64i8imm:$src2), @@ -531,12 +539,6 @@ def ADD64rm : RI<0x03, MRMSrcMem, (outs GR64:$dst), [(set GR64:$dst, (add GR64:$src1, (load addr:$src2))), (implicit EFLAGS)]>; -// Register-Register Addition - Equivalent to the normal rr form (ADD64rr), but -// differently encoded. -def ADD64mrmrr : RI<0x03, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, GR64:$src2), - "add{l}\t{$src2, $dst|$dst, $src2}", []>; - } // isTwoAddress // Memory-Register Addition @@ -1225,59 +1227,59 @@ let Defs = [EFLAGS] in { def TEST64i32 : RIi32<0xa9, RawFrm, (outs), (ins i32imm:$src), "test{q}\t{$src, %rax|%rax, $src}", []>; let isCommutable = 1 in -def TEST64rr : RI<0x85, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), +def TEST64rr : RI<0x85, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR64:$src1, GR64:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR64:$src1, GR64:$src2), 0))]>; def TEST64rm : RI<0x85, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR64:$src1, (loadi64 addr:$src2)), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR64:$src1, (loadi64 addr:$src2)), + 0))]>; def TEST64ri32 : RIi32<0xF7, MRM0r, (outs), (ins GR64:$src1, i64i32imm:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR64:$src1, i64immSExt32:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR64:$src1, i64immSExt32:$src2), + 0))]>; def TEST64mi32 : RIi32<0xF7, MRM0m, (outs), (ins i64mem:$src1, i64i32imm:$src2), "test{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi64 addr:$src1), i64immSExt32:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and (loadi64 addr:$src1), + i64immSExt32:$src2), 0))]>; def CMP64i32 : RIi32<0x3D, RawFrm, (outs), (ins i32imm:$src), "cmp{q}\t{$src, %rax|%rax, $src}", []>; def CMP64rr : RI<0x39, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, GR64:$src2), - (implicit EFLAGS)]>; -def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), - "cmp{q}\t{$src2, $src1|$src1, $src2}", []>; + [(set EFLAGS, (X86cmp GR64:$src1, GR64:$src2))]>; + +// These are alternate spellings for use by the disassembler, we mark them as +// code gen only to ensure they aren't matched by the assembler. +let isCodeGenOnly = 1 in { + def CMP64mrmrr : RI<0x3B, MRMSrcReg, (outs), (ins GR64:$src1, GR64:$src2), + "cmp{q}\t{$src2, $src1|$src1, $src2}", []>; +} + def CMP64mr : RI<0x39, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi64 addr:$src1), GR64:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi64 addr:$src1), GR64:$src2))]>; def CMP64rm : RI<0x3B, MRMSrcMem, (outs), (ins GR64:$src1, i64mem:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, (loadi64 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR64:$src1, (loadi64 addr:$src2)))]>; def CMP64ri8 : RIi8<0x83, MRM7r, (outs), (ins GR64:$src1, i64i8imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt8:$src2))]>; def CMP64ri32 : RIi32<0x81, MRM7r, (outs), (ins GR64:$src1, i64i32imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR64:$src1, i64immSExt32:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR64:$src1, i64immSExt32:$src2))]>; def CMP64mi8 : RIi8<0x83, MRM7m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi64 addr:$src1), i64immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi64 addr:$src1), + i64immSExt8:$src2))]>; def CMP64mi32 : RIi32<0x81, MRM7m, (outs), (ins i64mem:$src1, i64i32imm:$src2), "cmp{q}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi64 addr:$src1), i64immSExt32:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi64 addr:$src1), + i64immSExt32:$src2))]>; } // Defs = [EFLAGS] // Bit tests. @@ -1285,8 +1287,7 @@ def CMP64mi32 : RIi32<0x81, MRM7m, (outs), let Defs = [EFLAGS] in { def BT64rr : RI<0xA3, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR64:$src1, GR64:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, GR64:$src2))]>, TB; // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's @@ -1300,15 +1301,14 @@ def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2), def BT64ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR64:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR64:$src1, i64immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR64:$src1, i64immSExt8:$src2))]>, TB; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. def BT64mi8 : Ii8<0xBA, MRM4m, (outs), (ins i64mem:$src1, i64i8imm:$src2), "bt{q}\t{$src2, $src1|$src1, $src2}", - [(X86bt (loadi64 addr:$src1), i64immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt (loadi64 addr:$src1), + i64immSExt8:$src2))]>, TB; def BTC64rr : RI<0xBB, MRMDestReg, (outs), (ins GR64:$src1, GR64:$src2), "btc{q}\t{$src2, $src1|$src1, $src2}", []>, TB; @@ -1938,7 +1938,7 @@ def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off), // Comparisons. // TEST R,R is smaller than CMP R,0 -def : Pat<(parallel (X86cmp GR64:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR64:$src1, 0), (TEST64rr GR64:$src1, GR64:$src1)>; // Conditional moves with folded loads with operands swapped and conditions @@ -2233,21 +2233,6 @@ def : Pat<(parallel (X86add_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (ADD64rm GR64:$src1, addr:$src2)>; -// Memory-Register Addition with EFLAGS result -def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Subtraction with EFLAGS result def : Pat<(parallel (X86sub_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2266,24 +2251,6 @@ def : Pat<(parallel (X86sub_flag GR64:$src1, i64immSExt32:$src2), (implicit EFLAGS)), (SUB64ri32 GR64:$src1, i64immSExt32:$src2)>; -// Memory-Register Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB64mr addr:$dst, GR64:$src2)>; - -// Memory-Integer Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), - i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Signed Integer Multiplication with EFLAGS result def : Pat<(parallel (X86smul_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2313,36 +2280,18 @@ def : Pat<(parallel (X86smul_flag (loadi64 addr:$src1), i64immSExt32:$src2), // INC and DEC with EFLAGS result. Note that these do not set CF. def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC64_16m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC64_16m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC64_32m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC64_32m addr:$dst)>, Requires<[In64BitMode]>; def : Pat<(parallel (X86inc_flag GR64:$src), (implicit EFLAGS)), (INC64r GR64:$src)>; -def : Pat<(parallel (store (i64 (X86inc_flag (loadi64 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC64m addr:$dst)>; def : Pat<(parallel (X86dec_flag GR64:$src), (implicit EFLAGS)), (DEC64r GR64:$src)>; -def : Pat<(parallel (store (i64 (X86dec_flag (loadi64 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC64m addr:$dst)>; // Register-Register Logical Or with EFLAGS result def : Pat<(parallel (X86or_flag GR64:$src1, GR64:$src2), @@ -2362,20 +2311,6 @@ def : Pat<(parallel (X86or_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (OR64rm GR64:$src1, addr:$src2)>; -// Memory-Register Logical Or with EFLAGS result -def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi64 addr:$dst), i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Logical XOr with EFLAGS result def : Pat<(parallel (X86xor_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2394,21 +2329,6 @@ def : Pat<(parallel (X86xor_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (XOR64rm GR64:$src1, addr:$src2)>; -// Memory-Register Logical XOr with EFLAGS result -def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR64mi32 addr:$dst, i64immSExt32:$src2)>; - // Register-Register Logical And with EFLAGS result def : Pat<(parallel (X86and_flag GR64:$src1, GR64:$src2), (implicit EFLAGS)), @@ -2427,21 +2347,6 @@ def : Pat<(parallel (X86and_flag GR64:$src1, (loadi64 addr:$src2)), (implicit EFLAGS)), (AND64rm GR64:$src1, addr:$src2)>; -// Memory-Register Logical And with EFLAGS result -def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), GR64:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND64mr addr:$dst, GR64:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), i64immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND64mi8 addr:$dst, i64immSExt8:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi64 addr:$dst), - i64immSExt32:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND64mi32 addr:$dst, i64immSExt32:$src2)>; - //===----------------------------------------------------------------------===// // X86-64 SSE Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td index b730918..e6d1fee 100644 --- a/lib/Target/X86/X86InstrFPStack.td +++ b/lib/Target/X86/X86InstrFPStack.td @@ -562,15 +562,13 @@ def UCOM_Fpr64 : FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, def UCOM_Fpr80 : FpI_ <(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, []>; // FPSW = cmp ST(0) with ST(i) +// CC = ST(0) cmp ST(i) def UCOM_FpIr32: FpIf32<(outs), (ins RFP32:$lhs, RFP32:$rhs), CompareFP, - [(X86cmp RFP32:$lhs, RFP32:$rhs), - (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i) + [(set EFLAGS, (X86cmp RFP32:$lhs, RFP32:$rhs))]>; def UCOM_FpIr64: FpIf64<(outs), (ins RFP64:$lhs, RFP64:$rhs), CompareFP, - [(X86cmp RFP64:$lhs, RFP64:$rhs), - (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i) + [(set EFLAGS, (X86cmp RFP64:$lhs, RFP64:$rhs))]>; def UCOM_FpIr80: FpI_<(outs), (ins RFP80:$lhs, RFP80:$rhs), CompareFP, - [(X86cmp RFP80:$lhs, RFP80:$rhs), - (implicit EFLAGS)]>; // CC = ST(0) cmp ST(i) + [(set EFLAGS, (X86cmp RFP80:$lhs, RFP80:$rhs))]>; } let Defs = [EFLAGS], Uses = [ST0] in { diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 1225b68..c80a18d 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -21,8 +21,7 @@ def SDTIntShiftDOp: SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>]>; -// FIXME: Should be modelled as returning i32 -def SDTX86CmpTest : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>; +def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisSameAs<1, 2>]>; def SDTX86Cmov : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, @@ -120,12 +119,12 @@ def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary, [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def X86vastart_save_xmm_regs : SDNode<"X86ISD::VASTART_SAVE_XMM_REGS", SDT_X86VASTART_SAVE_XMM_REGS, - [SDNPHasChain]>; + [SDNPHasChain, SDNPVariadic]>; def X86callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_X86CallSeqStart, @@ -135,7 +134,8 @@ def X86callseq_end : [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def X86call : SDNode<"X86ISD::CALL", SDT_X86Call, - [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOutFlag, SDNPOptInFlag, + SDNPVariadic]>; def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr, [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore]>; @@ -158,7 +158,7 @@ def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET, [SDNPHasChain]>; def X86tcret : SDNode<"X86ISD::TC_RETURN", SDT_X86TCRET, - [SDNPHasChain, SDNPOptInFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>; def X86add_flag : SDNode<"X86ISD::ADD", SDTBinaryArithWithFlags, [SDNPCommutative]>; @@ -661,9 +661,9 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // Loop instructions -def LOOP : I<0xE2, RawFrm, (ins brtarget8:$dst), (outs), "loop\t$dst", []>; -def LOOPE : I<0xE1, RawFrm, (ins brtarget8:$dst), (outs), "loope\t$dst", []>; -def LOOPNE : I<0xE0, RawFrm, (ins brtarget8:$dst), (outs), "loopne\t$dst", []>; +def LOOP : I<0xE2, RawFrm, (outs), (ins brtarget8:$dst), "loop\t$dst", []>; +def LOOPE : I<0xE1, RawFrm, (outs), (ins brtarget8:$dst), "loope\t$dst", []>; +def LOOPNE : I<0xE0, RawFrm, (outs), (ins brtarget8:$dst), "loopne\t$dst", []>; //===----------------------------------------------------------------------===// // Call Instructions... @@ -3200,17 +3200,16 @@ let Defs = [EFLAGS] in { let isCommutable = 1 in { // TEST X, Y --> TEST Y, X def TEST8rr : I<0x84, MRMSrcReg, (outs), (ins GR8:$src1, GR8:$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR8:$src1, GR8:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR8:$src1, GR8:$src2), 0))]>; def TEST16rr : I<0x85, MRMSrcReg, (outs), (ins GR16:$src1, GR16:$src2), "test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR16:$src1, GR16:$src2), 0), - (implicit EFLAGS)]>, + [(set EFLAGS, (X86cmp (and_su GR16:$src1, GR16:$src2), + 0))]>, OpSize; def TEST32rr : I<0x85, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR32:$src1, GR32:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR32:$src1, GR32:$src2), + 0))]>; } def TEST8i8 : Ii8<0xA8, RawFrm, (outs), (ins i8imm:$src), @@ -3222,48 +3221,46 @@ def TEST32i32 : Ii32<0xA9, RawFrm, (outs), (ins i32imm:$src), def TEST8rm : I<0x84, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR8:$src1, (loadi8 addr:$src2)), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR8:$src1, (loadi8 addr:$src2)), + 0))]>; def TEST16rm : I<0x85, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2), "test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR16:$src1, (loadi16 addr:$src2)), 0), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (and GR16:$src1, + (loadi16 addr:$src2)), 0))]>, OpSize; def TEST32rm : I<0x85, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and GR32:$src1, (loadi32 addr:$src2)), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and GR32:$src1, + (loadi32 addr:$src2)), 0))]>; def TEST8ri : Ii8 <0xF6, MRM0r, // flags = GR8 & imm8 (outs), (ins GR8:$src1, i8imm:$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR8:$src1, imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR8:$src1, imm:$src2), 0))]>; def TEST16ri : Ii16<0xF7, MRM0r, // flags = GR16 & imm16 (outs), (ins GR16:$src1, i16imm:$src2), "test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR16:$src1, imm:$src2), 0), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (and_su GR16:$src1, imm:$src2), 0))]>, + OpSize; def TEST32ri : Ii32<0xF7, MRM0r, // flags = GR32 & imm32 (outs), (ins GR32:$src1, i32imm:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and_su GR32:$src1, imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and_su GR32:$src1, imm:$src2), 0))]>; def TEST8mi : Ii8 <0xF6, MRM0m, // flags = [mem8] & imm8 (outs), (ins i8mem:$src1, i8imm:$src2), "test{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi8 addr:$src1), imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and (loadi8 addr:$src1), imm:$src2), + 0))]>; def TEST16mi : Ii16<0xF7, MRM0m, // flags = [mem16] & imm16 (outs), (ins i16mem:$src1, i16imm:$src2), "test{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi16 addr:$src1), imm:$src2), 0), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (and (loadi16 addr:$src1), imm:$src2), + 0))]>, OpSize; def TEST32mi : Ii32<0xF7, MRM0m, // flags = [mem32] & imm32 (outs), (ins i32mem:$src1, i32imm:$src2), "test{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (and (loadi32 addr:$src1), imm:$src2), 0), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (and (loadi32 addr:$src1), imm:$src2), + 0))]>; } // Defs = [EFLAGS] @@ -3477,45 +3474,41 @@ def CMP32i32 : Ii32<0x3D, RawFrm, (outs), (ins i32imm:$src), def CMP8rr : I<0x38, MRMDestReg, (outs), (ins GR8 :$src1, GR8 :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR8:$src1, GR8:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR8:$src1, GR8:$src2))]>; def CMP16rr : I<0x39, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, GR16:$src2), (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, GR16:$src2))]>, OpSize; def CMP32rr : I<0x39, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, GR32:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, GR32:$src2))]>; def CMP8mr : I<0x38, MRMDestMem, (outs), (ins i8mem :$src1, GR8 :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi8 addr:$src1), GR8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi8 addr:$src1), GR8:$src2))]>; def CMP16mr : I<0x39, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi16 addr:$src1), GR16:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (loadi16 addr:$src1), GR16:$src2))]>, + OpSize; def CMP32mr : I<0x39, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi32 addr:$src1), GR32:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi32 addr:$src1), GR32:$src2))]>; def CMP8rm : I<0x3A, MRMSrcMem, (outs), (ins GR8 :$src1, i8mem :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR8:$src1, (loadi8 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR8:$src1, (loadi8 addr:$src2)))]>; def CMP16rm : I<0x3B, MRMSrcMem, (outs), (ins GR16:$src1, i16mem:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, (loadi16 addr:$src2)), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, (loadi16 addr:$src2)))]>, + OpSize; def CMP32rm : I<0x3B, MRMSrcMem, (outs), (ins GR32:$src1, i32mem:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, (loadi32 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, (loadi32 addr:$src2)))]>; // These are alternate spellings for use by the disassembler, we mark them as // code gen only to ensure they aren't matched by the assembler. @@ -3531,51 +3524,47 @@ let isCodeGenOnly = 1 in { def CMP8ri : Ii8<0x80, MRM7r, (outs), (ins GR8:$src1, i8imm:$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR8:$src1, imm:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR8:$src1, imm:$src2))]>; def CMP16ri : Ii16<0x81, MRM7r, (outs), (ins GR16:$src1, i16imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, imm:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, imm:$src2))]>, OpSize; def CMP32ri : Ii32<0x81, MRM7r, (outs), (ins GR32:$src1, i32imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, imm:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, imm:$src2))]>; def CMP8mi : Ii8 <0x80, MRM7m, (outs), (ins i8mem :$src1, i8imm :$src2), "cmp{b}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi8 addr:$src1), imm:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi8 addr:$src1), imm:$src2))]>; def CMP16mi : Ii16<0x81, MRM7m, (outs), (ins i16mem:$src1, i16imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi16 addr:$src1), imm:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (loadi16 addr:$src1), imm:$src2))]>, + OpSize; def CMP32mi : Ii32<0x81, MRM7m, (outs), (ins i32mem:$src1, i32imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi32 addr:$src1), imm:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi32 addr:$src1), imm:$src2))]>; def CMP16ri8 : Ii8<0x83, MRM7r, (outs), (ins GR16:$src1, i16i8imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp GR16:$src1, i16immSExt8:$src2))]>, + OpSize; def CMP16mi8 : Ii8<0x83, MRM7m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "cmp{w}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi16 addr:$src1), i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize; + [(set EFLAGS, (X86cmp (loadi16 addr:$src1), + i16immSExt8:$src2))]>, OpSize; def CMP32mi8 : Ii8<0x83, MRM7m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp (loadi32 addr:$src1), i32immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp (loadi32 addr:$src1), + i32immSExt8:$src2))]>; def CMP32ri8 : Ii8<0x83, MRM7r, (outs), (ins GR32:$src1, i32i8imm:$src2), "cmp{l}\t{$src2, $src1|$src1, $src2}", - [(X86cmp GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp GR32:$src1, i32immSExt8:$src2))]>; } // Defs = [EFLAGS] // Bit tests. @@ -3583,12 +3572,10 @@ def CMP32ri8 : Ii8<0x83, MRM7r, let Defs = [EFLAGS] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR16:$src1, GR16:$src2), - (implicit EFLAGS)]>, OpSize, TB; + [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, OpSize, TB; def BT32rr : I<0xA3, MRMDestReg, (outs), (ins GR32:$src1, GR32:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR32:$src1, GR32:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR32:$src1, GR32:$src2))]>, TB; // Unlike with the register+register form, the memory+register form of the // bt instruction does not ignore the high bits of the index. From ISel's @@ -3610,23 +3597,22 @@ def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2), def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR16:$src1, i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize, TB; + [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>, + OpSize, TB; def BT32ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR32:$src1, i32i8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(X86bt GR32:$src1, i32immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt GR32:$src1, i32immSExt8:$src2))]>, TB; // Note that these instructions don't need FastBTMem because that // only applies when the other operand is in a register. When it's // an immediate, bt is still fast. def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", - [(X86bt (loadi16 addr:$src1), i16immSExt8:$src2), - (implicit EFLAGS)]>, OpSize, TB; + [(set EFLAGS, (X86bt (loadi16 addr:$src1), i16immSExt8:$src2)) + ]>, OpSize, TB; def BT32mi8 : Ii8<0xBA, MRM4m, (outs), (ins i32mem:$src1, i32i8imm:$src2), "bt{l}\t{$src2, $src1|$src1, $src2}", - [(X86bt (loadi32 addr:$src1), i32immSExt8:$src2), - (implicit EFLAGS)]>, TB; + [(set EFLAGS, (X86bt (loadi32 addr:$src1), i32immSExt8:$src2)) + ]>, TB; def BTC16rr : I<0xBB, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize, TB; @@ -4401,11 +4387,11 @@ def : Pat<(subc GR32:$src1, i32immSExt8:$src2), // Comparisons. // TEST R,R is smaller than CMP R,0 -def : Pat<(parallel (X86cmp GR8:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR8:$src1, 0), (TEST8rr GR8:$src1, GR8:$src1)>; -def : Pat<(parallel (X86cmp GR16:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR16:$src1, 0), (TEST16rr GR16:$src1, GR16:$src1)>; -def : Pat<(parallel (X86cmp GR32:$src1, 0), (implicit EFLAGS)), +def : Pat<(X86cmp GR32:$src1, 0), (TEST32rr GR32:$src1, GR32:$src1)>; // Conditional moves with folded loads with operands swapped and conditions @@ -4799,42 +4785,6 @@ def : Pat<(parallel (X86add_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register Addition with EFLAGS result -def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer Addition with EFLAGS result -def : Pat<(parallel (store (X86add_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86add_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (ADD32mi8 addr:$dst, i32immSExt8:$src2)>; - // Register-Register Subtraction with EFLAGS result def : Pat<(parallel (X86sub_flag GR8:$src1, GR8:$src2), (implicit EFLAGS)), @@ -4874,43 +4824,6 @@ def : Pat<(parallel (X86sub_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (SUB32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer Subtraction with EFLAGS result -def : Pat<(parallel (store (X86sub_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86sub_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (SUB32mi8 addr:$dst, i32immSExt8:$src2)>; - - // Register-Register Signed Integer Multiply with EFLAGS result def : Pat<(parallel (X86smul_flag GR16:$src1, GR16:$src2), (implicit EFLAGS)), @@ -4969,36 +4882,18 @@ def : Pat<(parallel (X86smul_flag GR32:$src1, 2), // INC and DEC with EFLAGS result. Note that these do not set CF. def : Pat<(parallel (X86inc_flag GR8:$src), (implicit EFLAGS)), (INC8r GR8:$src)>; -def : Pat<(parallel (store (i8 (X86inc_flag (loadi8 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC8m addr:$dst)>; def : Pat<(parallel (X86dec_flag GR8:$src), (implicit EFLAGS)), (DEC8r GR8:$src)>; -def : Pat<(parallel (store (i8 (X86dec_flag (loadi8 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC8m addr:$dst)>; def : Pat<(parallel (X86inc_flag GR16:$src), (implicit EFLAGS)), (INC16r GR16:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i16 (X86inc_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC16m addr:$dst)>, Requires<[In32BitMode]>; def : Pat<(parallel (X86dec_flag GR16:$src), (implicit EFLAGS)), (DEC16r GR16:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i16 (X86dec_flag (loadi16 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC16m addr:$dst)>, Requires<[In32BitMode]>; def : Pat<(parallel (X86inc_flag GR32:$src), (implicit EFLAGS)), (INC32r GR32:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i32 (X86inc_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (INC32m addr:$dst)>, Requires<[In32BitMode]>; def : Pat<(parallel (X86dec_flag GR32:$src), (implicit EFLAGS)), (DEC32r GR32:$src)>, Requires<[In32BitMode]>; -def : Pat<(parallel (store (i32 (X86dec_flag (loadi32 addr:$dst))), addr:$dst), - (implicit EFLAGS)), - (DEC32m addr:$dst)>, Requires<[In32BitMode]>; // Register-Register Or with EFLAGS result def : Pat<(parallel (X86or_flag GR8:$src1, GR8:$src2), @@ -5039,42 +4934,6 @@ def : Pat<(parallel (X86or_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (OR32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register Or with EFLAGS result -def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer Or with EFLAGS result -def : Pat<(parallel (store (X86or_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86or_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (OR32mi8 addr:$dst, i32immSExt8:$src2)>; - // Register-Register XOr with EFLAGS result def : Pat<(parallel (X86xor_flag GR8:$src1, GR8:$src2), (implicit EFLAGS)), @@ -5114,42 +4973,6 @@ def : Pat<(parallel (X86xor_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (XOR32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register XOr with EFLAGS result -def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer XOr with EFLAGS result -def : Pat<(parallel (store (X86xor_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86xor_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (XOR32mi8 addr:$dst, i32immSExt8:$src2)>; - // Register-Register And with EFLAGS result def : Pat<(parallel (X86and_flag GR8:$src1, GR8:$src2), (implicit EFLAGS)), @@ -5189,42 +5012,6 @@ def : Pat<(parallel (X86and_flag GR32:$src1, i32immSExt8:$src2), (implicit EFLAGS)), (AND32ri8 GR32:$src1, i32immSExt8:$src2)>; -// Memory-Register And with EFLAGS result -def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), GR8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND8mr addr:$dst, GR8:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), GR16:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND16mr addr:$dst, GR16:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), GR32:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND32mr addr:$dst, GR32:$src2)>; - -// Memory-Integer And with EFLAGS result -def : Pat<(parallel (store (X86and_flag (loadi8 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND8mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND16mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), imm:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND32mi addr:$dst, imm:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi16 addr:$dst), i16immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND16mi8 addr:$dst, i16immSExt8:$src2)>; -def : Pat<(parallel (store (X86and_flag (loadi32 addr:$dst), i32immSExt8:$src2), - addr:$dst), - (implicit EFLAGS)), - (AND32mi8 addr:$dst, i32immSExt8:$src2)>; - // -disable-16bit support. def : Pat<(truncstorei16 (i16 imm:$src), addr:$dst), (MOV16mi addr:$dst, imm:$src)>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 18f9e52..720b663 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -513,11 +513,10 @@ let mayLoad = 1 in let Defs = [EFLAGS] in { def UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins FR32:$src1, FR32:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR32:$src1, FR32:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR32:$src1, FR32:$src2))]>; def UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs), (ins FR32:$src1, f32mem:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR32:$src1, (loadf32 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR32:$src1, (loadf32 addr:$src2)))]>; def COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comiss\t{$src2, $src1|$src1, $src2}", []>; @@ -546,21 +545,21 @@ let Constraints = "$src1 = $dst" in { let Defs = [EFLAGS] in { def Int_UCOMISSrr: PSI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v4f32 VR128:$src1), VR128:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), + VR128:$src2))]>; def Int_UCOMISSrm: PSI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), "ucomiss\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v4f32 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v4f32 VR128:$src1), + (load addr:$src2)))]>; def Int_COMISSrr: PSI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comiss\t{$src2, $src1|$src1, $src2}", - [(X86comi (v4f32 VR128:$src1), VR128:$src2), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v4f32 VR128:$src1), + VR128:$src2))]>; def Int_COMISSrm: PSI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "comiss\t{$src2, $src1|$src1, $src2}", - [(X86comi (v4f32 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v4f32 VR128:$src1), + (load addr:$src2)))]>; } // Defs = [EFLAGS] // Aliases of packed SSE1 instructions for scalar use. These all have names @@ -1298,11 +1297,10 @@ let mayLoad = 1 in let Defs = [EFLAGS] in { def UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins FR64:$src1, FR64:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR64:$src1, FR64:$src2), (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR64:$src1, FR64:$src2))]>; def UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs), (ins FR64:$src1, f64mem:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86cmp FR64:$src1, (loadf64 addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86cmp FR64:$src1, (loadf64 addr:$src2)))]>; } // Defs = [EFLAGS] // Aliases to match intrinsics which expect XMM operand(s). @@ -1324,21 +1322,21 @@ let Constraints = "$src1 = $dst" in { let Defs = [EFLAGS] in { def Int_UCOMISDrr: PDI<0x2E, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v2f64 VR128:$src1), (v2f64 VR128:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), + VR128:$src2))]>; def Int_UCOMISDrm: PDI<0x2E, MRMSrcMem, (outs),(ins VR128:$src1, f128mem:$src2), "ucomisd\t{$src2, $src1|$src1, $src2}", - [(X86ucomi (v2f64 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86ucomi (v2f64 VR128:$src1), + (load addr:$src2)))]>; def Int_COMISDrr: PDI<0x2F, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2), "comisd\t{$src2, $src1|$src1, $src2}", - [(X86comi (v2f64 VR128:$src1), (v2f64 VR128:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v2f64 VR128:$src1), + VR128:$src2))]>; def Int_COMISDrm: PDI<0x2F, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2), "comisd\t{$src2, $src1|$src1, $src2}", - [(X86comi (v2f64 VR128:$src1), (load addr:$src2)), - (implicit EFLAGS)]>; + [(set EFLAGS, (X86comi (v2f64 VR128:$src1), + (load addr:$src2)))]>; } // Defs = [EFLAGS] // Aliases of packed SSE2 instructions for scalar use. These all have names @@ -3825,54 +3823,65 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), let Constraints = "$src1 = $dst" in { def CRC32m8 : SS42FI<0xF0, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i8mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{b} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_8 GR32:$src1, - (load addr:$src2)))]>, OpSize; + (load addr:$src2)))]>; def CRC32r8 : SS42FI<0xF0, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR8:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{b} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>, - OpSize; + (int_x86_sse42_crc32_8 GR32:$src1, GR8:$src2))]>; def CRC32m16 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i16mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{w} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_16 GR32:$src1, (load addr:$src2)))]>, OpSize; def CRC32r16 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR16:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{w} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_16 GR32:$src1, GR16:$src2))]>, OpSize; def CRC32m32 : SS42FI<0xF1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$src1, i32mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{l} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, (int_x86_sse42_crc32_32 GR32:$src1, - (load addr:$src2)))]>, OpSize; + (load addr:$src2)))]>; def CRC32r32 : SS42FI<0xF1, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{l} \t{$src2, $src1|$src1, $src2}", [(set GR32:$dst, - (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>, - OpSize; - def CRC64m64 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst), + (int_x86_sse42_crc32_32 GR32:$src1, GR32:$src2))]>; + def CRC64m8 : SS42FI<0xF0, MRMSrcMem, (outs GR64:$dst), + (ins GR64:$src1, i8mem:$src2), + "crc32{b} \t{$src2, $src1|$src1, $src2}", + [(set GR64:$dst, + (int_x86_sse42_crc64_8 GR64:$src1, + (load addr:$src2)))]>, + REX_W; + def CRC64r8 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, GR8:$src2), + "crc32{b} \t{$src2, $src1|$src1, $src2}", + [(set GR64:$dst, + (int_x86_sse42_crc64_8 GR64:$src1, GR8:$src2))]>, + REX_W; + def CRC64m64 : SS42FI<0xF1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$src1, i64mem:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{q} \t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, - (int_x86_sse42_crc32_64 GR64:$src1, + (int_x86_sse42_crc64_64 GR64:$src1, (load addr:$src2)))]>, - OpSize, REX_W; - def CRC64r64 : SS42FI<0xF0, MRMSrcReg, (outs GR64:$dst), + REX_W; + def CRC64r64 : SS42FI<0xF1, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "crc32 \t{$src2, $src1|$src1, $src2}", + "crc32{q} \t{$src2, $src1|$src1, $src2}", [(set GR64:$dst, - (int_x86_sse42_crc32_64 GR64:$src1, GR64:$src2))]>, - OpSize, REX_W; + (int_x86_sse42_crc64_64 GR64:$src1, GR64:$src2))]>, + REX_W; } // String/text processing instructions. diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp index 3f18696..a9681e6 100644 --- a/lib/Target/X86/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/X86MCCodeEmitter.cpp @@ -38,14 +38,15 @@ public: ~X86MCCodeEmitter() {} unsigned getNumFixupKinds() const { - return 3; + return 4; } const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { const static MCFixupKindInfo Infos[] = { - { "reloc_pcrel_4byte", 0, 4 * 8 }, - { "reloc_pcrel_1byte", 0, 1 * 8 }, - { "reloc_riprel_4byte", 0, 4 * 8 } + { "reloc_pcrel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_pcrel_1byte", 0, 1 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_riprel_4byte", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel }, + { "reloc_riprel_4byte_movq_load", 0, 4 * 8, MCFixupKindInfo::FKF_IsPCRel } }; if (Kind < FirstTargetFixupKind) @@ -165,7 +166,8 @@ EmitImmediate(const MCOperand &DispOp, unsigned Size, MCFixupKind FixupKind, // If the fixup is pc-relative, we need to bias the value to be relative to // the start of the field, not the end of the field. if (FixupKind == MCFixupKind(X86::reloc_pcrel_4byte) || - FixupKind == MCFixupKind(X86::reloc_riprel_4byte)) + FixupKind == MCFixupKind(X86::reloc_riprel_4byte) || + FixupKind == MCFixupKind(X86::reloc_riprel_4byte_movq_load)) ImmOffset -= 4; if (FixupKind == MCFixupKind(X86::reloc_pcrel_1byte)) ImmOffset -= 1; @@ -197,6 +199,15 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, "Invalid rip-relative address"); EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS); + unsigned FixupKind = X86::reloc_riprel_4byte; + + // movq loads are handled with a special relocation form which allows the + // linker to eliminate some loads for GOT references which end up in the + // same linkage unit. + if (MI.getOpcode() == X86::MOV64rm || + MI.getOpcode() == X86::MOV64rm_TC) + FixupKind = X86::reloc_riprel_4byte_movq_load; + // rip-relative addressing is actually relative to the *next* instruction. // Since an immediate can follow the mod/rm byte for an instruction, this // means that we need to bias the immediate field of the instruction with @@ -204,7 +215,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // expression to emit. int ImmSize = X86II::hasImm(TSFlags) ? X86II::getSizeOfImm(TSFlags) : 0; - EmitImmediate(Disp, 4, MCFixupKind(X86::reloc_riprel_4byte), + EmitImmediate(Disp, 4, MCFixupKind(FixupKind), CurByte, OS, Fixups, -ImmSize); return; } @@ -269,7 +280,10 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op, // Emit the normal disp32 encoding. EmitByte(ModRMByte(2, RegOpcodeField, 4), CurByte, OS); ForceDisp32 = true; - } else if (Disp.getImm() == 0 && BaseReg != X86::EBP) { + } else if (Disp.getImm() == 0 && + // Base reg can't be anything that ends up with '5' as the base + // reg, it is the magic [*] nomenclature that indicates no base. + BaseRegNo != N86::EBP) { // Emit no displacement ModR/M byte EmitByte(ModRMByte(0, RegOpcodeField, 4), CurByte, OS); } else if (isDisp8(Disp.getImm())) { diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index f907614..cd56816 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -366,12 +366,3 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, if (StackAlignment) stackAlignment = StackAlignment; } - -bool X86Subtarget::enablePostRAScheduler( - CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const { - Mode = TargetSubtarget::ANTIDEP_CRITICAL; - CriticalPathRCs.clear(); - return OptLevel >= CodeGenOpt::Aggressive; -} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 50338d3..56220db 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -230,12 +230,6 @@ public: /// indicating the number of scheduling cycles of backscheduling that /// should be attempted. unsigned getSpecialAddressLatency() const; - - /// enablePostRAScheduler - X86 target is enabling post-alloc scheduling - /// at 'More' optimization level. - bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& Mode, - RegClassVector& CriticalPathRCs) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 56ddaf8..f13e6f3 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -22,7 +22,7 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { +static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 46805d5..2e9a1e5 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -29,7 +29,8 @@ include "XCoreInstrFormats.td" // Call def SDT_XCoreBranchLink : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink, - [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; + [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag, + SDNPVariadic]>; def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone, [SDNPHasChain, SDNPOptInFlag]>; diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 7b1e9c0..d8e97a2 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -622,12 +622,12 @@ static bool AllUsesOfValueWillTrapIfNull(Value *V, return false; // Storing the value. } } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) { - if (CI->getOperand(0) != V) { + if (CI->getCalledValue() != V) { //cerr << "NONTRAPPING USE: " << **UI; return false; // Not calling the ptr } } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) { - if (II->getOperand(0) != V) { + if (II->getCalledValue() != V) { //cerr << "NONTRAPPING USE: " << **UI; return false; // Not calling the ptr } diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index bdb46eb..65f2e15 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -820,7 +820,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) { // We cannot remove an invoke, because it would change the CFG, just // change the callee to a null pointer. - cast<InvokeInst>(OldCall)->setOperand(0, + cast<InvokeInst>(OldCall)->setCalledFunction( Constant::getNullValue(CalleeF->getType())); return 0; } diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index de93e9f..eb04d94 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -307,6 +307,10 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, } } } + + // The insertion point instruction may have been deleted; clear it out + // so that the rewriter doesn't trip over it later. + Rewriter.clearInsertPoint(); } void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) { diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 05027ae..22f3628 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -1400,6 +1400,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) { setOnlyReadsMemory(F); setDoesNotThrow(F); setDoesNotCapture(F, 1); + } else if (Name == "strchr" || + Name == "strrchr") { + if (FTy->getNumParams() != 2 || + !FTy->getParamType(0)->isPointerTy() || + !FTy->getParamType(1)->isIntegerTy()) + continue; + setOnlyReadsMemory(F); + setDoesNotThrow(F); } else if (Name == "strcpy" || Name == "stpcpy" || Name == "strcat" || @@ -1428,7 +1436,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) { } else if (Name == "strcmp" || Name == "strspn" || Name == "strncmp" || - Name ==" strcspn" || + Name == "strcspn" || Name == "strcoll" || Name == "strcasecmp" || Name == "strncasecmp") { diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index fd74241..0eb9f02 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -1027,6 +1027,15 @@ static void WriteConstantInt(raw_ostream &Out, const Constant *CV, return; } + if (const ConstantUnion *CU = dyn_cast<ConstantUnion>(CV)) { + Out << "{ "; + TypePrinter.print(CU->getOperand(0)->getType(), Out); + Out << ' '; + WriteAsOperandInternal(Out, CU->getOperand(0), &TypePrinter, Machine); + Out << " }"; + return; + } + if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { const Type *ETy = CP->getType()->getElementType(); assert(CP->getNumOperands() > 0 && diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 721e96a..f141382 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -1623,10 +1623,6 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { MDNode *MD = cast<MDNode>(CI.getOperand(1)); Assert1(MD->getNumOperands() == 1, "invalid llvm.dbg.declare intrinsic call 2", &CI); - if (MD->getOperand(0)) - if (Constant *C = dyn_cast<Constant>(MD->getOperand(0))) - Assert1(C && !isa<ConstantPointerNull>(C), - "invalid llvm.dbg.declare intrinsic call 3", &CI); } break; case Intrinsic::memcpy: case Intrinsic::memmove: |