Diffstat (limited to 'lib/CodeGen')
-rw-r--r--   lib/CodeGen/CGBuiltin.cpp   | 69
-rw-r--r--   lib/CodeGen/CGDebugInfo.cpp | 41
-rw-r--r--   lib/CodeGen/CGExpr.cpp      | 80
-rw-r--r--   lib/CodeGen/CGExprCXX.cpp   |  9
-rw-r--r--   lib/CodeGen/CGStmt.cpp      | 24
-rw-r--r--   lib/CodeGen/CGValue.h       |  2
6 files changed, 201 insertions, 24 deletions
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 65c782e..59ed313 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -229,6 +229,35 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
     return RValue::get(Result);
   }
+
+  case Builtin::BI__builtin_conj:
+  case Builtin::BI__builtin_conjf:
+  case Builtin::BI__builtin_conjl: {
+    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
+    Value *Real = ComplexVal.first;
+    Value *Imag = ComplexVal.second;
+    Value *Zero =
+      Imag->getType()->isFPOrFPVectorTy()
+        ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
+        : llvm::Constant::getNullValue(Imag->getType());
+
+    Imag = Builder.CreateFSub(Zero, Imag, "sub");
+    return RValue::getComplex(std::make_pair(Real, Imag));
+  }
+  case Builtin::BI__builtin_creal:
+  case Builtin::BI__builtin_crealf:
+  case Builtin::BI__builtin_creall: {
+    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
+    return RValue::get(ComplexVal.first);
+  }
+
+  case Builtin::BI__builtin_cimag:
+  case Builtin::BI__builtin_cimagf:
+  case Builtin::BI__builtin_cimagl: {
+    ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
+    return RValue::get(ComplexVal.second);
+  }
+
   case Builtin::BI__builtin_ctzs:
   case Builtin::BI__builtin_ctz:
   case Builtin::BI__builtin_ctzl:
@@ -1720,8 +1749,29 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
     Ops.push_back(GetPointeeAlignmentValue(E->getArg(0)));
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty),
                         Ops, "vld1");
-  case ARM::BI__builtin_neon_vld1_lane_v:
-  case ARM::BI__builtin_neon_vld1q_lane_v: {
+  case ARM::BI__builtin_neon_vld1q_lane_v:
+    // Handle 64-bit integer elements as a special case.  Use shuffles of
+    // one-element vectors to avoid poor code for i64 in the backend.
+    if (VTy->getElementType()->isIntegerTy(64)) {
+      // Extract the other lane.
+      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+      int Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
+      Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
+      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
+      // Load the value as a one-element vector.
+      Ty = llvm::VectorType::get(VTy->getElementType(), 1);
+      Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Ty);
+      Value *Ld = Builder.CreateCall2(F, Ops[0],
+                                      GetPointeeAlignmentValue(E->getArg(0)));
+      // Combine them.
+      SmallVector<Constant*, 2> Indices;
+      Indices.push_back(ConstantInt::get(Int32Ty, 1-Lane));
+      Indices.push_back(ConstantInt::get(Int32Ty, Lane));
+      SV = llvm::ConstantVector::get(Indices);
+      return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
+    }
+    // fall through
+  case ARM::BI__builtin_neon_vld1_lane_v: {
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Ty = llvm::PointerType::getUnqual(VTy->getElementType());
     Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
@@ -2086,8 +2136,19 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
     Ops.push_back(GetPointeeAlignmentValue(E->getArg(0)));
     return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1, Ty),
                         Ops, "");
-  case ARM::BI__builtin_neon_vst1_lane_v:
-  case ARM::BI__builtin_neon_vst1q_lane_v: {
+  case ARM::BI__builtin_neon_vst1q_lane_v:
+    // Handle 64-bit integer elements as a special case.  Use a shuffle to get
+    // a one-element vector and avoid poor code for i64 in the backend.
+    if (VTy->getElementType()->isIntegerTy(64)) {
+      Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
+      Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
+      Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
+      Ops[2] = GetPointeeAlignmentValue(E->getArg(0));
+      return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
+                                                 Ops[1]->getType()), Ops);
+    }
+    // fall through
+  case ARM::BI__builtin_neon_vst1_lane_v: {
     Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
     Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
     Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 00127ac..fd1c7a3 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -94,8 +94,10 @@ llvm::DIDescriptor CGDebugInfo::getContextDescriptor(const Decl *Context) {
   llvm::DenseMap<const Decl *, llvm::WeakVH>::iterator
     I = RegionMap.find(Context);
-  if (I != RegionMap.end())
-    return llvm::DIDescriptor(dyn_cast_or_null<llvm::MDNode>(&*I->second));
+  if (I != RegionMap.end()) {
+    llvm::Value *V = I->second;
+    return llvm::DIDescriptor(dyn_cast_or_null<llvm::MDNode>(V));
+  }
 
   // Check namespace.
   if (const NamespaceDecl *NSDecl = dyn_cast<NamespaceDecl>(Context))
@@ -227,8 +229,8 @@ llvm::DIFile CGDebugInfo::getOrCreateFile(SourceLocation Loc) {
 
   if (it != DIFileCache.end()) {
     // Verify that the information still exists.
-    if (&*it->second)
-      return llvm::DIFile(cast<llvm::MDNode>(it->second));
+    if (llvm::Value *V = it->second)
+      return llvm::DIFile(cast<llvm::MDNode>(V));
   }
 
   llvm::DIFile F = DBuilder.createFile(PLoc.getFilename(), getCurrentDirname());
@@ -525,8 +527,10 @@ llvm::DIDescriptor CGDebugInfo::createContextChain(const Decl *Context) {
   // See if we already have the parent.
   llvm::DenseMap<const Decl *, llvm::WeakVH>::iterator
     I = RegionMap.find(Context);
-  if (I != RegionMap.end())
-    return llvm::DIDescriptor(dyn_cast_or_null<llvm::MDNode>(&*I->second));
+  if (I != RegionMap.end()) {
+    llvm::Value *V = I->second;
+    return llvm::DIDescriptor(dyn_cast_or_null<llvm::MDNode>(V));
+  }
 
   // Check namespace.
   if (const NamespaceDecl *NSDecl = dyn_cast<NamespaceDecl>(Context))
@@ -1660,8 +1664,8 @@ llvm::DIType CGDebugInfo::getTypeOrNull(QualType Ty) {
     TypeCache.find(Ty.getAsOpaquePtr());
   if (it != TypeCache.end()) {
     // Verify that the debug info still exists.
-    if (&*it->second)
-      return llvm::DIType(cast<llvm::MDNode>(it->second));
+    if (llvm::Value *V = it->second)
+      return llvm::DIType(cast<llvm::MDNode>(V));
   }
 
   return llvm::DIType();
@@ -1679,8 +1683,8 @@ llvm::DIType CGDebugInfo::getCompletedTypeOrNull(QualType Ty) {
     CompletedTypeCache.find(Ty.getAsOpaquePtr());
   if (it != CompletedTypeCache.end()) {
     // Verify that the debug info still exists.
-    if (&*it->second)
-      return llvm::DIType(cast<llvm::MDNode>(it->second));
+    if (llvm::Value *V = it->second)
+      return llvm::DIType(cast<llvm::MDNode>(V));
   }
 
   return llvm::DIType();
@@ -1942,7 +1946,8 @@ llvm::DISubprogram CGDebugInfo::getFunctionDeclaration(const Decl *D) {
   llvm::DenseMap<const FunctionDecl *, llvm::WeakVH>::iterator
     MI = SPCache.find(FD->getCanonicalDecl());
   if (MI != SPCache.end()) {
-    llvm::DISubprogram SP(dyn_cast_or_null<llvm::MDNode>(&*MI->second));
+    llvm::Value *V = MI->second;
+    llvm::DISubprogram SP(dyn_cast_or_null<llvm::MDNode>(V));
     if (SP.isSubprogram() && !llvm::DISubprogram(SP).isDefinition())
       return SP;
   }
@@ -1953,7 +1958,8 @@ llvm::DISubprogram CGDebugInfo::getFunctionDeclaration(const Decl *D) {
     llvm::DenseMap<const FunctionDecl *, llvm::WeakVH>::iterator
       MI = SPCache.find(NextFD->getCanonicalDecl());
     if (MI != SPCache.end()) {
-      llvm::DISubprogram SP(dyn_cast_or_null<llvm::MDNode>(&*MI->second));
+      llvm::Value *V = MI->second;
+      llvm::DISubprogram SP(dyn_cast_or_null<llvm::MDNode>(V));
       if (SP.isSubprogram() && !llvm::DISubprogram(SP).isDefinition())
         return SP;
     }
@@ -2013,7 +2019,8 @@ void CGDebugInfo::EmitFunctionStart(GlobalDecl GD, QualType FnType,
     llvm::DenseMap<const FunctionDecl *, llvm::WeakVH>::iterator
       FI = SPCache.find(FD->getCanonicalDecl());
     if (FI != SPCache.end()) {
-      llvm::DIDescriptor SP(dyn_cast_or_null<llvm::MDNode>(&*FI->second));
+      llvm::Value *V = FI->second;
+      llvm::DIDescriptor SP(dyn_cast_or_null<llvm::MDNode>(V));
       if (SP.isSubprogram() && llvm::DISubprogram(SP).isDefinition()) {
         llvm::MDNode *SPN = SP;
         LexicalBlockStack.push_back(SPN);
@@ -2701,15 +2708,15 @@ void CGDebugInfo::finalize(void) {
          = ReplaceMap.begin(), VE = ReplaceMap.end(); VI != VE; ++VI) {
     llvm::DIType Ty, RepTy;
     // Verify that the debug info still exists.
-    if (&*VI->second)
-      Ty = llvm::DIType(cast<llvm::MDNode>(VI->second));
+    if (llvm::Value *V = VI->second)
+      Ty = llvm::DIType(cast<llvm::MDNode>(V));
 
     llvm::DenseMap<void *, llvm::WeakVH>::iterator it =
       TypeCache.find(VI->first);
     if (it != TypeCache.end()) {
       // Verify that the debug info still exists.
-      if (&*it->second)
-        RepTy = llvm::DIType(cast<llvm::MDNode>(it->second));
+      if (llvm::Value *V = it->second)
+        RepTy = llvm::DIType(cast<llvm::MDNode>(V));
     }
 
     if (Ty.Verify() && Ty.isForwardDecl() && RepTy.Verify()) {
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index ecee7b4..1fe4c18 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -938,6 +938,50 @@ llvm::MDNode *CodeGenFunction::getRangeForLoadFromType(QualType Ty) {
 llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile,
                                                unsigned Alignment, QualType Ty,
                                                llvm::MDNode *TBAAInfo) {
+
+  // For better performance, handle vector loads differently.
+  if (Ty->isVectorType()) {
+    llvm::Value *V;
+    const llvm::Type *EltTy =
+      cast<llvm::PointerType>(Addr->getType())->getElementType();
+
+    const llvm::VectorType *VTy = cast<llvm::VectorType>(EltTy);
+
+    // Handle vectors of size 3, like size 4 for better performance.
+    if (VTy->getNumElements() == 3) {
+
+      // Bitcast to vec4 type.
+      llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(),
+                                                       4);
+      llvm::PointerType *ptVec4Ty =
+        llvm::PointerType::get(vec4Ty,
+                               (cast<llvm::PointerType>(
+                                 Addr->getType()))->getAddressSpace());
+      llvm::Value *Cast = Builder.CreateBitCast(Addr, ptVec4Ty,
+                                                "castToVec4");
+      // Now load value.
+      llvm::Value *LoadVal = Builder.CreateLoad(Cast, Volatile, "loadVec4");
+
+      // Shuffle vector to get vec3.
+      llvm::SmallVector<llvm::Constant*, 3> Mask;
+      Mask.push_back(llvm::ConstantInt::get(
+                                    llvm::Type::getInt32Ty(getLLVMContext()),
+                                    0));
+      Mask.push_back(llvm::ConstantInt::get(
+                                    llvm::Type::getInt32Ty(getLLVMContext()),
+                                    1));
+      Mask.push_back(llvm::ConstantInt::get(
+                                    llvm::Type::getInt32Ty(getLLVMContext()),
+                                    2));
+
+      llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+      V = Builder.CreateShuffleVector(LoadVal,
+                                      llvm::UndefValue::get(vec4Ty),
+                                      MaskV, "extractVec");
+      return EmitFromMemory(V, Ty);
+    }
+  }
+
   llvm::LoadInst *Load = Builder.CreateLoad(Addr);
   if (Volatile)
     Load->setVolatile(true);
@@ -984,6 +1028,42 @@ void CodeGenFunction::EmitStoreOfScalar(llvm::Value *Value, llvm::Value *Addr,
                                         QualType Ty,
                                         llvm::MDNode *TBAAInfo,
                                         bool isInit) {
+
+  // Handle vectors differently to get better performance.
+  if (Ty->isVectorType()) {
+    llvm::Type *SrcTy = Value->getType();
+    llvm::VectorType *VecTy = cast<llvm::VectorType>(SrcTy);
+    // Handle vec3 special.
+    if (VecTy->getNumElements() == 3) {
+      llvm::LLVMContext &VMContext = getLLVMContext();
+
+      // Our source is a vec3, do a shuffle vector to make it a vec4.
+      llvm::SmallVector<llvm::Constant*, 4> Mask;
+      Mask.push_back(llvm::ConstantInt::get(
+                                            llvm::Type::getInt32Ty(VMContext),
+                                            0));
+      Mask.push_back(llvm::ConstantInt::get(
+                                            llvm::Type::getInt32Ty(VMContext),
+                                            1));
+      Mask.push_back(llvm::ConstantInt::get(
+                                            llvm::Type::getInt32Ty(VMContext),
+                                            2));
+      Mask.push_back(llvm::UndefValue::get(llvm::Type::getInt32Ty(VMContext)));
+
+      llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+      Value = Builder.CreateShuffleVector(Value,
+                                          llvm::UndefValue::get(VecTy),
+                                          MaskV, "extractVec");
+      SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
+    }
+    llvm::PointerType *DstPtr = cast<llvm::PointerType>(Addr->getType());
+    if (DstPtr->getElementType() != SrcTy) {
+      llvm::Type *MemTy =
+        llvm::PointerType::get(SrcTy, DstPtr->getAddressSpace());
+      Addr = Builder.CreateBitCast(Addr, MemTy, "storetmp");
+    }
+  }
+
   Value = EmitToMemory(Value, Ty);
 
   llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile);
diff --git a/lib/CodeGen/CGExprCXX.cpp b/lib/CodeGen/CGExprCXX.cpp
index 7c2c9f1..31ea1b5 100644
--- a/lib/CodeGen/CGExprCXX.cpp
+++ b/lib/CodeGen/CGExprCXX.cpp
@@ -123,7 +123,14 @@ static bool canDevirtualizeMemberFunctionCalls(ASTContext &Context,
       return false;
   }
 
-  
+
+  // We can devirtualize calls on an object accessed by a class member access
+  // expression, since by C++11 [basic.life]p6 we know that it can't refer to
+  // a derived class object constructed in the same location.
+  if (const MemberExpr *ME = dyn_cast<MemberExpr>(Base))
+    if (const ValueDecl *VD = dyn_cast<ValueDecl>(ME->getMemberDecl()))
+      return VD->getType()->isRecordType();
+
   // We can always devirtualize calls on temporary object expressions.
   if (isa<CXXConstructExpr>(Base))
     return true;
diff --git a/lib/CodeGen/CGStmt.cpp b/lib/CodeGen/CGStmt.cpp
index 467c779..d78908d 100644
--- a/lib/CodeGen/CGStmt.cpp
+++ b/lib/CodeGen/CGStmt.cpp
@@ -1691,14 +1691,36 @@ void CodeGenFunction::EmitMSAsmStmt(const MSAsmStmt &S) {
   std::vector<llvm::Value*> Args;
   std::vector<llvm::Type *> ArgTypes;
+  std::string Constraints;
+
+  // Clobbers
+  for (unsigned i = 0, e = S.getNumClobbers(); i != e; ++i) {
+    StringRef Clobber = S.getClobber(i);
+
+    if (Clobber != "memory" && Clobber != "cc")
+      Clobber = Target.getNormalizedGCCRegisterName(Clobber);
+
+    if (i != 0)
+      Constraints += ',';
+
+    Constraints += "~{";
+    Constraints += Clobber;
+    Constraints += '}';
+  }
+
   // Add machine specific clobbers
   std::string MachineClobbers = Target.getClobbers();
+  if (!MachineClobbers.empty()) {
+    if (!Constraints.empty())
+      Constraints += ',';
+    Constraints += MachineClobbers;
+  }
 
   llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, ArgTypes, false);
   llvm::InlineAsm *IA =
-    llvm::InlineAsm::get(FTy, *S.getAsmString(), MachineClobbers, true);
+    llvm::InlineAsm::get(FTy, *S.getAsmString(), Constraints, true);
   llvm::CallInst *Result = Builder.CreateCall(IA, Args);
   Result->addAttribute(~0, llvm::Attribute::NoUnwind);
   Result->addAttribute(~0, llvm::Attribute::IANSDialect);
diff --git a/lib/CodeGen/CGValue.h b/lib/CodeGen/CGValue.h
index a46f313..c2b8e4d 100644
--- a/lib/CodeGen/CGValue.h
+++ b/lib/CodeGen/CGValue.h
@@ -128,7 +128,7 @@ class LValue {
 
   // The alignment to use when accessing this lvalue.  (For vector elements,
   // this is the alignment of the whole vector.)
-  unsigned short Alignment;
+  int64_t Alignment;
 
   // objective-c's ivar
   bool Ivar:1;
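
For context (not part of the commit): a minimal C example, with a hypothetical file name, that exercises the complex builtins handled by the first CGBuiltin.cpp hunk above. With that change, Clang lowers the conjugate to an fsub on the imaginary half of the complex pair and lowers creal/cimag to direct uses of the pair's components instead of emitting library calls.

/* complex_builtins.c -- hypothetical example, not from the commit above. */
#include <stdio.h>

int main(void) {
  _Complex float z;
  __real__ z = 3.0f;                      /* GNU extension: set real part */
  __imag__ z = 4.0f;                      /* GNU extension: set imaginary part */

  _Complex float c = __builtin_conjf(z);  /* conjugate: imaginary part negated */

  printf("z       = %f %+fi\n", __builtin_crealf(z), __builtin_cimagf(z));
  printf("conj(z) = %f %+fi\n", __builtin_crealf(c), __builtin_cimagf(c));
  return 0;
}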