Diffstat (limited to 'lib')
59 files changed, 2202 insertions, 912 deletions
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index fd97db8..98ab6f4 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -937,6 +937,48 @@ SCEVHandle ScalarEvolution::getSignExtendExpr(const SCEVHandle &Op, return Result; } +/// getAnyExtendExpr - Return a SCEV for the given operand extended with +/// unspecified bits out to the given type. +/// +SCEVHandle ScalarEvolution::getAnyExtendExpr(const SCEVHandle &Op, + const Type *Ty) { + assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && + "This is not an extending conversion!"); + assert(isSCEVable(Ty) && + "This is not a conversion to a SCEVable type!"); + Ty = getEffectiveSCEVType(Ty); + + // Sign-extend negative constants. + if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) + if (SC->getValue()->getValue().isNegative()) + return getSignExtendExpr(Op, Ty); + + // Peel off a truncate cast. + if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) { + SCEVHandle NewOp = T->getOperand(); + if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) + return getAnyExtendExpr(NewOp, Ty); + return getTruncateOrNoop(NewOp, Ty); + } + + // Next try a zext cast. If the cast is folded, use it. + SCEVHandle ZExt = getZeroExtendExpr(Op, Ty); + if (!isa<SCEVZeroExtendExpr>(ZExt)) + return ZExt; + + // Next try a sext cast. If the cast is folded, use it. + SCEVHandle SExt = getSignExtendExpr(Op, Ty); + if (!isa<SCEVSignExtendExpr>(SExt)) + return SExt; + + // If the expression is obviously signed, use the sext cast value. + if (isa<SCEVSMaxExpr>(Op)) + return SExt; + + // Absent any other information, use the zext cast value. + return ZExt; +} + /// getAddExpr - Get a canonical add expression, or something simpler if /// possible. SCEVHandle ScalarEvolution::getAddExpr(std::vector<SCEVHandle> &Ops) { @@ -1903,6 +1945,23 @@ ScalarEvolution::getNoopOrSignExtend(const SCEVHandle &V, const Type *Ty) { return getSignExtendExpr(V, Ty); } +/// getNoopOrAnyExtend - Return a SCEV corresponding to a conversion of +/// the input value to the specified type. If the type must be extended, +/// it is extended with unspecified bits. The conversion must not be +/// narrowing. +SCEVHandle +ScalarEvolution::getNoopOrAnyExtend(const SCEVHandle &V, const Type *Ty) { + const Type *SrcTy = V->getType(); + assert((SrcTy->isInteger() || (TD && isa<PointerType>(SrcTy))) && + (Ty->isInteger() || (TD && isa<PointerType>(Ty))) && + "Cannot noop or any extend with non-integer arguments!"); + assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && + "getNoopOrAnyExtend cannot truncate!"); + if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) + return V; // No conversion + return getAnyExtendExpr(V, Ty); +} + /// getTruncateOrNoop - Return a SCEV corresponding to a conversion of the /// input value to the specified type. The conversion must not be widening. 
SCEVHandle diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index ef77e46..e1f8fa4 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/ScalarEvolutionExpander.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Target/TargetData.h" +#include "llvm/ADT/STLExtras.h" using namespace llvm; /// InsertCastOfTo - Insert a cast of V to the specified type, doing what @@ -319,8 +320,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEVHandle *op_begin, if (!AnyNonZeroIndices) { V = InsertNoopCastOfTo(V, Type::Int8Ty->getPointerTo(PTy->getAddressSpace())); - Value *Idx = expand(SE.getAddExpr(Ops)); - Idx = InsertNoopCastOfTo(Idx, Ty); + Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); // Fold a GEP with constant operands. if (Constant *CLHS = dyn_cast<Constant>(V)) @@ -374,8 +374,7 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { // Emit a bunch of add instructions for (int i = S->getNumOperands()-2; i >= 0; --i) { - Value *W = expand(S->getOperand(i)); - W = InsertNoopCastOfTo(W, Ty); + Value *W = expandCodeFor(S->getOperand(i), Ty); V = InsertBinop(Instruction::Add, V, W, InsertPt); } return V; @@ -389,13 +388,11 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { FirstOp = 1; int i = S->getNumOperands()-2; - Value *V = expand(S->getOperand(i+1)); - V = InsertNoopCastOfTo(V, Ty); + Value *V = expandCodeFor(S->getOperand(i+1), Ty); // Emit a bunch of multiply instructions for (; i >= FirstOp; --i) { - Value *W = expand(S->getOperand(i)); - W = InsertNoopCastOfTo(W, Ty); + Value *W = expandCodeFor(S->getOperand(i), Ty); V = InsertBinop(Instruction::Mul, V, W, InsertPt); } @@ -408,8 +405,7 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *LHS = expand(S->getLHS()); - LHS = InsertNoopCastOfTo(LHS, Ty); + Value *LHS = expandCodeFor(S->getLHS(), Ty); if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { const APInt &RHS = SC->getValue()->getValue(); if (RHS.isPowerOf2()) @@ -418,8 +414,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { InsertPt); } - Value *RHS = expand(S->getRHS()); - RHS = InsertNoopCastOfTo(RHS, Ty); + Value *RHS = expandCodeFor(S->getRHS(), Ty); return InsertBinop(Instruction::UDiv, LHS, RHS, InsertPt); } @@ -448,6 +443,34 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); const Loop *L = S->getLoop(); + // First check for an existing canonical IV in a suitable type. + PHINode *CanonicalIV = 0; + if (PHINode *PN = L->getCanonicalInductionVariable()) + if (SE.isSCEVable(PN->getType()) && + isa<IntegerType>(SE.getEffectiveSCEVType(PN->getType())) && + SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty)) + CanonicalIV = PN; + + // Rewrite an AddRec in terms of the canonical induction variable, if + // its type is more narrow. 
+ if (CanonicalIV && + SE.getTypeSizeInBits(CanonicalIV->getType()) > + SE.getTypeSizeInBits(Ty)) { + SCEVHandle Start = SE.getAnyExtendExpr(S->getStart(), + CanonicalIV->getType()); + SCEVHandle Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE), + CanonicalIV->getType()); + Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop())); + BasicBlock::iterator SaveInsertPt = getInsertionPoint(); + BasicBlock::iterator NewInsertPt = + next(BasicBlock::iterator(cast<Instruction>(V))); + while (isa<PHINode>(NewInsertPt)) ++NewInsertPt; + V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, + NewInsertPt); + setInsertionPoint(SaveInsertPt); + return V; + } + // {X,+,F} --> X + {0,+,F} if (!S->getStart()->isZero()) { std::vector<SCEVHandle> NewOps(S->getOperands()); @@ -481,6 +504,14 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // {0,+,1} --> Insert a canonical induction variable into the loop! if (S->isAffine() && S->getOperand(1) == SE.getIntegerSCEV(1, Ty)) { + // If there's a canonical IV, just use it. + if (CanonicalIV) { + assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && + "IVs with types different from the canonical IV should " + "already have been handled!"); + return CanonicalIV; + } + // Create and insert the PHI node for the induction variable in the // specified loop. BasicBlock *Header = L->getHeader(); @@ -508,19 +539,16 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { return PN; } + // {0,+,F} --> {0,+,1} * F // Get the canonical induction variable I for this loop. - Value *I = getOrInsertCanonicalInductionVariable(L, Ty); + Value *I = CanonicalIV ? + CanonicalIV : + getOrInsertCanonicalInductionVariable(L, Ty); // If this is a simple linear addrec, emit it now as a special case. if (S->isAffine()) { // {0,+,F} --> i*F - Value *F = expand(S->getOperand(1)); - F = InsertNoopCastOfTo(F, Ty); - - // IF the step is by one, just return the inserted IV. - if (ConstantInt *CI = dyn_cast<ConstantInt>(F)) - if (CI->getValue() == 1) - return I; - + Value *F = expandCodeFor(S->getOperand(1), Ty); + // If the insert point is directly inside of the loop, emit the multiply at // the insert point. Otherwise, L is a loop that is a parent of the insert // point loop. If we can, move the multiply to the outer most loop that it @@ -555,16 +583,24 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { // into this folder. SCEVHandle IH = SE.getUnknown(I); // Get I as a "symbolic" SCEV. - SCEVHandle V = S->evaluateAtIteration(IH, SE); + // Promote S up to the canonical IV type, if the cast is foldable. + SCEVHandle NewS = S; + SCEVHandle Ext = SE.getNoopOrAnyExtend(S, I->getType()); + if (isa<SCEVAddRecExpr>(Ext)) + NewS = Ext; + + SCEVHandle V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; + // Truncate the result down to the original type, if needed. 
+ SCEVHandle T = SE.getTruncateOrNoop(V, Ty); return expand(V); } Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expand(S->getOperand()); - V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType())); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); Instruction *I = new TruncInst(V, Ty, "tmp.", InsertPt); InsertedValues.insert(I); return I; @@ -572,8 +608,8 @@ Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expand(S->getOperand()); - V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType())); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); Instruction *I = new ZExtInst(V, Ty, "tmp.", InsertPt); InsertedValues.insert(I); return I; @@ -581,8 +617,8 @@ Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *V = expand(S->getOperand()); - V = InsertNoopCastOfTo(V, SE.getEffectiveSCEVType(V->getType())); + Value *V = expandCodeFor(S->getOperand(), + SE.getEffectiveSCEVType(S->getOperand()->getType())); Instruction *I = new SExtInst(V, Ty, "tmp.", InsertPt); InsertedValues.insert(I); return I; @@ -590,11 +626,9 @@ Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *LHS = expand(S->getOperand(0)); - LHS = InsertNoopCastOfTo(LHS, Ty); + Value *LHS = expandCodeFor(S->getOperand(0), Ty); for (unsigned i = 1; i < S->getNumOperands(); ++i) { - Value *RHS = expand(S->getOperand(i)); - RHS = InsertNoopCastOfTo(RHS, Ty); + Value *RHS = expandCodeFor(S->getOperand(i), Ty); Instruction *ICmp = new ICmpInst(ICmpInst::ICMP_SGT, LHS, RHS, "tmp", InsertPt); InsertedValues.insert(ICmp); @@ -607,11 +641,9 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { const Type *Ty = SE.getEffectiveSCEVType(S->getType()); - Value *LHS = expand(S->getOperand(0)); - LHS = InsertNoopCastOfTo(LHS, Ty); + Value *LHS = expandCodeFor(S->getOperand(0), Ty); for (unsigned i = 1; i < S->getNumOperands(); ++i) { - Value *RHS = expand(S->getOperand(i)); - RHS = InsertNoopCastOfTo(RHS, Ty); + Value *RHS = expandCodeFor(S->getOperand(i), Ty); Instruction *ICmp = new ICmpInst(ICmpInst::ICMP_UGT, LHS, RHS, "tmp", InsertPt); InsertedValues.insert(ICmp); diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index 336a2bd..cebb087 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -167,10 +167,11 @@ Archive::addFileBefore(const sys::Path& filePath, iterator where, mbr->data = 0; mbr->path = filePath; const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg); - if (FSInfo) - mbr->info = *FSInfo; - else + if (!FSInfo) { + delete mbr; return true; + } + mbr->info = *FSInfo; unsigned flags = 0; bool hasSlash = filePath.toString().find('/') != std::string::npos; diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 9f16728..6dcdded 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp 
@@ -1308,16 +1308,6 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { // Emit constants. WriteModuleConstants(VE, Stream); - // If we have any aggregate values in the value table, purge them - these can - // only be used to initialize global variables. Doing so makes the value - // namespace smaller for code in functions. - int NumNonAggregates = VE.PurgeAggregateValues(); - if (NumNonAggregates != -1) { - SmallVector<unsigned, 1> Vals; - Vals.push_back(NumNonAggregates); - Stream.EmitRecord(bitc::MODULE_CODE_PURGEVALS, Vals); - } - // Emit function bodies. for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) if (!I->isDeclaration()) diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 8002a36..32b2819 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -277,22 +277,6 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) { } -/// PurgeAggregateValues - If there are any aggregate values at the end of the -/// value list, remove them and return the count of the remaining values. If -/// there are none, return -1. -int ValueEnumerator::PurgeAggregateValues() { - // If there are no aggregate values at the end of the list, return -1. - if (Values.empty() || Values.back().first->getType()->isSingleValueType()) - return -1; - - // Otherwise, remove aggregate values... - while (!Values.empty() && !Values.back().first->getType()->isSingleValueType()) - Values.pop_back(); - - // ... and return the new size. - return Values.size(); -} - void ValueEnumerator::incorporateFunction(const Function &F) { NumModuleValues = Values.size(); diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index bb0324b..40eeabb 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -99,11 +99,6 @@ public: return Attributes; } - /// PurgeAggregateValues - If there are any aggregate values at the end of the - /// value list, remove them and return the count of the remaining values. If - /// there are none, return -1. - int PurgeAggregateValues(); - /// incorporateFunction/purgeFunction - If you'd like to deal with a function, /// use these two methods to get its data into the ValueEnumerator! /// diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5a66f4b..c773378 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1581,6 +1581,7 @@ void DwarfDebug::EndFunction(MachineFunction *MF) { FunctionDbgScope = NULL; LexicalScopeStack.clear(); AbstractInstanceRootList.clear(); + AbstractInstanceRootMap.clear(); } Lines.clear(); @@ -1669,7 +1670,11 @@ unsigned DwarfDebug::RecordRegionEnd(GlobalVariable *V) { DbgScope *Scope = getOrCreateScope(V); unsigned ID = MMI->NextLabelID(); Scope->setEndLabelID(ID); - if (LexicalScopeStack.size() != 0) + // FIXME : region.end() may not be in the last basic block. + // For now, do not pop last lexical scope because next basic + // block may start new inlined function's body. 
+ unsigned LSSize = LexicalScopeStack.size(); + if (LSSize != 0 && LSSize != 1) LexicalScopeStack.pop_back(); if (TimePassesIsEnabled) diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index ff917a7..5ba8b3c 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -12,6 +12,7 @@ add_llvm_library(LLVMCodeGen IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp + LazyLiveness.cpp LiveInterval.cpp LiveIntervalAnalysis.cpp LiveStackAnalysis.cpp diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h index c22f6ed..796bc2c 100644 --- a/lib/CodeGen/ELF.h +++ b/lib/CodeGen/ELF.h @@ -10,23 +10,24 @@ // This header contains common, non-processor-specific data structures and // constants for the ELF file format. // -// The details of the ELF32 bits in this file are largely based on -// the Tool Interface Standard (TIS) Executable and Linking Format -// (ELF) Specification Version 1.2, May 1995. The ELF64 stuff is not -// standardized, as far as I can tell. It was largely based on information -// I found in OpenBSD header files. +// The details of the ELF32 bits in this file are largely based on the Tool +// Interface Standard (TIS) Executable and Linking Format (ELF) Specification +// Version 1.2, May 1995. The ELF64 is based on HP/Intel definition of the +// ELF-64 object file format document, Version 1.5 Draft 2 May 27, 1998 // //===----------------------------------------------------------------------===// #ifndef CODEGEN_ELF_H #define CODEGEN_ELF_H +#include "llvm/GlobalVariable.h" +#include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/MachineRelocation.h" #include "llvm/Support/DataTypes.h" #include <cstring> namespace llvm { - class GlobalVariable; + class BinaryObject; // Identification Indexes enum { @@ -47,71 +48,28 @@ namespace llvm { ET_HIPROC = 0xffff // Processor-specific }; - // Object file classes. - enum { - ELFCLASS32 = 1, // 32-bit object file - ELFCLASS64 = 2 // 64-bit object file - }; - - // Object file byte orderings. - enum { - ELFDATA2LSB = 1, // Little-endian object file - ELFDATA2MSB = 2 // Big-endian object file - }; - // Versioning enum { EV_NONE = 0, EV_CURRENT = 1 }; - struct ELFHeader { - // e_machine - This field is the target specific value to emit as the - // e_machine member of the ELF header. - unsigned short e_machine; - - // e_flags - The machine flags for the target. This defaults to zero. - unsigned e_flags; - - // e_size - Holds the ELF header's size in bytes - unsigned e_ehsize; - - // Endianess and ELF Class (64 or 32 bits) - unsigned ByteOrder; - unsigned ElfClass; - - unsigned getByteOrder() const { return ByteOrder; } - unsigned getElfClass() const { return ElfClass; } - unsigned getSize() const { return e_ehsize; } - unsigned getMachine() const { return e_machine; } - unsigned getFlags() const { return e_flags; } - - ELFHeader(unsigned short machine, unsigned flags, - bool is64Bit, bool isLittleEndian) - : e_machine(machine), e_flags(flags) { - ElfClass = is64Bit ? ELFCLASS64 : ELFCLASS32; - ByteOrder = isLittleEndian ? ELFDATA2LSB : ELFDATA2MSB; - e_ehsize = is64Bit ? 64 : 52; - } - }; - /// ELFSection - This struct contains information about each section that is /// emitted to the file. This is eventually turned into the section header /// table at the end of the file. - struct ELFSection { - + class ELFSection : public BinaryObject { + public: // ELF specific fields - std::string Name; // Name of the section. - unsigned NameIdx; // Index in .shstrtab of name, once emitted. 
- unsigned Type; - unsigned Flags; - uint64_t Addr; - unsigned Offset; - unsigned Size; - unsigned Link; - unsigned Info; - unsigned Align; - unsigned EntSize; + unsigned NameIdx; // sh_name - .shstrtab idx of name, once emitted. + unsigned Type; // sh_type - Section contents & semantics + unsigned Flags; // sh_flags - Section flags. + uint64_t Addr; // sh_addr - The mem addr this section is in. + unsigned Offset; // sh_offset - Offset from the file start + unsigned Size; // sh_size - The section size. + unsigned Link; // sh_link - Section header table index link. + unsigned Info; // sh_info - Auxillary information. + unsigned Align; // sh_addralign - Alignment of section. + unsigned EntSize; // sh_entsize - Size of entries in the section e // Section Header Flags enum { @@ -141,8 +99,8 @@ namespace llvm { SHT_REL = 9, // Relocation entries; no explicit addends. SHT_SHLIB = 10, // Reserved. SHT_DYNSYM = 11, // Symbol table. - SHT_LOPROC = 0x70000000, // Lowest processor architecture-specific type. - SHT_HIPROC = 0x7fffffff, // Highest processor architecture-specific type. + SHT_LOPROC = 0x70000000, // Lowest processor arch-specific type. + SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type. SHT_LOUSER = 0x80000000, // Lowest type reserved for applications. SHT_HIUSER = 0xffffffff // Highest type reserved for applications. }; @@ -161,22 +119,9 @@ namespace llvm { /// SectionIdx - The number of the section in the Section Table. unsigned short SectionIdx; - /// SectionData - The actual data for this section which we are building - /// up for emission to the file. - std::vector<unsigned char> SectionData; - - /// Relocations - The relocations that we have encountered so far in this - /// section that we will need to convert to Elf relocation entries when - /// the file is written. - std::vector<MachineRelocation> Relocations; - - /// Section Header Size - static unsigned getSectionHdrSize(bool is64Bit) - { return is64Bit ? 
64 : 40; } - - ELFSection(const std::string &name) - : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0), - Link(0), Info(0), Align(0), EntSize(0) {} + ELFSection(const std::string &name, bool isLittleEndian, bool is64Bit) + : BinaryObject(name, isLittleEndian, is64Bit), Type(0), Flags(0), Addr(0), + Offset(0), Size(0), Link(0), Info(0), Align(0), EntSize(0) {} }; /// ELFSym - This struct contains information about each symbol that is @@ -207,9 +152,33 @@ namespace llvm { STT_FILE = 4 }; + enum { + STV_DEFAULT = 0, // Visibility is specified by binding type + STV_INTERNAL = 1, // Defined by processor supplements + STV_HIDDEN = 2, // Not visible to other components + STV_PROTECTED = 3 // Visible in other components but not preemptable + }; + ELFSym(const GlobalValue *gv) : GV(gv), NameIdx(0), Value(0), Size(0), Info(0), Other(0), - SectionIdx(ELFSection::SHN_UNDEF) {} + SectionIdx(ELFSection::SHN_UNDEF) { + if (!GV) + return; + + switch (GV->getVisibility()) { + default: + assert(0 && "unknown visibility type"); + case GlobalValue::DefaultVisibility: + Other = STV_DEFAULT; + break; + case GlobalValue::HiddenVisibility: + Other = STV_HIDDEN; + break; + case GlobalValue::ProtectedVisibility: + Other = STV_PROTECTED; + break; + } + } void SetBind(unsigned X) { assert(X == (X & 0xF) && "Bind value out of range!"); diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index c7bd873..ca68396 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -13,9 +13,9 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" +#include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" @@ -28,27 +28,22 @@ namespace llvm { /// startFunction - This callback is invoked when a new machine function is /// about to be emitted. void ELFCodeEmitter::startFunction(MachineFunction &MF) { - const TargetData *TD = TM.getTargetData(); - const Function *F = MF.getFunction(); - - // Align the output buffer to the appropriate alignment, power of 2. - unsigned FnAlign = F->getAlignment(); - unsigned TDAlign = TD->getPrefTypeAlignment(F->getType()); - unsigned Align = std::max(FnAlign, TDAlign); - assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); - // Get the ELF Section that this function belongs in. ES = &EW.getTextSection(); - // FIXME: better memory management, this will be replaced by BinaryObjects - ES->SectionData.reserve(4096); - BufferBegin = &ES->SectionData[0]; - BufferEnd = BufferBegin + ES->SectionData.capacity(); + DOUT << "processing function: " << MF.getFunction()->getName() << "\n"; - // Upgrade the section alignment if required. + // FIXME: better memory management, this will be replaced by BinaryObjects + BinaryData &BD = ES->getData(); + BD.reserve(4096); + BufferBegin = &BD[0]; + BufferEnd = BufferBegin + BD.capacity(); + + // Align the output buffer with function alignment, and + // upgrade the section alignment if required + unsigned Align = + TM.getELFWriterInfo()->getFunctionAlignment(MF.getFunction()); if (ES->Align < Align) ES->Align = Align; - - // Round the size up to the correct alignment for starting the new function. 
ES->Size = (ES->Size + (Align-1)) & (-Align); // Snaity check on allocated space for text section @@ -107,7 +102,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { FnSym.Value = FnStartPtr-BufferBegin; // Finally, add it to the symtab. - EW.SymbolTable.push_back(FnSym); + EW.SymbolList.push_back(FnSym); // Relocations // ----------- @@ -128,7 +123,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { } else { assert(0 && "Unhandled relocation type"); } - ES->Relocations.push_back(MR); + ES->addRelocation(MR); } Relocations.clear(); diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index 3859ea3..aeccefb 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -26,9 +26,6 @@ // ... // #N. ".shstrtab" entry - String table for the section names. // -// NOTE: This code should eventually be extended to support 64-bit ELF (this -// won't be hard), but we haven't done so yet! -// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "elfwriter" @@ -36,18 +33,18 @@ #include "ELFWriter.h" #include "ELFCodeEmitter.h" #include "ELF.h" +#include "llvm/Constants.h" #include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/BinaryObject.h" #include "llvm/CodeGen/FileWriters.h" #include "llvm/CodeGen/MachineCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetELFWriterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/Mangler.h" -#include "llvm/Support/OutputBuffer.h" #include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" @@ -70,21 +67,23 @@ MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, //===----------------------------------------------------------------------===// ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) - : MachineFunctionPass(&ID), O(o), TM(tm), ElfHdr() { - is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; - isLittleEndian = TM.getTargetData()->isLittleEndian(); + : MachineFunctionPass(&ID), O(o), TM(tm), + is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64), + isLittleEndian(TM.getTargetData()->isLittleEndian()), + ElfHdr(isLittleEndian, is64Bit) { - ElfHdr = new ELFHeader(TM.getELFWriterInfo()->getEMachine(), 0, - is64Bit, isLittleEndian); + TAI = TM.getTargetAsmInfo(); + TEW = TM.getELFWriterInfo(); // Create the machine code emitter object for this target. MCE = new ELFCodeEmitter(*this); + + // Inital number of sections NumSections = 0; } ELFWriter::~ELFWriter() { delete MCE; - delete ElfHdr; } // doInitialization - Emit the file header and all of the global variables for @@ -92,10 +91,6 @@ ELFWriter::~ELFWriter() { bool ELFWriter::doInitialization(Module &M) { Mang = new Mangler(M); - // Local alias to shortenify coming code. 
- std::vector<unsigned char> &FH = FileHeader; - OutputBuffer FHOut(FH, is64Bit, isLittleEndian); - // ELF Header // ---------- // Fields e_shnum e_shstrndx are only known after all section have @@ -104,54 +99,58 @@ bool ELFWriter::doInitialization(Module &M) { // // Note // ---- - // FHOut.outaddr method behaves differently for ELF32 and ELF64 writing + // emitWord method behaves differently for ELF32 and ELF64, writing // 4 bytes in the former and 8 in the last for *_off and *_addr elf types - FHOut.outbyte(0x7f); // e_ident[EI_MAG0] - FHOut.outbyte('E'); // e_ident[EI_MAG1] - FHOut.outbyte('L'); // e_ident[EI_MAG2] - FHOut.outbyte('F'); // e_ident[EI_MAG3] - - FHOut.outbyte(ElfHdr->getElfClass()); // e_ident[EI_CLASS] - FHOut.outbyte(ElfHdr->getByteOrder()); // e_ident[EI_DATA] - FHOut.outbyte(EV_CURRENT); // e_ident[EI_VERSION] - - FH.resize(16); // e_ident[EI_NIDENT-EI_PAD] - - FHOut.outhalf(ET_REL); // e_type - FHOut.outhalf(ElfHdr->getMachine()); // e_machine = target - FHOut.outword(EV_CURRENT); // e_version - FHOut.outaddr(0); // e_entry = 0, no entry point in .o file - FHOut.outaddr(0); // e_phoff = 0, no program header for .o - ELFHdr_e_shoff_Offset = FH.size(); - FHOut.outaddr(0); // e_shoff = sec hdr table off in bytes - FHOut.outword(ElfHdr->getFlags()); // e_flags = whatever the target wants - FHOut.outhalf(ElfHdr->getSize()); // e_ehsize = ELF header size - FHOut.outhalf(0); // e_phentsize = prog header entry size - FHOut.outhalf(0); // e_phnum = # prog header entries = 0 + ElfHdr.emitByte(0x7f); // e_ident[EI_MAG0] + ElfHdr.emitByte('E'); // e_ident[EI_MAG1] + ElfHdr.emitByte('L'); // e_ident[EI_MAG2] + ElfHdr.emitByte('F'); // e_ident[EI_MAG3] + + ElfHdr.emitByte(TEW->getEIClass()); // e_ident[EI_CLASS] + ElfHdr.emitByte(TEW->getEIData()); // e_ident[EI_DATA] + ElfHdr.emitByte(EV_CURRENT); // e_ident[EI_VERSION] + ElfHdr.emitAlignment(16); // e_ident[EI_NIDENT-EI_PAD] + + ElfHdr.emitWord16(ET_REL); // e_type + ElfHdr.emitWord16(TEW->getEMachine()); // e_machine = target + ElfHdr.emitWord32(EV_CURRENT); // e_version + ElfHdr.emitWord(0); // e_entry, no entry point in .o file + ElfHdr.emitWord(0); // e_phoff, no program header for .o + ELFHdr_e_shoff_Offset = ElfHdr.size(); + ElfHdr.emitWord(0); // e_shoff = sec hdr table off in bytes + ElfHdr.emitWord32(TEW->getEFlags()); // e_flags = whatever the target wants + ElfHdr.emitWord16(TEW->getHdrSize()); // e_ehsize = ELF header size + ElfHdr.emitWord16(0); // e_phentsize = prog header entry size + ElfHdr.emitWord16(0); // e_phnum = # prog header entries = 0 // e_shentsize = Section header entry size - FHOut.outhalf(ELFSection::getSectionHdrSize(is64Bit)); + ElfHdr.emitWord16(TEW->getSHdrSize()); // e_shnum = # of section header ents - ELFHdr_e_shnum_Offset = FH.size(); - FHOut.outhalf(0); + ELFHdr_e_shnum_Offset = ElfHdr.size(); + ElfHdr.emitWord16(0); // Placeholder // e_shstrndx = Section # of '.shstrtab' - ELFHdr_e_shstrndx_Offset = FH.size(); - FHOut.outhalf(0); + ELFHdr_e_shstrndx_Offset = ElfHdr.size(); + ElfHdr.emitWord16(0); // Placeholder // Add the null section, which is required to be first in the file. getSection("", ELFSection::SHT_NULL, 0); - // Start up the symbol table. The first entry in the symtab is the null + // Start up the symbol table. The first entry in the symtab is the null // entry. - SymbolTable.push_back(ELFSym(0)); + SymbolList.push_back(ELFSym(0)); return false; } void ELFWriter::EmitGlobal(GlobalVariable *GV) { + + // XXX: put local symbols *before* global ones! 
+ const Section *S = TAI->SectionForGlobal(GV); + DOUT << "Section " << S->getName() << " for global " << GV->getName() << "\n"; + // If this is an external global, emit it now. TODO: Note that it would be // better to ignore the symbol here and only add it to the symbol table if // referenced. @@ -160,17 +159,17 @@ void ELFWriter::EmitGlobal(GlobalVariable *GV) { ExternalSym.SetBind(ELFSym::STB_GLOBAL); ExternalSym.SetType(ELFSym::STT_NOTYPE); ExternalSym.SectionIdx = ELFSection::SHN_UNDEF; - SymbolTable.push_back(ExternalSym); + SymbolList.push_back(ExternalSym); return; } - unsigned Align = TM.getTargetData()->getPreferredAlignment(GV); - unsigned Size = - TM.getTargetData()->getTypeAllocSize(GV->getType()->getElementType()); + const TargetData *TD = TM.getTargetData(); + unsigned Align = TD->getPreferredAlignment(GV); + Constant *CV = GV->getInitializer(); + unsigned Size = TD->getTypeAllocSize(CV->getType()); - // If this global has a zero initializer, it is part of the .bss or common - // section. - if (GV->getInitializer()->isNullValue()) { + // If this global has a zero initializer, go to .bss or common section. + if (CV->isNullValue() || isa<UndefValue>(CV)) { // If this global is part of the common block, add it now. Variables are // part of the common block if they are zero initialized and allowed to be // merged with other symbols. @@ -182,14 +181,14 @@ void ELFWriter::EmitGlobal(GlobalVariable *GV) { CommonSym.Size = Size; CommonSym.SetBind(ELFSym::STB_GLOBAL); CommonSym.SetType(ELFSym::STT_OBJECT); - // TODO SOMEDAY: add ELF visibility. CommonSym.SectionIdx = ELFSection::SHN_COMMON; - SymbolTable.push_back(CommonSym); + SymbolList.push_back(CommonSym); + getSection(S->getName(), ELFSection::SHT_NOBITS, + ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC, 1); return; } // Otherwise, this symbol is part of the .bss section. Emit it now. - // Handle alignment. Ensure section is aligned at least as much as required // by this symbol. ELFSection &BSSSection = getBSSSection(); @@ -220,18 +219,128 @@ void ELFWriter::EmitGlobal(GlobalVariable *GV) { // Set the idx of the .bss section BSSSym.SectionIdx = BSSSection.SectionIdx; if (!GV->hasPrivateLinkage()) - SymbolTable.push_back(BSSSym); + SymbolList.push_back(BSSSym); // Reserve space in the .bss section for this symbol. 
BSSSection.Size += Size; return; } - // FIXME: handle .rodata - //assert(!GV->isConstant() && "unimp"); + /// Emit the Global symbol to the right ELF section + ELFSym GblSym(GV); + GblSym.Size = Size; + GblSym.SetType(ELFSym::STT_OBJECT); + GblSym.SetBind(ELFSym::STB_GLOBAL); + unsigned Flags = S->getFlags(); + unsigned SectType = ELFSection::SHT_PROGBITS; + unsigned SHdrFlags = ELFSection::SHF_ALLOC; + + if (Flags & SectionFlags::Code) + SHdrFlags |= ELFSection::SHF_EXECINSTR; + if (Flags & SectionFlags::Writeable) + SHdrFlags |= ELFSection::SHF_WRITE; + if (Flags & SectionFlags::Mergeable) + SHdrFlags |= ELFSection::SHF_MERGE; + if (Flags & SectionFlags::TLS) + SHdrFlags |= ELFSection::SHF_TLS; + if (Flags & SectionFlags::Strings) + SHdrFlags |= ELFSection::SHF_STRINGS; + + // Remove tab from section name prefix + std::string SectionName(S->getName()); + size_t Pos = SectionName.find("\t"); + if (Pos != std::string::npos) + SectionName.erase(Pos, 1); + + // The section alignment should be bound to the element with + // the largest alignment + ELFSection &ElfS = getSection(SectionName, SectType, SHdrFlags); + GblSym.SectionIdx = ElfS.SectionIdx; + if (Align > ElfS.Align) + ElfS.Align = Align; + + // S.Value should contain the symbol index inside the section, + // and all symbols should start on their required alignment boundary + GblSym.Value = (ElfS.size() + (Align-1)) & (-Align); + ElfS.emitAlignment(Align); + + // Emit the constant symbol to its section + EmitGlobalConstant(CV, ElfS); + SymbolList.push_back(GblSym); +} - // FIXME: handle .data - //assert(0 && "unimp"); +void ELFWriter::EmitGlobalConstantStruct(const ConstantStruct *CVS, + ELFSection &GblS) { + + // Print the fields in successive locations. Pad to align if needed! + const TargetData *TD = TM.getTargetData(); + unsigned Size = TD->getTypeAllocSize(CVS->getType()); + const StructLayout *cvsLayout = TD->getStructLayout(CVS->getType()); + uint64_t sizeSoFar = 0; + for (unsigned i = 0, e = CVS->getNumOperands(); i != e; ++i) { + const Constant* field = CVS->getOperand(i); + + // Check if padding is needed and insert one or more 0s. + uint64_t fieldSize = TD->getTypeAllocSize(field->getType()); + uint64_t padSize = ((i == e-1 ? Size : cvsLayout->getElementOffset(i+1)) + - cvsLayout->getElementOffset(i)) - fieldSize; + sizeSoFar += fieldSize + padSize; + + // Now print the actual field value. + EmitGlobalConstant(field, GblS); + + // Insert padding - this may include padding to increase the size of the + // current field up to the ABI size (if the struct is not packed) as well + // as padding to ensure that the next field starts at the right offset. + for (unsigned p=0; p < padSize; p++) + GblS.emitByte(0); + } + assert(sizeSoFar == cvsLayout->getSizeInBytes() && + "Layout of constant struct may be incorrect!"); +} + +void ELFWriter::EmitGlobalConstant(const Constant *CV, ELFSection &GblS) { + const TargetData *TD = TM.getTargetData(); + unsigned Size = TD->getTypeAllocSize(CV->getType()); + + if (const ConstantArray *CVA = dyn_cast<ConstantArray>(CV)) { + if (CVA->isString()) { + std::string GblStr = CVA->getAsString(); + GblS.emitString(GblStr); + } else { // Not a string. 
Print the values in successive locations + for (unsigned i = 0, e = CVA->getNumOperands(); i != e; ++i) + EmitGlobalConstant(CVA->getOperand(i), GblS); + } + return; + } else if (const ConstantStruct *CVS = dyn_cast<ConstantStruct>(CV)) { + EmitGlobalConstantStruct(CVS, GblS); + return; + } else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) { + uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + if (CFP->getType() == Type::DoubleTy) + GblS.emitWord64(Val); + else if (CFP->getType() == Type::FloatTy) + GblS.emitWord32(Val); + else if (CFP->getType() == Type::X86_FP80Ty) { + assert(0 && "X86_FP80Ty global emission not implemented"); + } else if (CFP->getType() == Type::PPC_FP128Ty) + assert(0 && "PPC_FP128Ty global emission not implemented"); + return; + } else if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) { + if (Size == 4) + GblS.emitWord32(CI->getZExtValue()); + else if (Size == 8) + GblS.emitWord64(CI->getZExtValue()); + else + assert(0 && "LargeInt global emission not implemented"); + return; + } else if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) { + const VectorType *PTy = CP->getType(); + for (unsigned I = 0, E = PTy->getNumElements(); I < E; ++I) + EmitGlobalConstant(CP->getOperand(I), GblS); + return; + } + assert(0 && "unknown global constant"); } @@ -243,22 +352,41 @@ bool ELFWriter::runOnMachineFunction(MachineFunction &MF) { /// doFinalization - Now that the module has been completely processed, emit /// the ELF file to 'O'. bool ELFWriter::doFinalization(Module &M) { - // Okay, the ELF header and .text sections have been completed, build the - // .data, .bss, and "common" sections next. + /// FIXME: This should be removed when moving to ObjectCodeEmiter. Since the + /// current ELFCodeEmiter uses CurrBuff, ... it doesn't update S.Data + /// vector size for .text sections, so this is a quick dirty fix + ELFSection &TS = getTextSection(); + if (TS.Size) { + BinaryData &BD = TS.getData(); + for (unsigned e=0; e<TS.Size; ++e) + BD.push_back(BD[e]); + } + + // Emit .data section placeholder + getDataSection(); + + // Emit .bss section placeholder + getBSSSection(); + + // Build and emit data, bss and "common" sections. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) EmitGlobal(I); + // Emit non-executable stack note + if (TAI->getNonexecutableStackDirective()) + getNonExecStackSection(); + // Emit the symbol table now, if non-empty. EmitSymbolTable(); // Emit the relocation sections. EmitRelocations(); - // Emit the string table for the sections in the ELF file. + // Emit the sections string table. EmitSectionTableStringTable(); - // Emit the sections to the .o file, and emit the section table for the file. + // Dump the sections and section table to the .o file. OutputSectionsAndSectionTable(); // We are done with the abstract symbols. 
@@ -274,78 +402,97 @@ bool ELFWriter::doFinalization(Module &M) { void ELFWriter::EmitRelocations() { } +/// EmitSymbol - Write symbol 'Sym' to the symbol table 'SymbolTable' +void ELFWriter::EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym) { + if (is64Bit) { + SymbolTable.emitWord32(Sym.NameIdx); + SymbolTable.emitByte(Sym.Info); + SymbolTable.emitByte(Sym.Other); + SymbolTable.emitWord16(Sym.SectionIdx); + SymbolTable.emitWord64(Sym.Value); + SymbolTable.emitWord64(Sym.Size); + } else { + SymbolTable.emitWord32(Sym.NameIdx); + SymbolTable.emitWord32(Sym.Value); + SymbolTable.emitWord32(Sym.Size); + SymbolTable.emitByte(Sym.Info); + SymbolTable.emitByte(Sym.Other); + SymbolTable.emitWord16(Sym.SectionIdx); + } +} + +/// EmitSectionHeader - Write section 'Section' header in 'SHdrTab' +/// Section Header Table +void ELFWriter::EmitSectionHeader(BinaryObject &SHdrTab, + const ELFSection &SHdr) { + SHdrTab.emitWord32(SHdr.NameIdx); + SHdrTab.emitWord32(SHdr.Type); + if (is64Bit) { + SHdrTab.emitWord64(SHdr.Flags); + SHdrTab.emitWord(SHdr.Addr); + SHdrTab.emitWord(SHdr.Offset); + SHdrTab.emitWord64(SHdr.Size); + SHdrTab.emitWord32(SHdr.Link); + SHdrTab.emitWord32(SHdr.Info); + SHdrTab.emitWord64(SHdr.Align); + SHdrTab.emitWord64(SHdr.EntSize); + } else { + SHdrTab.emitWord32(SHdr.Flags); + SHdrTab.emitWord(SHdr.Addr); + SHdrTab.emitWord(SHdr.Offset); + SHdrTab.emitWord32(SHdr.Size); + SHdrTab.emitWord32(SHdr.Link); + SHdrTab.emitWord32(SHdr.Info); + SHdrTab.emitWord32(SHdr.Align); + SHdrTab.emitWord32(SHdr.EntSize); + } +} + /// EmitSymbolTable - If the current symbol table is non-empty, emit the string /// table for it and then the symbol table itself. void ELFWriter::EmitSymbolTable() { - if (SymbolTable.size() == 1) return; // Only the null entry. + if (SymbolList.size() == 1) return; // Only the null entry. // FIXME: compact all local symbols to the start of the symtab. unsigned FirstNonLocalSymbol = 1; - ELFSection &StrTab = getSection(".strtab", ELFSection::SHT_STRTAB, 0); - StrTab.Align = 1; - - DataBuffer &StrTabBuf = StrTab.SectionData; - OutputBuffer StrTabOut(StrTabBuf, is64Bit, isLittleEndian); + ELFSection &StrTab = getStringTableSection(); // Set the zero'th symbol to a null byte, as required. - StrTabOut.outbyte(0); + StrTab.emitByte(0); + unsigned Index = 1; - for (unsigned i = 1, e = SymbolTable.size(); i != e; ++i) { + for (unsigned i = 1, e = SymbolList.size(); i != e; ++i) { // Use the name mangler to uniquify the LLVM symbol. - std::string Name = Mang->getValueName(SymbolTable[i].GV); + std::string Name = Mang->getValueName(SymbolList[i].GV); if (Name.empty()) { - SymbolTable[i].NameIdx = 0; + SymbolList[i].NameIdx = 0; } else { - SymbolTable[i].NameIdx = Index; - - // Add the name to the output buffer, including the null terminator. - StrTabBuf.insert(StrTabBuf.end(), Name.begin(), Name.end()); - - // Add a null terminator. - StrTabBuf.push_back(0); + SymbolList[i].NameIdx = Index; + StrTab.emitString(Name); // Keep track of the number of bytes emitted to this section. Index += Name.size()+1; } } - assert(Index == StrTabBuf.size()); + assert(Index == StrTab.size()); StrTab.Size = Index; // Now that we have emitted the string table and know the offset into the // string table of each symbol, emit the symbol table itself. - ELFSection &SymTab = getSection(".symtab", ELFSection::SHT_SYMTAB, 0); - SymTab.Align = is64Bit ? 8 : 4; - SymTab.Link = StrTab.SectionIdx; // Section Index of .strtab. - SymTab.Info = FirstNonLocalSymbol; // First non-STB_LOCAL symbol. 
- SymTab.EntSize = is64Bit ? 24 : 16; // Size of each symtab entry. - DataBuffer &SymTabBuf = SymTab.SectionData; - OutputBuffer SymTabOut(SymTabBuf, is64Bit, isLittleEndian); - - if (!is64Bit) { // 32-bit and 64-bit formats are shuffled a bit. - for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) { - ELFSym &Sym = SymbolTable[i]; - SymTabOut.outword(Sym.NameIdx); - SymTabOut.outaddr32(Sym.Value); - SymTabOut.outword(Sym.Size); - SymTabOut.outbyte(Sym.Info); - SymTabOut.outbyte(Sym.Other); - SymTabOut.outhalf(Sym.SectionIdx); - } - } else { - for (unsigned i = 0, e = SymbolTable.size(); i != e; ++i) { - ELFSym &Sym = SymbolTable[i]; - SymTabOut.outword(Sym.NameIdx); - SymTabOut.outbyte(Sym.Info); - SymTabOut.outbyte(Sym.Other); - SymTabOut.outhalf(Sym.SectionIdx); - SymTabOut.outaddr64(Sym.Value); - SymTabOut.outxword(Sym.Size); - } - } + ELFSection &SymTab = getSymbolTableSection(); + SymTab.Align = TEW->getSymTabAlignment(); + SymTab.Link = StrTab.SectionIdx; // Section Index of .strtab. + SymTab.Info = FirstNonLocalSymbol; // First non-STB_LOCAL symbol. + + // Size of each symtab entry. + SymTab.EntSize = TEW->getSymTabEntrySize(); + + for (unsigned i = 0, e = SymbolList.size(); i != e; ++i) + EmitSymbol(SymTab, SymbolList[i]); - SymTab.Size = SymTabBuf.size(); + SymTab.Size = SymTab.size(); } /// EmitSectionTableStringTable - This method adds and emits a section for the @@ -357,32 +504,25 @@ void ELFWriter::EmitSectionTableStringTable() { // Now that we know which section number is the .shstrtab section, update the // e_shstrndx entry in the ELF header. - OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian); - FHOut.fixhalf(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset); + ElfHdr.fixWord16(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset); // Set the NameIdx of each section in the string table and emit the bytes for // the string table. unsigned Index = 0; - DataBuffer &Buf = SHStrTab.SectionData; for (std::list<ELFSection>::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) { // Set the index into the table. Note if we have lots of entries with // common suffixes, we could memoize them here if we cared. I->NameIdx = Index; - - // Add the name to the output buffer, including the null terminator. - Buf.insert(Buf.end(), I->Name.begin(), I->Name.end()); - - // Add a null terminator. - Buf.push_back(0); + SHStrTab.emitString(I->getName()); // Keep track of the number of bytes emitted to this section. - Index += I->Name.size()+1; + Index += I->getName().size()+1; } // Set the size of .shstrtab now that we know what it is. - assert(Index == Buf.size()); + assert(Index == SHStrTab.size()); SHStrTab.Size = Index; } @@ -391,9 +531,9 @@ void ELFWriter::EmitSectionTableStringTable() { /// SectionTable. void ELFWriter::OutputSectionsAndSectionTable() { // Pass #1: Compute the file offset for each section. - size_t FileOff = FileHeader.size(); // File header first. + size_t FileOff = ElfHdr.size(); // File header first. - // Emit all of the section data in order. + // Adjust alignment of all section if needed. for (std::list<ELFSection>::iterator I = SectionList.begin(), E = SectionList.end(); I != E; ++I) { @@ -401,9 +541,14 @@ void ELFWriter::OutputSectionsAndSectionTable() { if (!I->SectionIdx) continue; + if (!I->size()) { + I->Offset = FileOff; + continue; + } + // Update Section size if (!I->Size) - I->Size = I->SectionData.size(); + I->Size = I->size(); // Align FileOff to whatever the alignment restrictions of the section are. 
if (I->Align) @@ -419,49 +564,40 @@ void ELFWriter::OutputSectionsAndSectionTable() { // Now that we know where all of the sections will be emitted, set the e_shnum // entry in the ELF header. - OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian); - FHOut.fixhalf(NumSections, ELFHdr_e_shnum_Offset); + ElfHdr.fixWord16(NumSections, ELFHdr_e_shnum_Offset); // Now that we know the offset in the file of the section table, update the // e_shoff address in the ELF header. - FHOut.fixaddr(FileOff, ELFHdr_e_shoff_Offset); + ElfHdr.fixWord(FileOff, ELFHdr_e_shoff_Offset); // Now that we know all of the data in the file header, emit it and all of the // sections! - O.write((char*)&FileHeader[0], FileHeader.size()); - FileOff = FileHeader.size(); - DataBuffer().swap(FileHeader); + O.write((char *)&ElfHdr.getData()[0], ElfHdr.size()); + FileOff = ElfHdr.size(); - DataBuffer Table; - OutputBuffer TableOut(Table, is64Bit, isLittleEndian); + // Section Header Table blob + BinaryObject SHdrTable(isLittleEndian, is64Bit); - // Emit all of the section data and build the section table itself. + // Emit all of sections to the file and build the section header table. while (!SectionList.empty()) { - const ELFSection &S = *SectionList.begin(); + ELFSection &S = *SectionList.begin(); + DOUT << "SectionIdx: " << S.SectionIdx << ", Name: " << S.getName() + << ", Size: " << S.Size << ", Offset: " << S.Offset + << ", SectionData Size: " << S.size() << "\n"; // Align FileOff to whatever the alignment restrictions of the section are. - if (S.Align) + if (S.Align) { for (size_t NewFileOff = (FileOff+S.Align-1) & ~(S.Align-1); - FileOff != NewFileOff; ++FileOff) + FileOff != NewFileOff; ++FileOff) O << (char)0xAB; - O.write((char*)&S.SectionData[0], S.Size); - - DOUT << "SectionIdx: " << S.SectionIdx << ", Name: " << S.Name - << ", Size: " << S.Size << ", Offset: " << S.Offset << "\n"; - - FileOff += S.Size; + } - TableOut.outword(S.NameIdx); // sh_name - Symbol table name idx - TableOut.outword(S.Type); // sh_type - Section contents & semantics - TableOut.outaddr(S.Flags); // sh_flags - Section flags. - TableOut.outaddr(S.Addr); // sh_addr - The mem addr this section is in. - TableOut.outaddr(S.Offset); // sh_offset - Offset from the file start. - TableOut.outaddr(S.Size); // sh_size - The section size. - TableOut.outword(S.Link); // sh_link - Section header table index link. - TableOut.outword(S.Info); // sh_info - Auxillary information. - TableOut.outaddr(S.Align); // sh_addralign - Alignment of section. - TableOut.outaddr(S.EntSize); // sh_entsize - Size of entries in the section + if (S.size()) { + O.write((char *)&S.getData()[0], S.Size); + FileOff += S.Size; + } + EmitSectionHeader(SHdrTable, S); SectionList.pop_front(); } @@ -471,5 +607,5 @@ void ELFWriter::OutputSectionsAndSectionTable() { O << (char)0xAB; // Emit the section table itself. 
- O.write((char*)&Table[0], Table.size()); + O.write((char *)&SHdrTable.getData()[0], SHdrTable.size()); } diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index 14a44f0..8a380f0 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -16,15 +16,20 @@ #include "llvm/ADT/SetVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/OutputBuffer.h" +#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetELFWriterInfo.h" #include "ELF.h" #include <list> #include <map> namespace llvm { + class BinaryObject; + class ConstantStruct; + class ELFCodeEmitter; class GlobalVariable; class Mangler; class MachineCodeEmitter; - class ELFCodeEmitter; class raw_ostream; /// ELFWriter - This class implements the common target-independent code for @@ -52,6 +57,9 @@ namespace llvm { /// Target machine description. TargetMachine &TM; + /// Target Elf Writer description. + const TargetELFWriterInfo *TEW; + /// Mang - The object used to perform name mangling for this module. Mangler *Mang; @@ -59,6 +67,10 @@ namespace llvm { /// code for functions to the .o file. ELFCodeEmitter *MCE; + /// TAI - Target Asm Info, provide information about section names for + /// globals and other target specific stuff. + const TargetAsmInfo *TAI; + //===------------------------------------------------------------------===// // Properties inferred automatically from the target machine. //===------------------------------------------------------------------===// @@ -77,13 +89,8 @@ namespace llvm { bool doFinalization(Module &M); private: - // The buffer we accumulate the file header into. Note that this should be - // changed into something much more efficient later (and the bitcode writer - // as well!). - DataBuffer FileHeader; - - /// ElfHdr - Hold information about the ELF Header - ELFHeader *ElfHdr; + // Blob containing the Elf header + BinaryObject ElfHdr; /// SectionList - This is the list of sections that we have emitted to the /// file. Once the file has been completely built, the section header table @@ -97,17 +104,18 @@ namespace llvm { /// getSection - Return the section with the specified name, creating a new /// section if one does not already exist. - ELFSection &getSection(const std::string &Name, - unsigned Type, unsigned Flags = 0) { + ELFSection &getSection(const std::string &Name, unsigned Type, + unsigned Flags = 0, unsigned Align = 0) { ELFSection *&SN = SectionLookup[Name]; if (SN) return *SN; - SectionList.push_back(Name); + SectionList.push_back(ELFSection(Name, isLittleEndian, is64Bit)); SN = &SectionList.back(); SN->SectionIdx = NumSections++; SN->Type = Type; SN->Flags = Flags; SN->Link = ELFSection::SHN_UNDEF; + SN->Align = Align; return *SN; } @@ -116,23 +124,36 @@ namespace llvm { ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC); } + ELFSection &getNonExecStackSection() { + return getSection(".note.GNU-stack", ELFSection::SHT_PROGBITS, 0, 1); + } + + ELFSection &getSymbolTableSection() { + return getSection(".symtab", ELFSection::SHT_SYMTAB, 0); + } + + ELFSection &getStringTableSection() { + return getSection(".strtab", ELFSection::SHT_STRTAB, 0, 1); + } + ELFSection &getDataSection() { return getSection(".data", ELFSection::SHT_PROGBITS, ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); } + ELFSection &getBSSSection() { return getSection(".bss", ELFSection::SHT_NOBITS, ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); } - /// SymbolTable - This is the list of symbols we have emitted to the file. 
+ /// SymbolList - This is the list of symbols we have emitted to the file. /// This actually gets rearranged before emission to the file (to put the /// local symbols first in the list). - std::vector<ELFSym> SymbolTable; + std::vector<ELFSym> SymbolList; - /// PendingSyms - This is a list of externally defined symbols that we have - /// been asked to emit, but have not seen a reference to. When a reference - /// is seen, the symbol will move from this list to the SymbolTable. + /// PendingGlobals - List of externally defined symbols that we have been + /// asked to emit, but have not seen a reference to. When a reference + /// is seen, the symbol will move from this list to the SymbolList. SetVector<GlobalValue*> PendingGlobals; // As we complete the ELF file, we need to update fields in the ELF header @@ -142,11 +163,17 @@ namespace llvm { unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header. unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header. unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header. + private: void EmitGlobal(GlobalVariable *GV); - void EmitSymbolTable(); + void EmitGlobalConstant(const Constant *C, ELFSection &GblS); + void EmitGlobalConstantStruct(const ConstantStruct *CVS, + ELFSection &GblS); void EmitRelocations(); + void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr); void EmitSectionTableStringTable(); + void EmitSymbol(BinaryObject &SymbolTable, ELFSym &Sym); + void EmitSymbolTable(); void OutputSectionsAndSectionTable(); }; } diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index b3c60e6..a163cac 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -240,7 +240,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) { PM.add(createMachineLICMPass()); PM.add(createMachineSinkingPass()); - printAndVerify(PM, /* allowDoubleDefs= */ true); + printAndVerify(PM, /* allowDoubleDefs= */ false); } // Run pre-ra passes. diff --git a/lib/CodeGen/LazyLiveness.cpp b/lib/CodeGen/LazyLiveness.cpp new file mode 100644 index 0000000..6fb35d2 --- /dev/null +++ b/lib/CodeGen/LazyLiveness.cpp @@ -0,0 +1,158 @@ +//===- LazyLiveness.cpp - Lazy, CFG-invariant liveness information --------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements a lazy liveness analysis as per "Fast Liveness Checking +// for SSA-form Programs," by Boissinot, et al. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "lazyliveness" +#include "llvm/CodeGen/LazyLiveness.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +using namespace llvm; + +char LazyLiveness::ID = 0; +static RegisterPass<LazyLiveness> X("lazy-liveness", "Lazy Liveness Analysis"); + +void LazyLiveness::computeBackedgeChain(MachineFunction& mf, + MachineBasicBlock* MBB) { + SparseBitVector<128> tmp = rv[MBB]; + tmp.set(preorder[MBB]); + tmp &= backedge_source; + calculated.set(preorder[MBB]); + + for (SparseBitVector<128>::iterator I = tmp.begin(); I != tmp.end(); ++I) { + MachineBasicBlock* SrcMBB = rev_preorder[*I]; + + for (MachineBasicBlock::succ_iterator SI = SrcMBB->succ_begin(); + SI != SrcMBB->succ_end(); ++SI) { + MachineBasicBlock* TgtMBB = *SI; + + if (backedges.count(std::make_pair(SrcMBB, TgtMBB)) && + !rv[MBB].test(preorder[TgtMBB])) { + if (!calculated.test(preorder[TgtMBB])) + computeBackedgeChain(mf, TgtMBB); + + tv[MBB].set(preorder[TgtMBB]); + tv[MBB] |= tv[TgtMBB]; + } + } + + tv[MBB].reset(preorder[MBB]); + } +} + +bool LazyLiveness::runOnMachineFunction(MachineFunction &mf) { + rv.clear(); + tv.clear(); + backedges.clear(); + backedge_source.clear(); + backedge_target.clear(); + calculated.clear(); + preorder.clear(); + + MRI = &mf.getRegInfo(); + MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); + + // Step 0: Compute preorder numbering for all MBBs. + unsigned num = 0; + for (df_iterator<MachineDomTreeNode*> DI = df_begin(MDT.getRootNode()), + DE = df_end(MDT.getRootNode()); DI != DE; ++DI) { + preorder[(*DI)->getBlock()] = num++; + rev_preorder.push_back((*DI)->getBlock()); + } + + // Step 1: Compute the transitive closure of the CFG, ignoring backedges. + for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()), + POE = po_end(&*mf.begin()); POI != POE; ++POI) { + MachineBasicBlock* MBB = *POI; + SparseBitVector<128>& entry = rv[MBB]; + entry.set(preorder[MBB]); + + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + DenseMap<MachineBasicBlock*, SparseBitVector<128> >::iterator SII = + rv.find(*SI); + + // Because we're iterating in postorder, any successor that does not yet + // have an rv entry must be on a backedge. 
+ if (SII != rv.end()) { + entry |= SII->second; + } else { + backedges.insert(std::make_pair(MBB, *SI)); + backedge_source.set(preorder[MBB]); + backedge_target.set(preorder[*SI]); + } + } + } + + for (SparseBitVector<128>::iterator I = backedge_source.begin(); + I != backedge_source.end(); ++I) + computeBackedgeChain(mf, rev_preorder[*I]); + + for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()), + POE = po_end(&*mf.begin()); POI != POE; ++POI) + if (!backedge_target.test(preorder[*POI])) + for (MachineBasicBlock::succ_iterator SI = (*POI)->succ_begin(), + SE = (*POI)->succ_end(); SI != SE; ++SI) + if (!backedges.count(std::make_pair(*POI, *SI)) && tv.count(*SI)) { + SparseBitVector<128>& PBV = tv[*POI]; + PBV = tv[*SI]; + } + + for (po_iterator<MachineBasicBlock*> POI = po_begin(&*mf.begin()), + POE = po_end(&*mf.begin()); POI != POE; ++POI) + tv[*POI].set(preorder[*POI]); + + return false; +} + +bool LazyLiveness::vregLiveIntoMBB(unsigned vreg, MachineBasicBlock* MBB) { + MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>(); + + MachineBasicBlock* DefMBB = MRI->def_begin(vreg)->getParent(); + unsigned def = preorder[DefMBB]; + unsigned max_dom = 0; + for (df_iterator<MachineDomTreeNode*> DI = df_begin(MDT[DefMBB]), + DE = df_end(MDT[DefMBB]); DI != DE; ++DI) { + if (preorder[DI->getBlock()] > max_dom) { + max_dom = preorder[(*DI)->getBlock()]; + } + } + + if (preorder[MBB] <= def || max_dom < preorder[MBB]) + return false; + + SparseBitVector<128>::iterator I = tv[MBB].begin(); + while (I != tv[MBB].end() && *I <= def) ++I; + while (I != tv[MBB].end() && *I < max_dom) { + for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(vreg), + UE = MachineRegisterInfo::use_end(); UI != UE; ++UI) { + MachineBasicBlock* UseMBB = UI->getParent(); + if (rv[rev_preorder[*I]].test(preorder[UseMBB])) + return true; + + unsigned t_dom = 0; + for (df_iterator<MachineDomTreeNode*> DI = + df_begin(MDT[rev_preorder[*I]]), DE = df_end(MDT[rev_preorder[*I]]); + DI != DE; ++DI) + if (preorder[DI->getBlock()] > t_dom) { + max_dom = preorder[(*DI)->getBlock()]; + } + I = tv[MBB].begin(); + while (I != tv[MBB].end() && *I < t_dom) ++I; + } + } + + return false; +} diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 944468e..3feb92f 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -214,26 +214,33 @@ void RegScavenger::forward() { } // Process uses first. - BitVector UseRegs(NumPhysRegs); + BitVector KillRegs(NumPhysRegs); for (unsigned i = 0, e = UseMOs.size(); i != e; ++i) { const MachineOperand MO = *UseMOs[i].first; unsigned Reg = MO.getReg(); assert(isUsed(Reg) && "Using an undefined register!"); - if (MO.isKill() && !isReserved(Reg)) { - UseRegs.set(Reg); + // Kill of implicit_def defined registers are ignored. e.g. + // entry: 0x2029ab8, LLVM BB @0x1b06080, ID#0: + // Live Ins: %R0 + // %R0<def> = IMPLICIT_DEF + // %R0<def> = IMPLICIT_DEF + // STR %R0<kill>, %R0, %reg0, 0, 14, %reg0, Mem:ST(4,4) [0x1b06510 + 0] + // %R1<def> = LDR %R0, %reg0, 24, 14, %reg0, Mem:LD(4,4) [0x1b065bc + 0] + if (MO.isKill() && !isReserved(Reg) && !isImplicitlyDefined(Reg)) { + KillRegs.set(Reg); // Mark sub-registers as used. for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); unsigned SubReg = *SubRegs; ++SubRegs) - UseRegs.set(SubReg); + KillRegs.set(SubReg); } } // Change states of all registers after all the uses are processed to guard // against multiple uses. 
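The guard described in that comment is a two-phase update: kill flags are only recorded into a scratch bit vector while the operands are scanned, and the scavenger's state is flipped in a single step afterwards, so an instruction that reads the same register through several operands never sees it go dead mid-scan. A reduced sketch of the pattern, with std::bitset standing in for BitVector and a made-up operand record in place of MachineOperand:

  #include <bitset>
  #include <cstdio>
  #include <vector>

  struct Operand { unsigned Reg; bool IsKill; };

  int main() {
    std::bitset<16> RegsAvailable;                 // 1 = register is free
    // One instruction that uses r3 twice; only the last use carries the kill.
    std::vector<Operand> Uses = {{3, false}, {3, true}, {5, true}};

    std::bitset<16> KillRegs;                      // phase 1: record kills only
    for (const Operand &MO : Uses) {
      // Setting RegsAvailable here instead would mark r3 free while the
      // other r3 operand still expects it to be live.
      if (MO.IsKill)
        KillRegs.set(MO.Reg);
    }
    RegsAvailable |= KillRegs;                     // phase 2: apply all at once

    std::printf("freed: %s\n", KillRegs.to_string().c_str());
    return 0;
  }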
- setUnused(UseRegs); + setUnused(KillRegs); // Process early clobber defs then process defs. We can have a early clobber // that is dead, it should not conflict with a def that happens one "slot" diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index f8ae884..43995cb 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -563,6 +563,11 @@ void *JIT::getPointerToFunction(Function *F) { return Addr; // Check if function already code gen'd MutexGuard locked(lock); + + // Now that this thread owns the lock, check if another thread has already + // code gen'd the function. + if (void *Addr = getPointerToGlobalIfAvailable(F)) + return Addr; // Make sure we read in the function if it exists in this Module. if (F->hasNotBeenReadFromBitcode()) { diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index ac7de91..7edd118 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -98,12 +98,12 @@ FunctionPass *createARMCodePrinterPass(raw_ostream &O, FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM, MachineCodeEmitter &MCE); -FunctionPass *createARMCodeEmitterPass( ARMTargetMachine &TM, - MachineCodeEmitter &MCE); -FunctionPass *createARMJITCodeEmitterPass( ARMTargetMachine &TM, - JITCodeEmitter &JCE); +FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM, + MachineCodeEmitter &MCE); +FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM, + JITCodeEmitter &JCE); -FunctionPass *createARMLoadStoreOptimizationPass(); +FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); FunctionPass *createARMConstantIslandPass(); } // end namespace llvm; diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index 4ac6857..594811d 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -28,6 +28,8 @@ def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE", "ARM v5TE, v5TEj, v5TExp">; def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6", "ARM v6">; +def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2", + "ARM v6t2">; def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A", "ARM v7A">; def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2", @@ -92,9 +94,11 @@ def : Proc<"arm1176jzf-s", [ArchV6, FeatureVFP2]>; def : Proc<"mpcorenovfp", [ArchV6]>; def : Proc<"mpcore", [ArchV6, FeatureVFP2]>; -def : Proc<"arm1156t2-s", [ArchV6, FeatureThumb2]>; -def : Proc<"arm1156t2f-s", [ArchV6, FeatureThumb2, FeatureVFP2]>; +// V6T2 Processors. +def : Proc<"arm1156t2-s", [ArchV6T2, FeatureThumb2]>; +def : Proc<"arm1156t2f-s", [ArchV6T2, FeatureThumb2, FeatureVFP2]>; +// V7 Processors. def : Proc<"cortex-a8", [ArchV7A, FeatureThumb2, FeatureNEON]>; def : Proc<"cortex-a9", [ArchV7A, FeatureThumb2, FeatureNEON]>; diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 6cd786e..f126760 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -17,6 +17,11 @@ class CCIfSubtarget<string F, CCAction A>: class CCIfAlign<string Align, CCAction A>: CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>; +/// CCIfFloatABI - Match of the float ABI and the arg. ABIType may be "Hard" or +/// "Soft". 
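One note on the JIT.cpp hunk further up: the new lookup is the classic check / lock / re-check shape, which keeps the common already-compiled case on the fast path while ensuring only one thread actually generates code. A rough sketch of that shape with std::mutex — the map, the codeGen placeholder, and all names here are invented, not the JIT's real data structures, and a production fast path would need its own thread-safe lookup:

  #include <cstdio>
  #include <map>
  #include <mutex>
  #include <string>

  static std::mutex Lock;
  static std::map<std::string, void*> Compiled;    // stand-in for the JIT map

  static void *codeGen(const std::string &Name) {  // stand-in "expensive" step
    std::printf("compiling %s\n", Name.c_str());
    return reinterpret_cast<void*>(0x1000);
  }

  void *getPointer(const std::string &Name) {
    {                                              // unlocked fast path
      std::map<std::string, void*>::const_iterator I = Compiled.find(Name);
      if (I != Compiled.end())
        return I->second;
    }
    std::lock_guard<std::mutex> Guard(Lock);
    // Re-check now that this thread owns the lock: another thread may have
    // finished the same function between the first lookup and acquisition.
    std::map<std::string, void*>::const_iterator I = Compiled.find(Name);
    if (I != Compiled.end())
      return I->second;
    return Compiled[Name] = codeGen(Name);
  }

  int main() {
    getPointer("f");
    getPointer("f");                               // second call hits the cache
    return 0;
  }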
+class CCIfFloatABI<string ABIType, CCAction A>: + CCIf<!strconcat("llvm::FloatABIType == llvm::FloatABI::", ABIType), A>; + //===----------------------------------------------------------------------===// // ARM APCS Calling Convention //===----------------------------------------------------------------------===// @@ -43,9 +48,10 @@ def RetCC_ARM_APCS : CallingConv<[ ]>; //===----------------------------------------------------------------------===// -// ARM AAPCS (EABI) Calling Convention +// ARM AAPCS (EABI) Calling Convention, common parts //===----------------------------------------------------------------------===// -def CC_ARM_AAPCS : CallingConv<[ + +def CC_ARM_AAPCS_Common : CallingConv<[ CCIfType<[i8, i16], CCPromoteToType<i32>>, @@ -53,23 +59,51 @@ def CC_ARM_AAPCS : CallingConv<[ // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register // (and the same is true for f64 if VFP is not enabled) CCIfType<[i32], CCIfAlign<"8", CCAssignToRegWithShadow<[R0, R2], [R0, R1]>>>, - CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, - - CCIfType<[f32], CCBitConvertToType<i32>>, CCIfType<[i32], CCIf<"State.getNextStackOffset() == 0 &&" "ArgFlags.getOrigAlign() != 8", CCAssignToReg<[R0, R1, R2, R3]>>>, - CCIfType<[i32], CCAssignToStack<4, 4>>, + CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[f64], CCAssignToStack<8, 8>> ]>; -def RetCC_ARM_AAPCS : CallingConv<[ +def RetCC_ARM_AAPCS_Common : CallingConv<[ + CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, + CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +]>; + +//===----------------------------------------------------------------------===// +// ARM AAPCS (EABI) Calling Convention +//===----------------------------------------------------------------------===// + +def CC_ARM_AAPCS : CallingConv<[ + CCIfType<[f64], CCCustom<"CC_ARM_AAPCS_Custom_f64">>, CCIfType<[f32], CCBitConvertToType<i32>>, + CCDelegateTo<CC_ARM_AAPCS_Common> +]>; + +def RetCC_ARM_AAPCS : CallingConv<[ CCIfType<[f64], CCCustom<"RetCC_ARM_AAPCS_Custom_f64">>, + CCIfType<[f32], CCBitConvertToType<i32>>, + CCDelegateTo<RetCC_ARM_AAPCS_Common> +]>; - CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, - CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> +//===----------------------------------------------------------------------===// +// ARM AAPCS-VFP (EABI) Calling Convention +//===----------------------------------------------------------------------===// + +def CC_ARM_AAPCS_VFP : CallingConv<[ + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15]>>, + CCDelegateTo<CC_ARM_AAPCS_Common> +]>; + +def RetCC_ARM_AAPCS_VFP : CallingConv<[ + CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, + CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8, + S9, S10, S11, S12, S13, S14, S15]>>, + CCDelegateTo<RetCC_ARM_AAPCS_Common> ]>; //===----------------------------------------------------------------------===// @@ -77,11 +111,19 @@ def RetCC_ARM_AAPCS : CallingConv<[ //===----------------------------------------------------------------------===// def CC_ARM : CallingConv<[ + CCIfSubtarget<"isAAPCS_ABI()", + CCIfSubtarget<"hasVFP2()", + CCIfFloatABI<"Hard", + CCDelegateTo<CC_ARM_AAPCS_VFP>>>>, CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<CC_ARM_AAPCS>>, CCDelegateTo<CC_ARM_APCS> ]>; def RetCC_ARM : CallingConv<[ + CCIfSubtarget<"isAAPCS_ABI()", + CCIfSubtarget<"hasVFP2()", + CCIfFloatABI<"Hard", + 
CCDelegateTo<RetCC_ARM_AAPCS_VFP>>>>, CCIfSubtarget<"isAAPCS_ABI()", CCDelegateTo<RetCC_ARM_AAPCS>>, CCDelegateTo<RetCC_ARM_APCS> ]>; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c0fd9dc..ec8bd1f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1101,7 +1101,12 @@ ARMTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { else RC = ARM::GPRRegisterClass; - if (RegVT == MVT::f64) { + if (FloatABIType == FloatABI::Hard) { + if (RegVT == MVT::f32) + RC = ARM::SPRRegisterClass; + else if (RegVT == MVT::f64) + RC = ARM::DPRRegisterClass; + } else if (RegVT == MVT::f64) { // f64 is passed in pairs of GPRs and must be combined. RegVT = MVT::i32; } else if (!((RegVT == MVT::i32) || (RegVT == MVT::f32))) diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 680e772..cc9f1a5 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -451,7 +451,7 @@ multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> { /// the function. The first operand is the ID# for this instruction, the second /// is the index into the MachineConstantPool that this is, the third is the /// size in bytes of this constant pool entry. -let isNotDuplicable = 1 in +let neverHasSideEffects = 1, isNotDuplicable = 1 in def CONSTPOOL_ENTRY : PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx, i32imm:$size), @@ -771,6 +771,7 @@ def STM : AXI4st<(outs), // Move Instructions. // +let neverHasSideEffects = 1 in def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, "mov", " $dst, $src", []>, UnaryDP; def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, @@ -946,6 +947,7 @@ def MLA : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), [(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>; // Extra precision multiplies with low / high results +let neverHasSideEffects = 1 in { def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), "smull", " $ldst, $hdst, $a, $b", []>; @@ -967,6 +969,7 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), "umaal", " $ldst, $hdst, $a, $b", []>, Requires<[IsARM, HasV6]>; +} // neverHasSideEffects // Most significant word multiply def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b), diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index ffb83a8..54232f6 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -298,6 +298,7 @@ def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "add $dst, $lhs, $rhs", [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>; +let neverHasSideEffects = 1 in def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs), "add $dst, $rhs @ addhirr", []>; @@ -387,6 +388,7 @@ def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src), // Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy', // which is MOV(3). This also supports high registers. 
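Back in the LowerFORMAL_ARGUMENTS hunk above, the effect of the hard-float ABI is simply a different register-class choice: f32 and f64 arguments stay in VFP registers instead of being bit-cast or split into GPRs. A stripped-down sketch of that decision — the enums below are simplified stand-ins, not LLVM's MVT or register-class types:

  #include <cstdio>

  enum class ValType { i32, f32, f64 };
  enum class FloatABI { Soft, Hard };
  enum class RegClass { GPR, SPR, DPR, GPRPair };

  // Hard float keeps FP arguments in VFP registers; otherwise f64 arrives as
  // a pair of GPR copies to be combined and f32 as a single GPR bit-pattern.
  RegClass classForIncomingArg(ValType VT, FloatABI ABI) {
    if (ABI == FloatABI::Hard) {
      if (VT == ValType::f32) return RegClass::SPR;
      if (VT == ValType::f64) return RegClass::DPR;
    } else if (VT == ValType::f64) {
      return RegClass::GPRPair;
    }
    return RegClass::GPR;
  }

  int main() {
    std::printf("f64 hard -> %d, f64 soft -> %d\n",
                static_cast<int>(classForIncomingArg(ValType::f64, FloatABI::Hard)),
                static_cast<int>(classForIncomingArg(ValType::f64, FloatABI::Soft)));
    return 0;
  }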
+let neverHasSideEffects = 1 in { def tMOVr : TI<(outs tGPR:$dst), (ins tGPR:$src), "cpy $dst, $src", []>; def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src), @@ -395,6 +397,7 @@ def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src), "cpy $dst, $src\t@ lor2hir", []>; def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src), "cpy $dst, $src\t@ hir2hir", []>; +} // neverHasSideEffects def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), "mul $dst, $rhs", diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 0247daf..9104c77 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -192,11 +192,13 @@ def FCVTSD : AI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, let Inst{7-4} = 0b1100; } +let neverHasSideEffects = 1 in { def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), "fcpyd", " $dst, $a", []>; def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), "fcpys", " $dst, $a", []>; +} // neverHasSideEffects def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), "fnegd", " $dst, $a", diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 963ff0d..684ecb4 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -17,24 +17,31 @@ #include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Support/Compiler.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumLDMGened , "Number of ldm instructions generated"); STATISTIC(NumSTMGened , "Number of stm instructions generated"); STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); +STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); + +/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine +/// load / store instructions to form ldm / stm instructions. namespace { struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass { @@ -81,12 +88,6 @@ namespace { char ARMLoadStoreOpt::ID = 0; } -/// createARMLoadStoreOptimizationPass - returns an instance of the load / store -/// optimization pass. -FunctionPass *llvm::createARMLoadStoreOptimizationPass() { - return new ARMLoadStoreOpt(); -} - static int getLoadStoreMultipleOpcode(int Opcode) { switch (Opcode) { case ARM::LDR: @@ -582,6 +583,23 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { RS->forward(prior(Loc)); } +static int getMemoryOpOffset(const MachineInstr *MI) { + int Opcode = MI->getOpcode(); + bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; + unsigned NumOperands = MI->getDesc().getNumOperands(); + unsigned OffField = MI->getOperand(NumOperands-3).getImm(); + int Offset = isAM2 + ? 
ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; + if (isAM2) { + if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } else { + if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) + Offset = -Offset; + } + return Offset; +} + /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { @@ -606,22 +624,11 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { bool isMemOp = isMemoryOp(MBBI); if (isMemOp) { int Opcode = MBBI->getOpcode(); - bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; unsigned Size = getLSMultipleTransferSize(MBBI); unsigned Base = MBBI->getOperand(1).getReg(); unsigned PredReg = 0; ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg); - unsigned NumOperands = MBBI->getDesc().getNumOperands(); - unsigned OffField = MBBI->getOperand(NumOperands-3).getImm(); - int Offset = isAM2 - ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; - if (isAM2) { - if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } else { - if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) - Offset = -Offset; - } + int Offset = getMemoryOpOffset(MBBI); // Watch out for: // r4 := ldr [r5] // r5 := ldr [r5, #4] @@ -744,6 +751,17 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { return NumMerges > 0; } +namespace { + struct OffsetCompare { + bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { + int LOffset = getMemoryOpOffset(LHS); + int ROffset = getMemoryOpOffset(RHS); + assert(LHS == RHS || LOffset != ROffset); + return LOffset > ROffset; + } + }; +} + /// MergeReturnIntoLDM - If this is a exit BB, try merging the return op /// (bx lr) into the preceeding stack restore so it directly restore the value /// of LR into pc. @@ -788,3 +806,277 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { delete RS; return Modified; } + + +/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move +/// load / stores from consecutive locations close to make it more +/// likely they will be combined later. 
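That is the whole strategy of the new pre-allocation pass: loads and stores sharing a base register are sorted by the offset decoded in getMemoryOpOffset, and only a run of same-width accesses at strictly consecutive offsets (capped at four) is worth pulling together. A toy version of the grouping step — MemOp is a made-up record, and it sorts ascending for readability where the pass's OffsetCompare sorts descending and scans from the back:

  #include <algorithm>
  #include <cstdio>
  #include <vector>

  struct MemOp { int Offset; unsigned Bytes; };   // one access off a common base

  // Length of the run of same-size, gap-free accesses starting at the lowest
  // offset, capped at 4 just like the pass.
  unsigned consecutiveRun(std::vector<MemOp> &Ops) {
    std::sort(Ops.begin(), Ops.end(),
              [](const MemOp &L, const MemOp &R) { return L.Offset < R.Offset; });
    unsigned Run = 1;
    for (unsigned i = 1; i < Ops.size() && Run < 4; ++i) {
      if (Ops[i].Bytes != Ops[i - 1].Bytes ||
          Ops[i].Offset != Ops[i - 1].Offset + (int)Ops[i - 1].Bytes)
        break;                                    // width changed or a gap
      ++Run;
    }
    return Run;
  }

  int main() {
    std::vector<MemOp> Ops = {{8, 4}, {0, 4}, {4, 4}, {16, 4}};    // 16 leaves a gap
    std::printf("movable run: %u accesses\n", consecutiveRun(Ops)); // prints 3
    return 0;
  }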
+ +namespace { + struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{ + static char ID; + ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; + MachineRegisterInfo *MRI; + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM pre- register allocation load / store optimization pass"; + } + + private: + bool RescheduleOps(MachineBasicBlock *MBB, + SmallVector<MachineInstr*, 4> &Ops, + unsigned Base, bool isLd, + DenseMap<MachineInstr*, unsigned> &MI2LocMap); + bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB); + }; + char ARMPreAllocLoadStoreOpt::ID = 0; +} + +bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + TII = Fn.getTarget().getInstrInfo(); + TRI = Fn.getTarget().getRegisterInfo(); + MRI = &Fn.getRegInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; + ++MFI) + Modified |= RescheduleLoadStoreInstrs(MFI); + + return Modified; +} + +static bool IsSafeToMove(bool isLd, unsigned Base, + MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + SmallPtrSet<MachineInstr*, 4> MoveOps, + const TargetRegisterInfo *TRI) { + // Are there stores / loads / calls between them? + // FIXME: This is overly conservative. We should make use of alias information + // some day. + while (++I != E) { + const TargetInstrDesc &TID = I->getDesc(); + if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects()) + return false; + if (isLd && TID.mayStore()) + return false; + if (!isLd) { + if (TID.mayLoad()) + return false; + // It's not safe to move the first 'str' down. + // str r1, [r0] + // strh r5, [r0] + // str r4, [r0, #+4] + if (TID.mayStore() && !MoveOps.count(&*I)) + return false; + } + for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) { + MachineOperand &MO = I->getOperand(j); + if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base)) + return false; + } + } + return true; +} + +bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, + SmallVector<MachineInstr*, 4> &Ops, + unsigned Base, bool isLd, + DenseMap<MachineInstr*, unsigned> &MI2LocMap) { + bool RetVal = false; + + // Sort by offset (in reverse order). + std::sort(Ops.begin(), Ops.end(), OffsetCompare()); + + // The loads / stores of the same base are in order. Scan them from first to + // last and check for the followins: + // 1. Any def of base. + // 2. Any gaps. + while (Ops.size() > 1) { + unsigned FirstLoc = ~0U; + unsigned LastLoc = 0; + MachineInstr *FirstOp = 0; + MachineInstr *LastOp = 0; + int LastOffset = 0; + unsigned LastBytes = 0; + unsigned NumMove = 0; + for (int i = Ops.size() - 1; i >= 0; --i) { + MachineInstr *Op = Ops[i]; + unsigned Loc = MI2LocMap[Op]; + if (Loc <= FirstLoc) { + FirstLoc = Loc; + FirstOp = Op; + } + if (Loc >= LastLoc) { + LastLoc = Loc; + LastOp = Op; + } + + int Offset = getMemoryOpOffset(Op); + unsigned Bytes = getLSMultipleTransferSize(Op); + if (LastBytes) { + if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes)) + break; + } + LastOffset = Offset; + LastBytes = Bytes; + if (++NumMove == 4) + break; + } + + if (NumMove <= 1) + Ops.pop_back(); + else { + SmallPtrSet<MachineInstr*, 4> MoveOps; + for (int i = NumMove-1; i >= 0; --i) + MoveOps.insert(Ops[i]); + + // Be conservative, if the instructions are too far apart, don't + // move them. We want to limit the increase of register pressure. 
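Distance is only one of the two gates applied below; the other is the IsSafeToMove scan defined above, which walks everything between the first and last candidate and gives up on calls, unmodeled side effects, conflicting memory operations, or a redefinition of the base register. A reduced sketch of that legality walk over a flat list — Inst is a stand-in for the MachineInstr/TargetInstrDesc queries, and the real code additionally exempts the stores it is itself moving:

  #include <cstdio>
  #include <vector>

  struct Inst {
    bool IsCall;
    bool MayLoad;
    bool MayStore;
    int DefReg;                // register defined by this instruction, -1 if none
  };

  // Conservatively decide whether loads (IsLoad) or stores off BaseReg may be
  // moved past every instruction in Between.
  bool isSafeToMove(bool IsLoad, int BaseReg, const std::vector<Inst> &Between) {
    for (const Inst &I : Between) {
      if (I.IsCall)
        return false;                             // calls act as barriers
      if (IsLoad && I.MayStore)
        return false;                             // a store could feed the loads
      if (!IsLoad && (I.MayLoad || I.MayStore))
        return false;                             // keep memory order intact
      if (I.DefReg == BaseReg)
        return false;                             // base changes in between
    }
    return true;
  }

  int main() {
    std::vector<Inst> Between = {{false, true, false, -1}};        // a stray load
    std::printf("safe: %d\n", isSafeToMove(/*IsLoad=*/false, 0, Between)); // 0
    return 0;
  }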
+ bool DoMove = (LastLoc - FirstLoc) < NumMove*4; + if (DoMove) + DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI); + if (!DoMove) { + for (unsigned i = 0; i != NumMove; ++i) + Ops.pop_back(); + } else { + // This is the new location for the loads / stores. + MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; + while (InsertPos != MBB->end() && MoveOps.count(InsertPos)) + ++InsertPos; + for (unsigned i = 0; i != NumMove; ++i) { + MachineInstr *Op = Ops.back(); + Ops.pop_back(); + MBB->splice(InsertPos, MBB, Op); + } + + NumLdStMoved += NumMove; + RetVal = true; + } + } + } + + return RetVal; +} + +bool +ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { + bool RetVal = false; + + DenseMap<MachineInstr*, unsigned> MI2LocMap; + DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap; + DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap; + SmallVector<unsigned, 4> LdBases; + SmallVector<unsigned, 4> StBases; + + unsigned Loc = 0; + MachineBasicBlock::iterator MBBI = MBB->begin(); + MachineBasicBlock::iterator E = MBB->end(); + while (MBBI != E) { + for (; MBBI != E; ++MBBI) { + MachineInstr *MI = MBBI; + const TargetInstrDesc &TID = MI->getDesc(); + if (TID.isCall() || TID.isTerminator()) { + // Stop at barriers. + ++MBBI; + break; + } + + MI2LocMap[MI] = Loc++; + if (!isMemoryOp(MI)) + continue; + unsigned PredReg = 0; + if (getInstrPredicate(MI, PredReg) != ARMCC::AL) + continue; + + int Opcode = MI->getOpcode(); + bool isLd = Opcode == ARM::LDR || + Opcode == ARM::FLDS || Opcode == ARM::FLDD; + unsigned Base = MI->getOperand(1).getReg(); + int Offset = getMemoryOpOffset(MI); + + bool StopHere = false; + if (isLd) { + DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = + Base2LdsMap.find(Base); + if (BI != Base2LdsMap.end()) { + for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { + if (Offset == getMemoryOpOffset(BI->second[i])) { + StopHere = true; + break; + } + } + if (!StopHere) + BI->second.push_back(MI); + } else { + SmallVector<MachineInstr*, 4> MIs; + MIs.push_back(MI); + Base2LdsMap[Base] = MIs; + LdBases.push_back(Base); + } + } else { + DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI = + Base2StsMap.find(Base); + if (BI != Base2StsMap.end()) { + for (unsigned i = 0, e = BI->second.size(); i != e; ++i) { + if (Offset == getMemoryOpOffset(BI->second[i])) { + StopHere = true; + break; + } + } + if (!StopHere) + BI->second.push_back(MI); + } else { + SmallVector<MachineInstr*, 4> MIs; + MIs.push_back(MI); + Base2StsMap[Base] = MIs; + StBases.push_back(Base); + } + } + + if (StopHere) { + // Found a duplicate (a base+offset combination that's seen earlier). Backtrack. + --Loc; + break; + } + } + + // Re-schedule loads. + for (unsigned i = 0, e = LdBases.size(); i != e; ++i) { + unsigned Base = LdBases[i]; + SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base]; + if (Lds.size() > 1) + RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap); + } + + // Re-schedule stores. + for (unsigned i = 0, e = StBases.size(); i != e; ++i) { + unsigned Base = StBases[i]; + SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base]; + if (Sts.size() > 1) + RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap); + } + + if (MBBI != E) { + Base2LdsMap.clear(); + Base2StsMap.clear(); + LdBases.clear(); + StBases.clear(); + } + } + + return RetVal; +} + + +/// createARMLoadStoreOptimizationPass - returns an instance of the load / store +/// optimization pass. 
+FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) { + if (PreAlloc) + return new ARMPreAllocLoadStoreOpt(); + return new ARMLoadStoreOpt(); +} diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td index b95d1f9..ebe7d58 100644 --- a/lib/Target/ARM/ARMRegisterInfo.td +++ b/lib/Target/ARM/ARMRegisterInfo.td @@ -219,3 +219,18 @@ def DPR : RegisterClass<"ARM", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7, D8, // Condition code registers. def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]>; + +//===----------------------------------------------------------------------===// +// Subregister Set Definitions... now that we have all of the pieces, define the +// sub registers for each register. +// + +def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15], + [S0, S2, S4, S6, S8, S10, S12, S14, + S16, S18, S20, S22, S24, S26, S28, S30]>; + +def : SubRegSet<2, [D0, D1, D2, D3, D4, D5, D6, D7, + D8, D9, D10, D11, D12, D13, D14, D15], + [S1, S3, S5, S7, S9, S11, S13, S15, + S17, S19, S21, S23, S25, S27, S29, S31]>; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index ef78cd5..a978380 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -14,6 +14,8 @@ #include "ARMSubtarget.h" #include "ARMGenSubtarget.inc" #include "llvm/Module.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" using namespace llvm; ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, @@ -28,6 +30,10 @@ ARMSubtarget::ARMSubtarget(const Module &M, const std::string &FS, , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. , TargetABI(ARM_ABI_APCS) { + // default to soft float ABI + if (FloatABIType == FloatABI::Default) + FloatABIType = FloatABI::Soft; + // Determine default and user specified characteristics // Parse features string. 
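A small observation on the SubRegSet definitions added above: they encode the VFP register pairing D<n> = {S<2n>, S<2n+1>}, with subreg index 1 naming the even (low) single and index 2 the odd (high) one. The arithmetic is easy to sanity-check in isolation (nothing below touches the real register descriptors):

  #include <cassert>
  #include <cstdio>

  // Subreg index 1 -> low single, index 2 -> high single, for D0..D15.
  unsigned singleRegOfDouble(unsigned D, unsigned Index) {
    assert(D < 16 && (Index == 1 || Index == 2));
    return 2 * D + (Index - 1);
  }

  int main() {
    // D7 overlaps S14 and S15, matching the two tables in ARMRegisterInfo.td.
    std::printf("D7 -> S%u, S%u\n",
                singleRegOfDouble(7, 1), singleRegOfDouble(7, 2));
    return 0;
  }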
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 8b469cf..0704055 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -23,7 +23,7 @@ class Module; class ARMSubtarget : public TargetSubtarget { protected: enum ARMArchEnum { - V4T, V5T, V5TE, V6, V7A + V4T, V5T, V5TE, V6, V6T2, V7A }; enum ARMFPEnum { @@ -92,6 +92,7 @@ protected: bool hasV5TOps() const { return ARMArchVersion >= V5T; } bool hasV5TEOps() const { return ARMArchVersion >= V5TE; } bool hasV6Ops() const { return ARMArchVersion >= V6; } + bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; } bool hasV7Ops() const { return ARMArchVersion >= V7A; } bool hasVFP2() const { return ARMFPUType >= VFPv2; } @@ -105,6 +106,7 @@ protected: bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; } bool isThumb() const { return IsThumb; } + bool isThumb1() const { return IsThumb && (ThumbMode == Thumb1); } bool isThumb2() const { return IsThumb && (ThumbMode >= Thumb2); } bool useThumbBacktraces() const { return UseThumbBacktraces; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 1dc7d19..7033907 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -23,6 +23,9 @@ #include "llvm/Target/TargetOptions.h" using namespace llvm; +static cl::opt<bool> +EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden, + cl::desc("Enable pre-regalloc load store optimization pass")); static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden, cl::desc("Disable load store optimization pass")); static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden, @@ -144,6 +147,16 @@ bool ARMTargetMachine::addInstSelector(PassManagerBase &PM, return false; } +bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM, + CodeGenOpt::Level OptLevel) { + if (!EnablePreLdStOpti) + return false; + // FIXME: temporarily disabling load / store optimization pass for Thumb mode. + if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb()) + PM.add(createARMLoadStoreOptimizationPass(true)); + return true; +} + bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { // FIXME: temporarily disabling load / store optimization pass for Thumb mode. diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 916a8aa..7192c1b 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -71,6 +71,7 @@ public: // Pass Pipeline Configuration virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); + virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); virtual bool addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 1cf0a91..7cffd0e 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_library(LLVMTarget Target.cpp TargetAsmInfo.cpp TargetData.cpp + TargetELFWriterInfo.cpp TargetFrameInfo.cpp TargetInstrInfo.cpp TargetMachOWriterInfo.cpp @@ -14,4 +15,4 @@ add_llvm_library(LLVMTarget TargetSubtarget.cpp ) -# TODO: Support other targets besides X86. See Makefile.
\ No newline at end of file +# TODO: Support other targets besides X86. See Makefile. diff --git a/lib/Target/PIC16/PIC16AsmPrinter.cpp b/lib/Target/PIC16/PIC16AsmPrinter.cpp index b42ee45..f9a8801 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/PIC16AsmPrinter.cpp @@ -33,8 +33,9 @@ bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) { return true; } -/// runOnMachineFunction - This uses the printInstruction() -/// method to print assembly for each instruction. +/// runOnMachineFunction - This emits the frame section, autos section and +/// assembly for each instruction. Also takes care of function begin debug +/// directive and file begin debug directive (if required) for the function. /// bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { this->MF = &MF; @@ -47,20 +48,38 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { const Function *F = MF.getFunction(); CurrentFnName = Mang->getValueName(F); - DbgInfo.EmitFileDirective(F); - // Emit the function variables. + // Iterate over the first basic block instructions to find if it has a + // DebugLoc. If so emit .file directive. Instructions such as movlw do not + // have valid DebugLoc, so need to iterate over instructions. + MachineFunction::const_iterator I = MF.begin(); + for (MachineBasicBlock::const_iterator MBBI = I->begin(), E = I->end(); + MBBI != E; MBBI++) { + const DebugLoc DLoc = MBBI->getDebugLoc(); + if (!DLoc.isUnknown()) { + GlobalVariable *CU = MF.getDebugLocTuple(DLoc).CompileUnit; + unsigned line = MF.getDebugLocTuple(DLoc).Line; + DbgInfo.EmitFileDirective(CU); + DbgInfo.SetFunctBeginLine(line); + break; + } + } + + // Emit the function frame (args and temps). EmitFunctionFrame(MF); - // Emit function begin debug directives + // Emit function begin debug directive. DbgInfo.EmitFunctBeginDI(F); + // Emit the autos section of function. EmitAutos(CurrentFnName); + + // Now emit the instructions of function in its code section. const char *codeSection = PAN::getCodeSectionName(CurrentFnName).c_str(); const Section *fCodeSection = TAI->getNamedSection(codeSection, SectionFlags::Code); - O << "\n"; // Start the Code Section. + O << "\n"; SwitchToSection (fCodeSection); // Emit the frame address of the function at the beginning of code. @@ -77,14 +96,17 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print out code for the function. for (MachineFunction::const_iterator I = MF.begin(), E = MF.end(); I != E; ++I) { + // Print a label for the basic block. if (I != MF.begin()) { printBasicBlockLabel(I, true); O << '\n'; } + // Print a basic block. for (MachineBasicBlock::const_iterator II = I->begin(), E = I->end(); II != E; ++II) { + // Emit the line directive if source line changed. const DebugLoc DL = II->getDebugLoc(); if (!DL.isUnknown()) { @@ -102,6 +124,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Emit function end debug directives. DbgInfo.EmitFunctEndDI(F, CurLine); + return false; // we didn't modify anything. } @@ -158,11 +181,16 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) { } } +/// printCCOperand - Print the cond code operand. +/// void PIC16AsmPrinter::printCCOperand(const MachineInstr *MI, int opNum) { int CC = (int)MI->getOperand(opNum).getImm(); O << PIC16CondCodeToString((PIC16CC::CondCodes)CC); } +/// printLibcallDecls - print the extern declarations for compiler +/// intrinsics. 
+/// void PIC16AsmPrinter::printLibcallDecls(void) { // If no libcalls used, return. if (LibcallDecls.empty()) return; @@ -180,6 +208,10 @@ void PIC16AsmPrinter::printLibcallDecls(void) { O << TAI->getCommentString() << "External decls for libcalls - END." <<"\n"; } +/// doInitialization - Perfrom Module level initializations here. +/// One task that we do here is to sectionize all global variables. +/// The MemSelOptimizer pass depends on the sectionizing. +/// bool PIC16AsmPrinter::doInitialization (Module &M) { bool Result = AsmPrinter::doInitialization(M); @@ -194,23 +226,23 @@ bool PIC16AsmPrinter::doInitialization (Module &M) { I->setSection(TAI->SectionForGlobal(I)->getName()); } - DbgInfo.EmitFileDirective(M); + DbgInfo.Init(M); EmitFunctionDecls(M); EmitUndefinedVars(M); EmitDefinedVars(M); EmitIData(M); EmitUData(M); EmitRomData(M); - DbgInfo.PopulateFunctsDI(M); return Result; } -// Emit extern decls for functions imported from other modules, and emit -// global declarations for function defined in this module and which are -// available to other modules. +/// Emit extern decls for functions imported from other modules, and emit +/// global declarations for function defined in this module and which are +/// available to other modules. +/// void PIC16AsmPrinter::EmitFunctionDecls (Module &M) { // Emit declarations for external functions. - O << TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n"; + O <<"\n"<<TAI->getCommentString() << "Function Declarations - BEGIN." <<"\n"; for (Module::iterator I = M.begin(), E = M.end(); I != E; I++) { std::string Name = Mang->getValueName(I); if (Name.compare("@abort") == 0) @@ -280,6 +312,7 @@ void PIC16AsmPrinter::EmitRomData (Module &M) bool PIC16AsmPrinter::doFinalization(Module &M) { printLibcallDecls(); + EmitRemainingAutos(); DbgInfo.EmitVarDebugInfo(M); DbgInfo.EmitEOF(); O << "\n\t" << "END\n"; @@ -383,6 +416,8 @@ void PIC16AsmPrinter::EmitAutos (std::string FunctName) for (unsigned i = 0; i < AutosSections.size(); i++) { O << "\n"; if (AutosSections[i]->S_->getName() == SectionName) { + // Set the printing status to true + AutosSections[i]->setPrintedStatus(true); SwitchToSection(AutosSections[i]->S_); std::vector<const GlobalVariable*> Items = AutosSections[i]->Items; for (unsigned j = 0; j < Items.size(); j++) { @@ -398,3 +433,34 @@ void PIC16AsmPrinter::EmitAutos (std::string FunctName) } } +// Print autos that were not printed during the code printing of functions. +// As the functions might themselves would have got deleted by the optimizer. +void PIC16AsmPrinter::EmitRemainingAutos() +{ + const TargetData *TD = TM.getTargetData(); + + // Now print Autos section for this function. + std::vector <PIC16Section *>AutosSections = PTAI->AutosSections; + for (unsigned i = 0; i < AutosSections.size(); i++) { + + // if the section is already printed then don't print again + if (AutosSections[i]->isPrinted()) + continue; + + // Set status as printed + AutosSections[i]->setPrintedStatus(true); + + O << "\n"; + SwitchToSection(AutosSections[i]->S_); + std::vector<const GlobalVariable*> Items = AutosSections[i]->Items; + for (unsigned j = 0; j < Items.size(); j++) { + std::string VarName = Mang->getValueName(Items[j]); + Constant *C = Items[j]->getInitializer(); + const Type *Ty = C->getType(); + unsigned Size = TD->getTypeAllocSize(Ty); + // Emit memory reserve directive. 
+ O << VarName << " RES " << Size << "\n"; + } + } +} + diff --git a/lib/Target/PIC16/PIC16AsmPrinter.h b/lib/Target/PIC16/PIC16AsmPrinter.h index 2545dfd..8bdcf72 100644 --- a/lib/Target/PIC16/PIC16AsmPrinter.h +++ b/lib/Target/PIC16/PIC16AsmPrinter.h @@ -52,6 +52,7 @@ namespace llvm { void EmitIData (Module &M); void EmitUData (Module &M); void EmitAutos (std::string FunctName); + void EmitRemainingAutos (); void EmitRomData (Module &M); void EmitFunctionFrame(MachineFunction &MF); void printLibcallDecls(void); diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp index faf4590..d7ebea7 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.cpp +++ b/lib/Target/PIC16/PIC16DebugInfo.cpp @@ -18,13 +18,6 @@ using namespace llvm; -PIC16DbgInfo::~PIC16DbgInfo() { - for(std::map<std::string, DISubprogram *>::iterator i = FunctNameMap.begin(); - i!=FunctNameMap.end(); i++) - delete i->second; - FunctNameMap.clear(); -} - void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TypeName) { @@ -70,7 +63,7 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, } HasAux = true; // In auxillary entry for array, 7th and 8th byte represent array size. - Aux[6] = size; + Aux[6] = size & 0xff; Aux[7] = size >> 8; DIType BaseType = CTy.getTypeDerivedFrom(); PopulateDebugInfo(BaseType, TypeNo, HasAux, Aux, TypeName); @@ -86,10 +79,14 @@ void PIC16DbgInfo::PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, else TypeNo = TypeNo | PIC16Dbg::T_UNION; CTy.getName(TypeName); - unsigned size = CTy.getSizeInBits()/8; + // UniqueSuffix is .number where number is obtained from + // llvm.dbg.composite<number>. + std::string UniqueSuffix = "." + Ty.getGV()->getName().substr(18); + TypeName += UniqueSuffix; + unsigned short size = CTy.getSizeInBits()/8; // 7th and 8th byte represent size. HasAux = true; - Aux[6] = size; + Aux[6] = size & 0xff; Aux[7] = size >> 8; break; } @@ -145,37 +142,84 @@ short PIC16DbgInfo::getClass(DIGlobalVariable DIGV) { return ClassNo; } -void PIC16DbgInfo::PopulateFunctsDI(Module &M) { - GlobalVariable *Root = M.getGlobalVariable("llvm.dbg.subprograms"); - if (!Root) - return; - Constant *RootC = cast<Constant>(*Root->use_begin()); - - for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end(); - UI != UE; ++UI) - for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end(); - UUI != UUE; ++UUI) { - GlobalVariable *GVSP = cast<GlobalVariable>(*UUI); - DISubprogram *SP = new DISubprogram(GVSP); - std::string Name; - SP->getLinkageName(Name); - FunctNameMap[Name] = SP; - } - return; +void PIC16DbgInfo::Init(Module &M) { + // Do all debug related initializations here. + EmitFileDirective(M); + EmitCompositeTypeDecls(M); } -DISubprogram* PIC16DbgInfo::getFunctDI(std::string FunctName) { - return FunctNameMap[FunctName]; +void PIC16DbgInfo::EmitCompositeTypeDecls(Module &M) { + for(iplist<GlobalVariable>::iterator I = M.getGlobalList().begin(), + E = M.getGlobalList().end(); I != E; I++) { + // Structures and union declaration's debug info has llvm.dbg.composite + // in its name. + if(I->getName().find("llvm.dbg.composite") != std::string::npos) { + GlobalVariable *GV = cast<GlobalVariable >(I); + DICompositeType CTy(GV); + if (CTy.getTag() == dwarf::DW_TAG_union_type || + CTy.getTag() == dwarf::DW_TAG_structure_type ) { + std::string name; + CTy.getName(name); + std::string DIVar = I->getName(); + // Get the number after llvm.dbg.composite and make UniqueSuffix from + // it. 
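The "number after llvm.dbg.composite" mentioned in that comment is why the code below uses substr(18): the literal "llvm.dbg.composite" is exactly 18 characters, so everything past it is the numeric tail, and the mangled type name becomes <name>.<number>. A tiny check of that string handling (the global name used here is hypothetical, assuming only the naming scheme the comment states):

  #include <cassert>
  #include <cstdio>
  #include <string>

  int main() {
    const std::string Prefix = "llvm.dbg.composite";
    assert(Prefix.size() == 18);                   // why the code says substr(18)

    std::string DIVar = "llvm.dbg.composite9";     // hypothetical debug global
    std::string UniqueSuffix = "." + DIVar.substr(18);
    std::string MangledCTyName = "mystruct" + UniqueSuffix;

    std::printf("%s\n", MangledCTyName.c_str());   // prints mystruct.9
    return 0;
  }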
+ std::string UniqueSuffix = "." + DIVar.substr(18); + std::string MangledCTyName = name + UniqueSuffix; + unsigned short size = CTy.getSizeInBits()/8; + int Aux[PIC16Dbg::AuxSize] = {0}; + // 7th and 8th byte represent size of structure/union. + Aux[6] = size & 0xff; + Aux[7] = size >> 8; + // Emit .def for structure/union tag. + if( CTy.getTag() == dwarf::DW_TAG_union_type) + EmitSymbol(MangledCTyName, PIC16Dbg::C_UNTAG); + else if (CTy.getTag() == dwarf::DW_TAG_structure_type) + EmitSymbol(MangledCTyName, PIC16Dbg::C_STRTAG); + + // Emit auxiliary debug information for structure/union tag. + EmitAuxEntry(MangledCTyName, Aux, PIC16Dbg::AuxSize); + unsigned long Value = 0; + DIArray Elements = CTy.getTypeArray(); + for (unsigned i = 0, N = Elements.getNumElements(); i < N; i++) { + DIDescriptor Element = Elements.getElement(i); + unsigned short TypeNo = 0; + bool HasAux = false; + int ElementAux[PIC16Dbg::AuxSize] = { 0 }; + std::string TypeName = ""; + std::string ElementName; + GlobalVariable *GV = Element.getGV(); + DIDerivedType DITy(GV); + DITy.getName(ElementName); + unsigned short ElementSize = DITy.getSizeInBits()/8; + // Get mangleddd name for this structure/union element. + std::string MangMemName = ElementName + UniqueSuffix; + PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TypeName); + short Class; + if( CTy.getTag() == dwarf::DW_TAG_union_type) + Class = PIC16Dbg::C_MOU; + else if (CTy.getTag() == dwarf::DW_TAG_structure_type) + Class = PIC16Dbg::C_MOS; + EmitSymbol(MangMemName, Class, TypeNo, Value); + if (CTy.getTag() == dwarf::DW_TAG_structure_type) + Value += ElementSize; + if (HasAux) + EmitAuxEntry(MangMemName, ElementAux, PIC16Dbg::AuxSize, TypeName); + } + // Emit mangled Symbol for end of structure/union. + std::string EOSSymbol = ".eos" + UniqueSuffix; + EmitSymbol(EOSSymbol, PIC16Dbg::C_EOS); + EmitAuxEntry(EOSSymbol, Aux, PIC16Dbg::AuxSize, MangledCTyName); + } + } + } } void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) { std::string FunctName = F->getName(); - DISubprogram *SP = getFunctDI(FunctName); - if (SP) { + if (EmitDebugDirectives) { std::string FunctBeginSym = ".bf." + FunctName; std::string BlockBeginSym = ".bb." + FunctName; - int FunctBeginLine = SP->getLineNumber(); int BFAux[PIC16Dbg::AuxSize] = {0}; BFAux[4] = FunctBeginLine; BFAux[5] = FunctBeginLine >> 8; @@ -189,8 +233,7 @@ void PIC16DbgInfo::EmitFunctBeginDI(const Function *F) { void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { std::string FunctName = F->getName(); - DISubprogram *SP = getFunctDI(FunctName); - if (SP) { + if (EmitDebugDirectives) { std::string FunctEndSym = ".ef." + FunctName; std::string BlockEndSym = ".eb." + FunctName; @@ -208,14 +251,21 @@ void PIC16DbgInfo::EmitFunctEndDI(const Function *F, unsigned Line) { /// EmitAuxEntry - Emit Auxiliary debug information. 
/// -void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num) { +void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int num, + std::string tag) { O << "\n\t.dim " << VarName << ", 1" ; + if (tag != "") + O << ", " << tag; for (int i = 0; i<num; i++) O << "," << Aux[i]; } -void PIC16DbgInfo::EmitSymbol(std::string Name, int Class) { - O << "\n\t" << ".def "<< Name << ", debug, class = " << Class; +void PIC16DbgInfo::EmitSymbol(std::string Name, short Class, unsigned short + Type, unsigned long Value) { + O << "\n\t" << ".def "<< Name << ", type = " << Type << ", class = " + << Class; + if (Value > 0) + O << ", value = " << Value; } void PIC16DbgInfo::EmitVarDebugInfo(Module &M) { @@ -241,18 +291,8 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) { O << "\n\t.type " << VarName << ", " << TypeNo; short ClassNo = getClass(DIGV); O << "\n\t.class " << VarName << ", " << ClassNo; - if (HasAux) { - if (TypeName != "") { - // Emit debug info for structure and union objects after - // .dim directive supports structure/union tag name in aux entry. - /* O << "\n\t.dim " << VarName << ", 1," << TypeName; - for (int i = 0; i<PIC16Dbg::AuxSize; i++) - O << "," << Aux[i];*/ - } - else { - EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize); - } - } + if (HasAux) + EmitAuxEntry(VarName, Aux, PIC16Dbg::AuxSize, TypeName); } } } @@ -262,26 +302,20 @@ void PIC16DbgInfo::EmitVarDebugInfo(Module &M) { void PIC16DbgInfo::EmitFileDirective(Module &M) { GlobalVariable *CU = M.getNamedGlobal("llvm.dbg.compile_unit"); if (CU) { - DICompileUnit DIUnit(CU); - std::string Dir, FN; - std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN); - O << "\n\t.file\t\"" << File << "\"\n" ; - CurFile = File; + EmitDebugDirectives = true; + EmitFileDirective(CU, false); } } -void PIC16DbgInfo::EmitFileDirective(const Function *F) { - std::string FunctName = F->getName(); - DISubprogram *SP = getFunctDI(FunctName); - if (SP) { - std::string Dir, FN; - DICompileUnit CU = SP->getCompileUnit(); - std::string File = CU.getDirectory(Dir) + "/" + CU.getFilename(FN); - if ( File != CurFile) { +void PIC16DbgInfo::EmitFileDirective(GlobalVariable *CU, bool EmitEof) { + std::string Dir, FN; + DICompileUnit DIUnit(CU); + std::string File = DIUnit.getDirectory(Dir) + "/" + DIUnit.getFilename(FN); + if ( File != CurFile ) { + if (EmitEof) EmitEOF(); - O << "\n\t.file\t\"" << File << "\"\n" ; - CurFile = File; - } + O << "\n\t.file\t\"" << File << "\"\n" ; + CurFile = File; } } @@ -290,3 +324,6 @@ void PIC16DbgInfo::EmitEOF() { O << "\n\t.EOF"; } +void PIC16DbgInfo::SetFunctBeginLine(unsigned line) { + FunctBeginLine = line; +} diff --git a/lib/Target/PIC16/PIC16DebugInfo.h b/lib/Target/PIC16/PIC16DebugInfo.h index be39393..9d50380 100644 --- a/lib/Target/PIC16/PIC16DebugInfo.h +++ b/lib/Target/PIC16/PIC16DebugInfo.h @@ -91,29 +91,36 @@ namespace llvm { class raw_ostream; class PIC16DbgInfo { - std::map <std::string, DISubprogram *> FunctNameMap; raw_ostream &O; const TargetAsmInfo *TAI; std::string CurFile; + // EmitDebugDirectives is set if debug information is available. Default + // value for it is false. 
+ bool EmitDebugDirectives; + unsigned FunctBeginLine; public: PIC16DbgInfo(raw_ostream &o, const TargetAsmInfo *T) : O(o), TAI(T) { - CurFile = ""; + CurFile = ""; + EmitDebugDirectives = false; } - ~PIC16DbgInfo(); void PopulateDebugInfo(DIType Ty, unsigned short &TypeNo, bool &HasAux, int Aux[], std::string &TypeName); unsigned GetTypeDebugNumber(std::string &type); short getClass(DIGlobalVariable DIGV); - void PopulateFunctsDI(Module &M); - DISubprogram *getFunctDI(std::string FunctName); void EmitFunctBeginDI(const Function *F); + void Init(Module &M); + void EmitCompositeTypeDecls(Module &M); void EmitFunctEndDI(const Function *F, unsigned Line); - void EmitAuxEntry(const std::string VarName, int Aux[], int num); - inline void EmitSymbol(std::string Name, int Class); + void EmitAuxEntry(const std::string VarName, int Aux[], + int num = PIC16Dbg::AuxSize, std::string tag = ""); + inline void EmitSymbol(std::string Name, short Class, + unsigned short Type = PIC16Dbg::T_NULL, + unsigned long Value = 0); void EmitVarDebugInfo(Module &M); void EmitFileDirective(Module &M); - void EmitFileDirective(const Function *F); + void EmitFileDirective(GlobalVariable *CU, bool EmitEof = true); void EmitEOF(); + void SetFunctBeginLine(unsigned line); }; } // end namespace llvm; #endif diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index ac9a143..ba465f3 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -56,6 +56,17 @@ static const char *getIntrinsicName(unsigned opcode) { case RTLIB::SREM_I32: Basename = "srem.i32"; break; case RTLIB::UREM_I16: Basename = "urem.i16"; break; case RTLIB::UREM_I32: Basename = "urem.i32"; break; + + case RTLIB::FPTOSINT_F32_I32: + Basename = "f32_to_si32"; break; + case RTLIB::SINTTOFP_I32_F32: + Basename = "si32_to_f32"; break; + + case RTLIB::ADD_F32: Basename = "add.f32"; break; + case RTLIB::SUB_F32: Basename = "sub.f32"; break; + case RTLIB::MUL_F32: Basename = "mul.f32"; break; + case RTLIB::DIV_F32: Basename = "div.f32"; break; + } std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL); @@ -113,7 +124,17 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) // Unsigned remainder lib call names setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16)); setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32)); - + + // Floating point operations + setLibcallName(RTLIB::FPTOSINT_F32_I32, + getIntrinsicName(RTLIB::FPTOSINT_F32_I32)); + setLibcallName(RTLIB::SINTTOFP_I32_F32, + getIntrinsicName(RTLIB::SINTTOFP_I32_F32)); + setLibcallName(RTLIB::ADD_F32, getIntrinsicName(RTLIB::ADD_F32)); + setLibcallName(RTLIB::SUB_F32, getIntrinsicName(RTLIB::SUB_F32)); + setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32)); + setLibcallName(RTLIB::DIV_F32, getIntrinsicName(RTLIB::DIV_F32)); + setOperationAction(ISD::GlobalAddress, MVT::i16, Custom); setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom); diff --git a/lib/Target/PIC16/PIC16TargetAsmInfo.h b/lib/Target/PIC16/PIC16TargetAsmInfo.h index e464e36..b7292b8 100644 --- a/lib/Target/PIC16/PIC16TargetAsmInfo.h +++ b/lib/Target/PIC16/PIC16TargetAsmInfo.h @@ -33,9 +33,13 @@ namespace llvm { struct PIC16Section { const Section *S_; // Connection to actual Section. unsigned Size; // Total size of the objects contained. 
+ bool SectionPrinted; std::vector<const GlobalVariable*> Items; - PIC16Section (const Section *s) { S_ = s; Size = 0; } + PIC16Section (const Section *s) { S_ = s; Size = 0; + SectionPrinted = false;} + bool isPrinted() { return SectionPrinted ; } + void setPrintedStatus(bool status) { SectionPrinted = status ;} }; struct PIC16TargetAsmInfo : public TargetAsmInfo { diff --git a/lib/Target/TargetELFWriterInfo.cpp b/lib/Target/TargetELFWriterInfo.cpp new file mode 100644 index 0000000..9651e65 --- /dev/null +++ b/lib/Target/TargetELFWriterInfo.cpp @@ -0,0 +1,36 @@ +//===-- lib/Target/TargetELFWriterInfo.cpp - ELF Writer Info --0-*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetELFWriterInfo class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Function.h" +#include "llvm/Target/TargetELFWriterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +TargetELFWriterInfo::TargetELFWriterInfo(TargetMachine &tm) : TM(tm) { + is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + isLittleEndian = TM.getTargetData()->isLittleEndian(); +} + +TargetELFWriterInfo::~TargetELFWriterInfo() {} + +/// getFunctionAlignment - Returns the alignment for function 'F', targets +/// with different alignment constraints should overload this method +unsigned TargetELFWriterInfo::getFunctionAlignment(const Function *F) const { + const TargetData *TD = TM.getTargetData(); + unsigned FnAlign = F->getAlignment(); + unsigned TDAlign = TD->getPointerABIAlignment(); + unsigned Align = std::max(FnAlign, TDAlign); + assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); + return Align; +} diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index dea293b..c487cb8 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -30,6 +30,7 @@ namespace llvm { bool FiniteOnlyFPMathOption; bool HonorSignDependentRoundingFPMathOption; bool UseSoftFloat; + FloatABI::ABIType FloatABIType; bool NoImplicitFloat; bool NoZerosInBSS; bool ExceptionHandling; @@ -84,6 +85,19 @@ GenerateSoftFloatCalls("soft-float", cl::desc("Generate software floating point library calls"), cl::location(UseSoftFloat), cl::init(false)); +static cl::opt<llvm::FloatABI::ABIType, true> +FloatABIForCalls("float-abi", + cl::desc("Choose float ABI type"), + cl::location(FloatABIType), + cl::init(FloatABI::Default), + cl::values( + clEnumValN(FloatABI::Default, "default", + "Target default float ABI type"), + clEnumValN(FloatABI::Soft, "soft", + "Soft float ABI (implied by -soft-float)"), + clEnumValN(FloatABI::Hard, "hard", + "Hard float ABI (uses FP registers)"), + clEnumValEnd)); static cl::opt<bool, true> DontPlaceZerosInBSS("nozero-initialized-in-bss", cl::desc("Don't place zero-initialized symbols into bss section"), @@ -162,6 +176,14 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", // TargetMachine Class // +TargetMachine::TargetMachine() + : AsmInfo(0) { + // Typically it will be subtargets that will adjust FloatABIType from Default + // to Soft or Hard. 
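Together with the constructor body that follows, the float-ABI plumbing in this patch appears to resolve in three steps: -soft-float forces Soft here in TargetMachine, the ARM subtarget turns a remaining Default into Soft, and the AAPCS-VFP calling conventions are only selected when the ABI ended up Hard and VFP2 is available. A compact sketch of that resolution order (plain enums and free functions, purely illustrative):

  #include <cstdio>

  enum class FloatABI { Default, Soft, Hard };

  FloatABI resolveFloatABI(FloatABI FromCommandLine, bool SoftFloatFlag) {
    FloatABI ABI = FromCommandLine;
    if (SoftFloatFlag)
      ABI = FloatABI::Soft;        // TargetMachine constructor
    if (ABI == FloatABI::Default)
      ABI = FloatABI::Soft;        // ARMSubtarget's fallback
    return ABI;
  }

  bool useAAPCSVFP(FloatABI ABI, bool IsAAPCS, bool HasVFP2) {
    return IsAAPCS && HasVFP2 && ABI == FloatABI::Hard;    // CC_ARM's guard
  }

  int main() {
    FloatABI ABI = resolveFloatABI(FloatABI::Hard, /*SoftFloatFlag=*/false);
    std::printf("AAPCS-VFP used: %d\n", useAAPCSVFP(ABI, true, true));   // 1
    return 0;
  }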
+ if (UseSoftFloat) + FloatABIType = FloatABI::Soft; +} + TargetMachine::~TargetMachine() { delete AsmInfo; } diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt index 710bd03..3796aac 100644 --- a/lib/Target/X86/README.txt +++ b/lib/Target/X86/README.txt @@ -482,35 +482,6 @@ _usesbb: //===---------------------------------------------------------------------===// -Currently we don't have elimination of redundant stack manipulations. Consider -the code: - -int %main() { -entry: - call fastcc void %test1( ) - call fastcc void %test2( sbyte* cast (void ()* %test1 to sbyte*) ) - ret int 0 -} - -declare fastcc void %test1() - -declare fastcc void %test2(sbyte*) - - -This currently compiles to: - - subl $16, %esp - call _test5 - addl $12, %esp - subl $16, %esp - movl $_test5, (%esp) - call _test6 - addl $12, %esp - -The add\sub pair is really unneeded here. - -//===---------------------------------------------------------------------===// - Consider the expansion of: define i32 @test3(i32 %X) { @@ -902,34 +873,6 @@ condition register is dead. xor reg reg is shorter than mov reg, #0. //===---------------------------------------------------------------------===// -We aren't matching RMW instructions aggressively -enough. Here's a reduced testcase (more in PR1160): - -define void @test(i32* %huge_ptr, i32* %target_ptr) { - %A = load i32* %huge_ptr ; <i32> [#uses=1] - %B = load i32* %target_ptr ; <i32> [#uses=1] - %C = or i32 %A, %B ; <i32> [#uses=1] - store i32 %C, i32* %target_ptr - ret void -} - -$ llvm-as < t.ll | llc -march=x86-64 - -_test: - movl (%rdi), %eax - orl (%rsi), %eax - movl %eax, (%rsi) - ret - -That should be something like: - -_test: - movl (%rdi), %eax - orl %eax, (%rsi) - ret - -//===---------------------------------------------------------------------===// - The following code: bb114.preheader: ; preds = %cond_next94 @@ -1897,3 +1840,60 @@ The second one is done for: Atom, Pentium Pro, all AMDs, Pentium 4, Nocona, Core 2, and "Generic" //===---------------------------------------------------------------------===// + +Testcase: +int a(int x) { return (x & 127) > 31; } + +Current output: + movl 4(%esp), %eax + andl $127, %eax + cmpl $31, %eax + seta %al + movzbl %al, %eax + ret + +Ideal output: + xorl %eax, %eax + testl $96, 4(%esp) + setne %al + ret + +We could do this transformation in instcombine, but it's only clearly +beneficial on platforms with a test instruction. + +//===---------------------------------------------------------------------===// +Testcase: +int x(int a) { return (a&0xf0)>>4; } + +Current output: + movl 4(%esp), %eax + shrl $4, %eax + andl $15, %eax + ret + +Ideal output: + movzbl 4(%esp), %eax + shrl $4, %eax + ret + +//===---------------------------------------------------------------------===// + +Testcase: +int x(int a) { return (a & 0x80) ? 0x100 : 0; } + +Current output: + testl $128, 4(%esp) + setne %al + movzbl %al, %eax + shll $8, %eax + ret + +Ideal output: + movl 4(%esp), %eax + addl %eax, %eax + andl $256, %eax + ret + +We generally want to fold shifted tests of a single bit into a shift+and on x86. 
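All three new README entries rest on small bit identities; the first, for instance, works because within a 7-bit value "greater than 31" is the same as "bit 5 or bit 6 set", i.e. (x & 96) != 0. The three rewrites are cheap to verify exhaustively over a representative range (plain C++, separate from the README's asm listings):

  #include <cassert>
  #include <cstdio>

  int main() {
    for (int x = -512; x <= 512; ++x) {
      // (x & 127) > 31         <=>  bit 5 or bit 6 of x is set
      assert(((x & 127) > 31) == ((x & 96) != 0));
      // (x & 0xf0) >> 4         ==  zero-extended low byte shifted right by 4
      assert(((x & 0xf0) >> 4) == ((unsigned char)x >> 4));
      // (x & 0x80) ? 0x100 : 0  ==  bit 7 of x moved up one position
      assert(((x & 0x80) ? 0x100 : 0) == ((x + x) & 256));
    }
    std::printf("all three rewrites agree on [-512, 512]\n");
    return 0;
  }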
+ +//===---------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 7f99203..e9fcbd5 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -215,50 +215,6 @@ def CC_X86_Win64_C : CallingConv<[ CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>> ]>; -// Tail call convention (fast): One register is reserved for target address, -// namely R9 -def CC_X86_64_TailCall : CallingConv<[ - // Handles byval parameters. - CCIfByVal<CCPassByVal<8, 8>>, - - // Promote i8/i16 arguments to i32. - CCIfType<[i8, i16], CCPromoteToType<i32>>, - - // The 'nest' parameter, if any, is passed in R10. - CCIfNest<CCAssignToReg<[R10]>>, - - // The first 6 integer arguments are passed in integer registers. - CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D]>>, - CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>, - - // The first 8 FP/Vector arguments are passed in XMM registers. - CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCIfSubtarget<"hasSSE1()", - CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - - // The first 8 MMX (except for v1i64) vector arguments are passed in XMM - // registers on Darwin. - CCIfType<[v8i8, v4i16, v2i32, v2f32], - CCIfSubtarget<"isTargetDarwin()", - CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>, - - // The first 8 v1i64 vector arguments are passed in GPRs on Darwin. - CCIfType<[v1i64], - CCIfSubtarget<"isTargetDarwin()", - CCAssignToReg<[RDI, RSI, RDX, RCX, R8]>>>, - - // Integer/FP values get stored in stack slots that are 8 bytes in size and - // 8-byte aligned if there are no more registers to hold them. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>, - - // Vectors get 16-byte stack slots that are 16-byte aligned. - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>, - - // __m64 vectors get 8-byte stack slots that are 8-byte aligned. - CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>> -]>; - - //===----------------------------------------------------------------------===// // X86 C Calling Convention //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 2604741..d84034b 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -12,8 +12,27 @@ //===----------------------------------------------------------------------===// #include "X86ELFWriterInfo.h" +#include "llvm/Function.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; -X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) : - TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {} +X86ELFWriterInfo::X86ELFWriterInfo(TargetMachine &TM) + : TargetELFWriterInfo(TM) { + bool is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; + EMachine = is64Bit ? 
EM_X86_64 : EM_386; + } + X86ELFWriterInfo::~X86ELFWriterInfo() {} + +unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const { + unsigned FnAlign = 4; + + if (F->hasFnAttr(Attribute::OptimizeForSize)) + FnAlign = 1; + + if (F->getAlignment()) + FnAlign = Log2_32(F->getAlignment()); + + return (1 << FnAlign); +} diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index acfa501..e9c5bc4 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -20,8 +20,10 @@ namespace llvm { class X86ELFWriterInfo : public TargetELFWriterInfo { public: - X86ELFWriterInfo(bool is64Bit); + X86ELFWriterInfo(TargetMachine &TM); virtual ~X86ELFWriterInfo(); + + virtual unsigned getFunctionAlignment(const Function *F) const; }; } // end llvm namespace diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 33332e4..2bcfd76 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -171,8 +171,6 @@ CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTaillCall) { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; - else if (CC == CallingConv::Fast && isTaillCall) - return CC_X86_64_TailCall; else return CC_X86_64_C; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9e15a54..36e3ab2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -944,7 +944,7 @@ SDValue X86TargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG) { SDValue StackAdjustment = TailCall.getOperand(2); assert(((TargetAddress.getOpcode() == ISD::Register && (cast<RegisterSDNode>(TargetAddress)->getReg() == X86::EAX || - cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R9)) || + cast<RegisterSDNode>(TargetAddress)->getReg() == X86::R11)) || TargetAddress.getOpcode() == ISD::TargetExternalSymbol || TargetAddress.getOpcode() == ISD::TargetGlobalAddress) && "Expecting an global address, external symbol, or register"); @@ -1171,8 +1171,6 @@ CCAssignFn *X86TargetLowering::CCAssignFnForNode(unsigned CC) const { if (Subtarget->is64Bit()) { if (Subtarget->isTargetWin64()) return CC_X86_Win64_C; - else if (CC == CallingConv::Fast && PerformTailCallOpt) - return CC_X86_64_TailCall; else return CC_X86_64_C; } @@ -1799,7 +1797,7 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy()); } else if (IsTailCall) { - unsigned Opc = Is64Bit ? X86::R9 : X86::EAX; + unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, DAG.getRegister(Opc, getPointerTy()), @@ -7696,7 +7694,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, SelectionDAG &DAG, MachineFrameInfo *MFI, const TargetLowering &TLI) { LDBase = NULL; - LastLoadedElt = -1; + LastLoadedElt = -1U; for (unsigned i = 0; i < NumElems; ++i) { if (N->getMaskElt(i) < 0) { if (!LDBase) diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index c733f26..6c0074e 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -822,6 +822,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } + unsigned ReadyLabelId = 0; + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer is ready. 
+ ReadyLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); + } + // Skip the callee-saved push instructions. while (MBBI != MBB.end() && (MBBI->getOpcode() == X86::PUSH32r || @@ -831,67 +838,61 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); - if (NumBytes) { // Adjust stack pointer: ESP -= numbytes. - if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { - // Check, whether EAX is livein for this function. - bool isEAXAlive = false; - for (MachineRegisterInfo::livein_iterator + // Adjust stack pointer: ESP -= numbytes. + if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { + // Check, whether EAX is livein for this function. + bool isEAXAlive = false; + for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) { - unsigned Reg = II->first; - isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || - Reg == X86::AH || Reg == X86::AL); - } + unsigned Reg = II->first; + isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || + Reg == X86::AH || Reg == X86::AL); + } - // Function prologue calls _alloca to probe the stack when allocating more - // than 4k bytes in one go. Touching the stack at 4K increments is - // necessary to ensure that the guard pages used by the OS virtual memory - // manager are allocated in correct sequence. - if (!isEAXAlive) { - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes); - BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); - } else { - // Save EAX - BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) - .addReg(X86::EAX, RegState::Kill); - - // Allocate NumBytes-4 bytes on stack. We'll also use 4 already - // allocated bytes for EAX. - BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) - .addImm(NumBytes-4); - BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) - .addExternalSymbol("_alloca"); - - // Restore EAX - MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), - X86::EAX), - StackPtr, false, NumBytes-4); - MBB.insert(MBBI, MI); - } + // Function prologue calls _alloca to probe the stack when allocating more + // than 4k bytes in one go. Touching the stack at 4K increments is necessary + // to ensure that the guard pages used by the OS virtual memory manager are + // allocated in correct sequence. + if (!isEAXAlive) { + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes); + BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("_alloca"); } else { - // If there is an SUB32ri of ESP immediately before this instruction, - // merge the two. This can be the case when tail call elimination is - // enabled and the callee has more arguments then the caller. - NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); + // Save EAX + BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) + .addReg(X86::EAX, RegState::Kill); + + // Allocate NumBytes-4 bytes on stack. We'll also use 4 already + // allocated bytes for EAX. + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes - 4); + BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) + .addExternalSymbol("_alloca"); + + // Restore EAX + MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), + X86::EAX), + StackPtr, false, NumBytes - 4); + MBB.insert(MBBI, MI); + } + } else if (NumBytes) { + // If there is an SUB32ri of ESP immediately before this instruction, merge + // the two. 
This can be the case when tail call elimination is enabled and + // the callee has more arguments then the caller. + NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); - // If there is an ADD32ri or SUB32ri of ESP immediately after this - // instruction, merge the two instructions. - mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); + // If there is an ADD32ri or SUB32ri of ESP immediately after this + // instruction, merge the two instructions. + mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); - if (NumBytes) - emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); - } + if (NumBytes) + emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII); } - if (needsFrameMoves) { - // Mark effective beginning of when frame pointer is ready. - unsigned ReadyLabelId = 0; - ReadyLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); + if (needsFrameMoves) emitFrameMoves(MF, FrameLabelId, ReadyLabelId); - } } void X86RegisterInfo::emitEpilogue(MachineFunction &MF, diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 03ce1ae..56983ce 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -350,6 +350,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit) , MaxInlineSizeThreshold(128) , Is64Bit(is64Bit) , TargetType(isELF) { // Default to ELF unless otherwise specified. + + // default to hard float ABI + if (FloatABIType == FloatABI::Default) + FloatABIType = FloatABI::Hard; // Determine default and user specified characteristics if (!FS.empty()) { diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 88ab247..dfb055f 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -133,8 +133,7 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS, DataLayout(Subtarget.getDataLayout()), FrameInfo(TargetFrameInfo::StackGrowsDown, Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4), - InstrInfo(*this), JITInfo(*this), TLInfo(*this), - ELFWriterInfo(Subtarget.is64Bit()) { + InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) { DefRelocModel = getRelocationModel(); // FIXME: Correctly select PIC model for Win64 stuff if (getRelocationModel() == Reloc::Default) { diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 2bb6428..a612634 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -127,17 +127,8 @@ bool ArgPromotion::PromoteArguments(CallGraphNode *CGN) { // Second check: make sure that all callers are direct callers. We can't // transform functions that have indirect callers. - for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); - UI != E; ++UI) { - CallSite CS = CallSite::get(*UI); - if (!CS.getInstruction()) // "Taking the address" of the function - return false; - - // Ensure that this call site is CALLING the function, not passing it as - // an argument. - if (!CS.isCallee(UI)) - return false; - } + if (F->hasAddressTaken()) + return false; // Check to see which arguments are promotable. If an argument is promotable, // add it to ArgsToPromote. 
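
Both this change and the DeadArgumentElimination change below delegate the "only called directly" test to the Function::hasAddressTaken() helper that this same patch adds in lib/VMCore/Function.cpp. A minimal sketch of its semantics, mirroring that helper (illustrative only; addressEscapes is a made-up name, and the includes of llvm/Function.h and llvm/Instructions.h are assumed):

// A function's address is considered taken unless every use is the callee
// operand (operand 0, in this era of the IR) of a CallInst or InvokeInst.
static bool addressEscapes(const llvm::Function &F) {
  for (llvm::Value::use_const_iterator I = F.use_begin(), E = F.use_end();
       I != E; ++I)
    if (I.getOperandNo() != 0 ||
        (!llvm::isa<llvm::CallInst>(*I) && !llvm::isa<llvm::InvokeInst>(*I)))
      return true;  // stored, bitcast, or passed as an argument
  return false;     // every use is a direct call or invoke of F
}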
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 666db7e..e480dad 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -175,15 +175,8 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { if (Fn.isDeclaration() || !Fn.hasLocalLinkage()) return false; // Ensure that the function is only directly called. - for (Value::use_iterator I = Fn.use_begin(), E = Fn.use_end(); I != E; ++I) { - // If this use is anything other than a call site, give up. - CallSite CS = CallSite::get(*I); - Instruction *TheCall = CS.getInstruction(); - if (!TheCall) return false; // Not a direct call site? - - // The addr of this function is passed to the call. - if (!CS.isCallee(I)) return false; - } + if (Fn.hasAddressTaken()) + return false; // Okay, we know we can transform this function if safe. Scan its body // looking for calls to llvm.vastart. diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index db378b0..9c652b9 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -47,7 +47,6 @@ namespace { void GlobalIsNeeded(GlobalValue *GV); void MarkUsedGlobalsAsNeeded(Constant *C); - bool SafeToDestroyConstant(Constant* C); bool RemoveUnusedGlobalValue(GlobalValue &GV); }; } @@ -211,17 +210,3 @@ bool GlobalDCE::RemoveUnusedGlobalValue(GlobalValue &GV) { GV.removeDeadConstantUsers(); return GV.use_empty(); } - -// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used -// by constants itself. Note that constants cannot be cyclic, so this test is -// pretty easy to implement recursively. -// -bool GlobalDCE::SafeToDestroyConstant(Constant *C) { - for (Value::use_iterator I = C->use_begin(), E = C->use_end(); I != E; ++I) - if (Constant *User = dyn_cast<Constant>(*I)) { - if (!SafeToDestroyConstant(User)) return false; - } else { - return false; - } - return true; -} diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 5f12825..9a1b294 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -136,16 +136,16 @@ struct VISIBILITY_HIDDEN GlobalStatus { } -/// ConstantIsDead - Return true if the specified constant is (transitively) -/// dead. The constant may be used by other constants (e.g. constant arrays and -/// constant exprs) as long as they are dead, but it cannot be used by anything -/// else. -static bool ConstantIsDead(Constant *C) { +// SafeToDestroyConstant - It is safe to destroy a constant iff it is only used +// by constants itself. Note that constants cannot be cyclic, so this test is +// pretty easy to implement recursively. +// +static bool SafeToDestroyConstant(Constant *C) { if (isa<GlobalValue>(C)) return false; for (Value::use_iterator UI = C->use_begin(), E = C->use_end(); UI != E; ++UI) if (Constant *CU = dyn_cast<Constant>(*UI)) { - if (!ConstantIsDead(CU)) return false; + if (!SafeToDestroyConstant(CU)) return false; } else return false; return true; @@ -233,7 +233,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS, } else if (Constant *C = dyn_cast<Constant>(*UI)) { GS.HasNonInstructionUser = true; // We might have a dead and dangling constant hanging off of here. 
- if (!ConstantIsDead(C)) + if (!SafeToDestroyConstant(C)) return true; } else { GS.HasNonInstructionUser = true; @@ -338,7 +338,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { } else if (Constant *C = dyn_cast<Constant>(U)) { // If we have a chain of dead constantexprs or other things dangling from // us, and if they are all dead, nuke them without remorse. - if (ConstantIsDead(C)) { + if (SafeToDestroyConstant(C)) { C->destroyConstant(); // This could have invalidated UI, start over from scratch. CleanupConstantGlobalUsers(V, Init); @@ -354,7 +354,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) { static bool isSafeSROAElementUse(Value *V) { // We might have a dead and dangling constant hanging off of here. if (Constant *C = dyn_cast<Constant>(V)) - return ConstantIsDead(C); + return SafeToDestroyConstant(C); Instruction *I = dyn_cast<Instruction>(V); if (!I) return false; @@ -1769,22 +1769,6 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV, return false; } -/// OnlyCalledDirectly - Return true if the specified function is only called -/// directly. In other words, its address is never taken. -static bool OnlyCalledDirectly(Function *F) { - for (Value::use_iterator UI = F->use_begin(), E = F->use_end(); UI != E;++UI){ - Instruction *User = dyn_cast<Instruction>(*UI); - if (!User) return false; - if (!isa<CallInst>(User) && !isa<InvokeInst>(User)) return false; - - // See if the function address is passed as an argument. - for (User::op_iterator i = User->op_begin() + 1, e = User->op_end(); - i != e; ++i) - if (*i == F) return false; - } - return true; -} - /// ChangeCalleesToFastCall - Walk all of the direct calls of the specified /// function, changing them to FastCC. static void ChangeCalleesToFastCall(Function *F) { @@ -1830,7 +1814,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { ++NumFnDeleted; } else if (F->hasLocalLinkage()) { if (F->getCallingConv() == CallingConv::C && !F->isVarArg() && - OnlyCalledDirectly(F)) { + !F->hasAddressTaken()) { // If this function has C calling conventions, is not a varargs // function, and is only called directly, promote it to use the Fast // calling convention. @@ -1841,7 +1825,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { } if (F->getAttributes().hasAttrSomewhere(Attribute::Nest) && - OnlyCalledDirectly(F)) { + !F->hasAddressTaken()) { // The function is not used by a trampoline intrinsic, so it is safe // to remove the 'nest' attribute. RemoveNestAttribute(F); diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 17bc2d4..5693cc0 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -9,10 +9,6 @@ // // This pass looks for equivalent functions that are mergable and folds them. // -// A Function will not be analyzed if: -// * it is overridable at runtime (except for weak linkage), or -// * it is used by anything other than the callee parameter of a call/invoke -// // A hash is computed from the function, based on its type and number of // basic blocks. // @@ -24,8 +20,6 @@ // When a match is found, the functions are folded. We can only fold two // functions when we know that the definition of one of them is not // overridable. -// * fold a function marked internal by replacing all of its users. 
-// * fold extern or weak functions by replacing them with a global alias // //===----------------------------------------------------------------------===// // @@ -48,6 +42,7 @@ #define DEBUG_TYPE "mergefunc" #include "llvm/Transforms/IPO.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Constants.h" #include "llvm/InlineAsm.h" @@ -62,7 +57,6 @@ using namespace llvm; STATISTIC(NumFunctionsMerged, "Number of functions merged"); -STATISTIC(NumMergeFails, "Number of identical function pairings not merged"); namespace { struct VISIBILITY_HIDDEN MergeFunctions : public ModulePass { @@ -81,16 +75,169 @@ ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); } +// ===----------------------------------------------------------------------=== +// Comparison of functions +// ===----------------------------------------------------------------------=== + static unsigned long hash(const Function *F) { - return F->size() ^ reinterpret_cast<unsigned long>(F->getType()); - //return F->size() ^ F->arg_size() ^ F->getReturnType(); + const FunctionType *FTy = F->getFunctionType(); + + FoldingSetNodeID ID; + ID.AddInteger(F->size()); + ID.AddInteger(F->getCallingConv()); + ID.AddBoolean(F->hasGC()); + ID.AddBoolean(FTy->isVarArg()); + ID.AddInteger(FTy->getReturnType()->getTypeID()); + for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) + ID.AddInteger(FTy->getParamType(i)->getTypeID()); + return ID.ComputeHash(); +} + +/// IgnoreBitcasts - given a bitcast, returns the first non-bitcast found by +/// walking the chain of cast operands. Otherwise, returns the argument. +static Value* IgnoreBitcasts(Value *V) { + while (BitCastInst *BC = dyn_cast<BitCastInst>(V)) + V = BC->getOperand(0); + + return V; +} + +/// isEquivalentType - any two pointers are equivalent. Otherwise, standard +/// type equivalence rules apply. +static bool isEquivalentType(const Type *Ty1, const Type *Ty2) { + if (Ty1 == Ty2) + return true; + if (Ty1->getTypeID() != Ty2->getTypeID()) + return false; + + switch(Ty1->getTypeID()) { + case Type::VoidTyID: + case Type::FloatTyID: + case Type::DoubleTyID: + case Type::X86_FP80TyID: + case Type::FP128TyID: + case Type::PPC_FP128TyID: + case Type::LabelTyID: + case Type::MetadataTyID: + return true; + + case Type::IntegerTyID: + case Type::OpaqueTyID: + // Ty1 == Ty2 would have returned true earlier. 
+ return false; + + default: + assert(0 && "Unknown type!"); + return false; + + case Type::PointerTyID: { + const PointerType *PTy1 = cast<PointerType>(Ty1); + const PointerType *PTy2 = cast<PointerType>(Ty2); + return PTy1->getAddressSpace() == PTy2->getAddressSpace(); + } + + case Type::StructTyID: { + const StructType *STy1 = cast<StructType>(Ty1); + const StructType *STy2 = cast<StructType>(Ty2); + if (STy1->getNumElements() != STy2->getNumElements()) + return false; + + if (STy1->isPacked() != STy2->isPacked()) + return false; + + for (unsigned i = 0, e = STy1->getNumElements(); i != e; ++i) { + if (!isEquivalentType(STy1->getElementType(i), STy2->getElementType(i))) + return false; + } + return true; + } + + case Type::FunctionTyID: { + const FunctionType *FTy1 = cast<FunctionType>(Ty1); + const FunctionType *FTy2 = cast<FunctionType>(Ty2); + if (FTy1->getNumParams() != FTy2->getNumParams() || + FTy1->isVarArg() != FTy2->isVarArg()) + return false; + + if (!isEquivalentType(FTy1->getReturnType(), FTy2->getReturnType())) + return false; + + for (unsigned i = 0, e = FTy1->getNumParams(); i != e; ++i) { + if (!isEquivalentType(FTy1->getParamType(i), FTy2->getParamType(i))) + return false; + } + return true; + } + + case Type::ArrayTyID: + case Type::VectorTyID: { + const SequentialType *STy1 = cast<SequentialType>(Ty1); + const SequentialType *STy2 = cast<SequentialType>(Ty2); + return isEquivalentType(STy1->getElementType(), STy2->getElementType()); + } + } +} + +/// isEquivalentOperation - determine whether the two operations are the same +/// except that pointer-to-A and pointer-to-B are equivalent. This should be +/// kept in sync with Instruction::isSameOperationAs. +static bool +isEquivalentOperation(const Instruction *I1, const Instruction *I2) { + if (I1->getOpcode() != I2->getOpcode() || + I1->getNumOperands() != I2->getNumOperands() || + !isEquivalentType(I1->getType(), I2->getType())) + return false; + + // We have two instructions of identical opcode and #operands. Check to see + // if all operands are the same type + for (unsigned i = 0, e = I1->getNumOperands(); i != e; ++i) + if (!isEquivalentType(I1->getOperand(i)->getType(), + I2->getOperand(i)->getType())) + return false; + + // Check special state that is a part of some instructions. 
+ if (const LoadInst *LI = dyn_cast<LoadInst>(I1)) + return LI->isVolatile() == cast<LoadInst>(I2)->isVolatile() && + LI->getAlignment() == cast<LoadInst>(I2)->getAlignment(); + if (const StoreInst *SI = dyn_cast<StoreInst>(I1)) + return SI->isVolatile() == cast<StoreInst>(I2)->isVolatile() && + SI->getAlignment() == cast<StoreInst>(I2)->getAlignment(); + if (const CmpInst *CI = dyn_cast<CmpInst>(I1)) + return CI->getPredicate() == cast<CmpInst>(I2)->getPredicate(); + if (const CallInst *CI = dyn_cast<CallInst>(I1)) + return CI->isTailCall() == cast<CallInst>(I2)->isTailCall() && + CI->getCallingConv() == cast<CallInst>(I2)->getCallingConv() && + CI->getAttributes().getRawPointer() == + cast<CallInst>(I2)->getAttributes().getRawPointer(); + if (const InvokeInst *CI = dyn_cast<InvokeInst>(I1)) + return CI->getCallingConv() == cast<InvokeInst>(I2)->getCallingConv() && + CI->getAttributes().getRawPointer() == + cast<InvokeInst>(I2)->getAttributes().getRawPointer(); + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(I1)) { + if (IVI->getNumIndices() != cast<InsertValueInst>(I2)->getNumIndices()) + return false; + for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i) + if (IVI->idx_begin()[i] != cast<InsertValueInst>(I2)->idx_begin()[i]) + return false; + return true; + } + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I1)) { + if (EVI->getNumIndices() != cast<ExtractValueInst>(I2)->getNumIndices()) + return false; + for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i) + if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I2)->idx_begin()[i]) + return false; + return true; + } + + return true; } static bool compare(const Value *V, const Value *U) { assert(!isa<BasicBlock>(V) && !isa<BasicBlock>(U) && "Must not compare basic blocks."); - assert(V->getType() == U->getType() && + assert(isEquivalentType(V->getType(), U->getType()) && "Two of the same operation have operands of different type."); // TODO: If the constant is an expression of F, we should accept that it's @@ -117,20 +264,40 @@ static bool compare(const Value *V, const Value *U) { static bool equals(const BasicBlock *BB1, const BasicBlock *BB2, DenseMap<const Value *, const Value *> &ValueMap, DenseMap<const Value *, const Value *> &SpeculationMap) { - // Specutively add it anyways. If it's false, we'll notice a difference later, and - // this won't matter. + // Speculatively add it anyways. If it's false, we'll notice a difference + // later, and this won't matter. ValueMap[BB1] = BB2; BasicBlock::const_iterator FI = BB1->begin(), FE = BB1->end(); BasicBlock::const_iterator GI = BB2->begin(), GE = BB2->end(); do { - if (!FI->isSameOperationAs(const_cast<Instruction *>(&*GI))) - return false; + if (isa<BitCastInst>(FI)) { + ++FI; + continue; + } + if (isa<BitCastInst>(GI)) { + ++GI; + continue; + } - if (FI->getNumOperands() != GI->getNumOperands()) + if (!isEquivalentOperation(FI, GI)) return false; + if (isa<GetElementPtrInst>(FI)) { + const GetElementPtrInst *GEPF = cast<GetElementPtrInst>(FI); + const GetElementPtrInst *GEPG = cast<GetElementPtrInst>(GI); + if (GEPF->hasAllZeroIndices() && GEPG->hasAllZeroIndices()) { + // It's effectively a bitcast. 
+ ++FI, ++GI; + continue; + } + + // TODO: we only really care about the elements before the index + if (FI->getOperand(0)->getType() != GI->getOperand(0)->getType()) + return false; + } + if (ValueMap[FI] == GI) { ++FI, ++GI; continue; @@ -140,8 +307,8 @@ static bool equals(const BasicBlock *BB1, const BasicBlock *BB2, return false; for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) { - Value *OpF = FI->getOperand(i); - Value *OpG = GI->getOperand(i); + Value *OpF = IgnoreBitcasts(FI->getOperand(i)); + Value *OpG = IgnoreBitcasts(GI->getOperand(i)); if (ValueMap[OpF] == OpG) continue; @@ -149,10 +316,8 @@ static bool equals(const BasicBlock *BB1, const BasicBlock *BB2, if (ValueMap[OpF] != NULL) return false; - assert(OpF->getType() == OpG->getType() && - "Two of the same operation has operands of different type."); - - if (OpF->getValueID() != OpG->getValueID()) + if (OpF->getValueID() != OpG->getValueID() || + !isEquivalentType(OpF->getType(), OpG->getType())) return false; if (isa<PHINode>(FI)) { @@ -203,14 +368,15 @@ static bool equals(const Function *F, const Function *G) { if (F->hasSection() && F->getSection() != G->getSection()) return false; + if (F->isVarArg() != G->isVarArg()) + return false; + // TODO: if it's internal and only used in direct calls, we could handle this // case too. if (F->getCallingConv() != G->getCallingConv()) return false; - // TODO: We want to permit cases where two functions take T* and S* but - // only load or store them into T** and S**. - if (F->getType() != G->getType()) + if (!isEquivalentType(F->getFunctionType(), G->getFunctionType())) return false; DenseMap<const Value *, const Value *> ValueMap; @@ -237,89 +403,213 @@ static bool equals(const Function *F, const Function *G) { return true; } -static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { - if (FnVec[i]->mayBeOverridden() && !FnVec[j]->mayBeOverridden()) - std::swap(FnVec[i], FnVec[j]); - - Function *F = FnVec[i]; - Function *G = FnVec[j]; +// ===----------------------------------------------------------------------=== +// Folding of functions +// ===----------------------------------------------------------------------=== + +// Cases: +// * F is external strong, G is external strong: +// turn G into a thunk to F (1) +// * F is external strong, G is external weak: +// turn G into a thunk to F (1) +// * F is external weak, G is external weak: +// unfoldable +// * F is external strong, G is internal: +// address of G taken: +// turn G into a thunk to F (1) +// address of G not taken: +// make G an alias to F (2) +// * F is internal, G is external weak +// address of F is taken: +// turn G into a thunk to F (1) +// address of F is not taken: +// make G an alias of F (2) +// * F is internal, G is internal: +// address of F and G are taken: +// turn G into a thunk to F (1) +// address of G is not taken: +// make G an alias to F (2) +// +// alias requires linkage == (external,local,weak) fallback to creating a thunk +// external means 'externally visible' linkage != (internal,private) +// internal means linkage == (internal,private) +// weak means linkage mayBeOverridable +// being external implies that the address is taken +// +// 1. turn G into a thunk to F +// 2. 
make G an alias to F + +enum LinkageCategory { + ExternalStrong, + ExternalWeak, + Internal +}; + +static LinkageCategory categorize(const Function *F) { + switch (F->getLinkage()) { + case GlobalValue::InternalLinkage: + case GlobalValue::PrivateLinkage: + return Internal; + + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + case GlobalValue::ExternalWeakLinkage: + return ExternalWeak; + + case GlobalValue::ExternalLinkage: + case GlobalValue::AvailableExternallyLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::AppendingLinkage: + case GlobalValue::DLLImportLinkage: + case GlobalValue::DLLExportLinkage: + case GlobalValue::GhostLinkage: + case GlobalValue::CommonLinkage: + return ExternalStrong; + } - if (!F->mayBeOverridden()) { - if (G->hasLocalLinkage()) { - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - G->replaceAllUsesWith(F); - G->eraseFromParent(); - ++NumFunctionsMerged; - return true; - } + assert(0 && "Unknown LinkageType."); + return ExternalWeak; +} - if (G->hasExternalLinkage() || G->hasWeakLinkage()) { - GlobalAlias *GA = new GlobalAlias(G->getType(), G->getLinkage(), "", - F, G->getParent()); - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - GA->takeName(G); - GA->setVisibility(G->getVisibility()); - G->replaceAllUsesWith(GA); - G->eraseFromParent(); - ++NumFunctionsMerged; - return true; +static void ThunkGToF(Function *F, Function *G) { + Function *NewG = Function::Create(G->getFunctionType(), G->getLinkage(), "", + G->getParent()); + BasicBlock *BB = BasicBlock::Create("", NewG); + + std::vector<Value *> Args; + unsigned i = 0; + const FunctionType *FFTy = F->getFunctionType(); + for (Function::arg_iterator AI = NewG->arg_begin(), AE = NewG->arg_end(); + AI != AE; ++AI) { + if (FFTy->getParamType(i) == AI->getType()) + Args.push_back(AI); + else { + Value *BCI = new BitCastInst(AI, FFTy->getParamType(i), "", BB); + Args.push_back(BCI); } + ++i; } - if (F->hasWeakLinkage() && G->hasWeakLinkage()) { - GlobalAlias *GA_F = new GlobalAlias(F->getType(), F->getLinkage(), "", - 0, F->getParent()); - GA_F->takeName(F); - GA_F->setVisibility(F->getVisibility()); - F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); - F->replaceAllUsesWith(GA_F); - F->setName("folded." + GA_F->getName()); - F->setLinkage(GlobalValue::ExternalLinkage); - GA_F->setAliasee(F); - - GlobalAlias *GA_G = new GlobalAlias(G->getType(), G->getLinkage(), "", - F, G->getParent()); - GA_G->takeName(G); - GA_G->setVisibility(G->getVisibility()); - G->replaceAllUsesWith(GA_G); - G->eraseFromParent(); - - ++NumFunctionsMerged; - return true; + CallInst *CI = CallInst::Create(F, Args.begin(), Args.end(), "", BB); + CI->setTailCall(); + CI->setCallingConv(F->getCallingConv()); + if (NewG->getReturnType() == Type::VoidTy) { + ReturnInst::Create(BB); + } else if (CI->getType() != NewG->getReturnType()) { + Value *BCI = new BitCastInst(CI, NewG->getReturnType(), "", BB); + ReturnInst::Create(BCI, BB); + } else { + ReturnInst::Create(CI, BB); } - DOUT << "Failed on " << F->getName() << " and " << G->getName() << "\n"; + NewG->copyAttributesFrom(G); + NewG->takeName(G); + G->replaceAllUsesWith(NewG); + G->eraseFromParent(); - ++NumMergeFails; - return false; + // TODO: look at direct callers to G and make them all direct callers to F. 
} -static bool hasAddressTaken(User *U) { - for (User::use_iterator I = U->use_begin(), E = U->use_end(); I != E; ++I) { - User *Use = *I; +static void AliasGToF(Function *F, Function *G) { + if (!G->hasExternalLinkage() && !G->hasLocalLinkage() && !G->hasWeakLinkage()) + return ThunkGToF(F, G); + + GlobalAlias *GA = new GlobalAlias( + G->getType(), G->getLinkage(), "", + ConstantExpr::getBitCast(F, G->getType()), G->getParent()); + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + GA->takeName(G); + GA->setVisibility(G->getVisibility()); + G->replaceAllUsesWith(GA); + G->eraseFromParent(); +} - // 'call (bitcast @F to ...)' happens a lot. - while (isa<ConstantExpr>(Use) && Use->hasOneUse()) { - Use = *Use->use_begin(); - } +static bool fold(std::vector<Function *> &FnVec, unsigned i, unsigned j) { + Function *F = FnVec[i]; + Function *G = FnVec[j]; - if (isa<ConstantExpr>(Use)) { - if (hasAddressTaken(Use)) - return true; - } + LinkageCategory catF = categorize(F); + LinkageCategory catG = categorize(G); - if (!isa<CallInst>(Use) && !isa<InvokeInst>(Use)) - return true; + if (catF == ExternalWeak || (catF == Internal && catG == ExternalStrong)) { + std::swap(FnVec[i], FnVec[j]); + std::swap(F, G); + std::swap(catF, catG); + } - // Make sure we aren't passing U as a parameter to call instead of the - // callee. - if (CallSite(cast<Instruction>(Use)).hasArgument(U)) - return true; + switch (catF) { + case ExternalStrong: + switch (catG) { + case ExternalStrong: + case ExternalWeak: + ThunkGToF(F, G); + break; + case Internal: + if (G->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + } + break; + + case ExternalWeak: { + assert(catG == ExternalWeak); + + // Make them both thunks to the same internal function. + F->setAlignment(std::max(F->getAlignment(), G->getAlignment())); + Function *H = Function::Create(F->getFunctionType(), F->getLinkage(), "", + F->getParent()); + H->copyAttributesFrom(F); + H->takeName(F); + F->replaceAllUsesWith(H); + + ThunkGToF(F, G); + ThunkGToF(F, H); + + F->setLinkage(GlobalValue::InternalLinkage); + } break; + + case Internal: + switch (catG) { + case ExternalStrong: + assert(0); + // fall-through + case ExternalWeak: + if (F->hasAddressTaken()) + ThunkGToF(F, G); + else + AliasGToF(F, G); + break; + case Internal: { + bool addrTakenF = F->hasAddressTaken(); + bool addrTakenG = G->hasAddressTaken(); + if (!addrTakenF && addrTakenG) { + std::swap(FnVec[i], FnVec[j]); + std::swap(F, G); + std::swap(addrTakenF, addrTakenG); + } + + if (addrTakenF && addrTakenG) { + ThunkGToF(F, G); + } else { + assert(!addrTakenG); + AliasGToF(F, G); + } + } break; + } + break; } - return false; + ++NumFunctionsMerged; + return true; } +// ===----------------------------------------------------------------------=== +// Pass definition +// ===----------------------------------------------------------------------=== + bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; @@ -329,25 +619,19 @@ bool MergeFunctions::runOnModule(Module &M) { if (F->isDeclaration() || F->isIntrinsic()) continue; - if (!F->hasLocalLinkage() && !F->hasExternalLinkage() && - !F->hasWeakLinkage()) - continue; - - if (hasAddressTaken(F)) - continue; - FnMap[hash(F)].push_back(F); } - // TODO: instead of running in a loop, we could also fold functions in callgraph - // order. Constructing the CFG probably isn't cheaper than just running in a loop. + // TODO: instead of running in a loop, we could also fold functions in + // callgraph order. 
Constructing the CFG probably isn't cheaper than just + // running in a loop, unless it happened to already be available. bool LocalChanged; do { LocalChanged = false; + DOUT << "size: " << FnMap.size() << "\n"; for (std::map<unsigned long, std::vector<Function *> >::iterator I = FnMap.begin(), E = FnMap.end(); I != E; ++I) { - DOUT << "size: " << FnMap.size() << "\n"; std::vector<Function *> &FnVec = I->second; DOUT << "hash (" << I->first << "): " << FnVec.size() << "\n"; diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp new file mode 100644 index 0000000..b3a25540 --- /dev/null +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -0,0 +1,171 @@ +//===- PartialInlining.cpp - Inline parts of functions --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass performs partial inlining, typically by inlining an if statement +// that surrounds the body of the function. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "partialinlining" +#include "llvm/Transforms/IPO.h" +#include "llvm/Instructions.h" +#include "llvm/Module.h" +#include "llvm/Pass.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/FunctionUtils.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/CFG.h" +using namespace llvm; + +namespace { + struct VISIBILITY_HIDDEN PartialInliner : public ModulePass { + virtual void getAnalysisUsage(AnalysisUsage &AU) const { } + static char ID; // Pass identification, replacement for typeid + PartialInliner() : ModulePass(&ID) {} + + bool runOnModule(Module& M); + + private: + Function* unswitchFunction(Function* F); + }; +} + +char PartialInliner::ID = 0; +static RegisterPass<PartialInliner> X("partial-inliner", "Partial Inliner"); + +ModulePass* llvm::createPartialInliningPass() { return new PartialInliner(); } + +Function* PartialInliner::unswitchFunction(Function* F) { + // First, verify that this function is an unswitching candidate... + BasicBlock* entryBlock = F->begin(); + if (!isa<BranchInst>(entryBlock->getTerminator())) + return 0; + + BasicBlock* returnBlock = 0; + BasicBlock* nonReturnBlock = 0; + unsigned returnCount = 0; + for (succ_iterator SI = succ_begin(entryBlock), SE = succ_end(entryBlock); + SI != SE; ++SI) + if (isa<ReturnInst>((*SI)->getTerminator())) { + returnBlock = *SI; + returnCount++; + } else + nonReturnBlock = *SI; + + if (returnCount != 1) + return 0; + + // Clone the function, so that we can hack away on it. + DenseMap<const Value*, Value*> ValueMap; + Function* duplicateFunction = CloneFunction(F, ValueMap); + duplicateFunction->setLinkage(GlobalValue::InternalLinkage); + F->getParent()->getFunctionList().push_back(duplicateFunction); + BasicBlock* newEntryBlock = cast<BasicBlock>(ValueMap[entryBlock]); + BasicBlock* newReturnBlock = cast<BasicBlock>(ValueMap[returnBlock]); + BasicBlock* newNonReturnBlock = cast<BasicBlock>(ValueMap[nonReturnBlock]); + + // Go ahead and update all uses to the duplicate, so that we can just + // use the inliner functionality when we're done hacking. + F->replaceAllUsesWith(duplicateFunction); + + // Special hackery is needed with PHI nodes that have inputs from more than + // one extracted block. 
For simplicity, just split the PHIs into a two-level + // sequence of PHIs, some of which will go in the extracted region, and some + // of which will go outside. + BasicBlock* preReturn = newReturnBlock; + newReturnBlock = newReturnBlock->splitBasicBlock( + newReturnBlock->getFirstNonPHI()); + BasicBlock::iterator I = preReturn->begin(); + BasicBlock::iterator Ins = newReturnBlock->begin(); + while (I != preReturn->end()) { + PHINode* OldPhi = dyn_cast<PHINode>(I); + if (!OldPhi) break; + + PHINode* retPhi = PHINode::Create(OldPhi->getType(), "", Ins); + OldPhi->replaceAllUsesWith(retPhi); + Ins = newReturnBlock->getFirstNonPHI(); + + retPhi->addIncoming(I, preReturn); + retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock), + newEntryBlock); + OldPhi->removeIncomingValue(newEntryBlock); + + ++I; + } + newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock); + + // Gather up the blocks that we're going to extract. + std::vector<BasicBlock*> toExtract; + toExtract.push_back(newNonReturnBlock); + for (Function::iterator FI = duplicateFunction->begin(), + FE = duplicateFunction->end(); FI != FE; ++FI) + if (&*FI != newEntryBlock && &*FI != newReturnBlock && + &*FI != newNonReturnBlock) + toExtract.push_back(FI); + + // The CodeExtractor needs a dominator tree. + DominatorTree DT; + DT.runOnFunction(*duplicateFunction); + + // Extract the body of the the if. + Function* extractedFunction = ExtractCodeRegion(DT, toExtract); + + // Inline the top-level if test into all callers. + std::vector<User*> Users(duplicateFunction->use_begin(), + duplicateFunction->use_end()); + for (std::vector<User*>::iterator UI = Users.begin(), UE = Users.end(); + UI != UE; ++UI) + if (CallInst* CI = dyn_cast<CallInst>(*UI)) + InlineFunction(CI); + else if (InvokeInst* II = dyn_cast<InvokeInst>(*UI)) + InlineFunction(II); + + // Ditch the duplicate, since we're done with it, and rewrite all remaining + // users (function pointers, etc.) back to the original function. + duplicateFunction->replaceAllUsesWith(F); + duplicateFunction->eraseFromParent(); + + return extractedFunction; +} + +bool PartialInliner::runOnModule(Module& M) { + std::vector<Function*> worklist; + worklist.reserve(M.size()); + for (Module::iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) + if (!FI->use_empty() && !FI->isDeclaration()) + worklist.push_back(&*FI); + + bool changed = false; + while (!worklist.empty()) { + Function* currFunc = worklist.back(); + worklist.pop_back(); + + if (currFunc->use_empty()) continue; + + bool recursive = false; + for (Function::use_iterator UI = currFunc->use_begin(), + UE = currFunc->use_end(); UI != UE; ++UI) + if (Instruction* I = dyn_cast<Instruction>(UI)) + if (I->getParent()->getParent() == currFunc) { + recursive = true; + break; + } + if (recursive) continue; + + + if (Function* newFunc = unswitchFunction(currFunc)) { + worklist.push_back(newFunc); + changed = true; + } + + } + + return changed; +}
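
For context, a hedged illustration of the shape of function the new pass targets (the names are invented): the entry block ends in a conditional branch, exactly one successor returns immediately, and everything else is the expensive region that gets handed to CodeExtractor. After the pass runs, callers effectively see only the cheap guard inline and call the extracted function on the slow path.

/* stand-in for the large body that ends up in the extracted function */
static int slow_path(int key) { return key * 37 + 11; }

int guarded(int key) {
  if (key < 0)
    return -1;            /* cheap early exit: this test is what callers inline */
  return slow_path(key);  /* the rest stays out of line after extraction */
}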
\ No newline at end of file diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 83503fd..38b1198 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -168,7 +168,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L, // Expand the code for the iteration count into the preheader of the loop. BasicBlock *Preheader = L->getLoopPreheader(); - Value *ExitCnt = Rewriter.expandCodeFor(RHS, CmpIndVar->getType(), + Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), Preheader->getTerminator()); // Insert a new icmp_ne or icmp_eq instruction before the branch. @@ -392,10 +392,31 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // in this loop, insert a canonical induction variable of the largest size. Value *IndVar = 0; if (NeedCannIV) { + // Check to see if the loop already has a canonical-looking induction + // variable. If one is present and it's wider than the planned canonical + // induction variable, temporarily remove it, so that the Rewriter + // doesn't attempt to reuse it. + PHINode *OldCannIV = L->getCanonicalInductionVariable(); + if (OldCannIV) { + if (SE->getTypeSizeInBits(OldCannIV->getType()) > + SE->getTypeSizeInBits(LargestType)) + OldCannIV->removeFromParent(); + else + OldCannIV = 0; + } + IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType); + ++NumInserted; Changed = true; DOUT << "INDVARS: New CanIV: " << *IndVar; + + // Now that the official induction variable is established, reinsert + // the old canonical-looking variable after it so that the IR remains + // consistent. It will be deleted as part of the dead-PHI deletion at + // the end of the pass. + if (OldCannIV) + OldCannIV->insertAfter(cast<Instruction>(IndVar)); } // If we have a trip count expression, rewrite the loop's exit condition @@ -459,8 +480,8 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, E = List.end(); UI != E; ++UI) { SCEVHandle Offset = UI->getOffset(); Value *Op = UI->getOperandValToReplace(); + const Type *UseTy = Op->getType(); Instruction *User = UI->getUser(); - bool isSigned = UI->isSigned(); // Compute the final addrec to expand into code. SCEVHandle AR = IU->getReplacementExpr(*UI); @@ -471,7 +492,7 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, // Expand loop-invariant values in the loop preheader. They will // be sunk to the exit block later, if possible. NewVal = - Rewriter.expandCodeFor(AR, LargestType, + Rewriter.expandCodeFor(AR, UseTy, L->getLoopPreheader()->getTerminator()); Rewriter.setInsertionPoint(I); ++NumReplaced; @@ -485,74 +506,6 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType, if (!Stride->isLoopInvariant(L)) continue; - const Type *IVTy = Offset->getType(); - const Type *UseTy = Op->getType(); - - // Promote the Offset and Stride up to the canonical induction - // variable's bit width. - SCEVHandle PromotedOffset = Offset; - SCEVHandle PromotedStride = Stride; - if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) { - // It doesn't matter for correctness whether zero or sign extension - // is used here, since the value is truncated away below, but if the - // value is signed, sign extension is more likely to be folded. 
- if (isSigned) { - PromotedOffset = SE->getSignExtendExpr(PromotedOffset, LargestType); - PromotedStride = SE->getSignExtendExpr(PromotedStride, LargestType); - } else { - PromotedOffset = SE->getZeroExtendExpr(PromotedOffset, LargestType); - // If the stride is obviously negative, use sign extension to - // produce things like x-1 instead of x+255. - if (isa<SCEVConstant>(PromotedStride) && - cast<SCEVConstant>(PromotedStride) - ->getValue()->getValue().isNegative()) - PromotedStride = SE->getSignExtendExpr(PromotedStride, - LargestType); - else - PromotedStride = SE->getZeroExtendExpr(PromotedStride, - LargestType); - } - } - - // Create the SCEV representing the offset from the canonical - // induction variable, still in the canonical induction variable's - // type, so that all expanded arithmetic is done in the same type. - SCEVHandle NewAR = SE->getAddRecExpr(SE->getIntegerSCEV(0, LargestType), - PromotedStride, L); - // Add the PromotedOffset as a separate step, because it may not be - // loop-invariant. - NewAR = SE->getAddExpr(NewAR, PromotedOffset); - - // Expand the addrec into instructions. - Value *V = Rewriter.expandCodeFor(NewAR); - - // Insert an explicit cast if necessary to truncate the value - // down to the original stride type. This is done outside of - // SCEVExpander because in SCEV expressions, a truncate of an - // addrec is always folded. - if (LargestType != IVTy) { - if (SE->getTypeSizeInBits(IVTy) != SE->getTypeSizeInBits(LargestType)) - NewAR = SE->getTruncateExpr(NewAR, IVTy); - if (Rewriter.isInsertedExpression(NewAR)) - V = Rewriter.expandCodeFor(NewAR); - else { - V = Rewriter.InsertCastOfTo(CastInst::getCastOpcode(V, false, - IVTy, false), - V, IVTy); - assert(!isa<SExtInst>(V) && !isa<ZExtInst>(V) && - "LargestType wasn't actually the largest type!"); - // Force the rewriter to use this trunc whenever this addrec - // appears so that it doesn't insert new phi nodes or - // arithmetic in a different type. - Rewriter.addInsertedValue(V, NewAR); - } - } - - DOUT << "INDVARS: Made offset-and-trunc IV for offset " - << *IVTy << " " << *Offset << ": "; - DEBUG(WriteAsOperand(*DOUT, V, false)); - DOUT << "\n"; - // Now expand it into actual Instructions and patch it into place. NewVal = Rewriter.expandCodeFor(AR, UseTy); } diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 6d2ff0e..5465e4a 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -2608,21 +2608,6 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) { else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName()); } - - if (Op1I->hasOneUse()) { - // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression - // is not used by anyone else... - // - if (Op1I->getOpcode() == Instruction::FSub) { - // Swap the two operands of the subexpr... - Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); - Op1I->setOperand(0, IIOp1); - Op1I->setOperand(1, IIOp0); - - // Create the new top level fadd instruction... - return BinaryOperator::CreateFAdd(Op0, Op1); - } - } } return 0; @@ -11824,7 +11809,8 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) { if (SI.isVolatile()) return 0; // Don't hack volatile stores. 
// store X, null -> turns into 'unreachable' in SimplifyCFG - if (isa<ConstantPointerNull>(Ptr)) { + if (isa<ConstantPointerNull>(Ptr) && + cast<PointerType>(Ptr->getType())->getAddressSpace() == 0) { if (!isa<UndefValue>(Val)) { SI.setOperand(0, UndefValue::get(Val->getType())); if (Instruction *U = dyn_cast<Instruction>(Val)) diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp index b499279..5a85a04 100644 --- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -125,13 +125,17 @@ static bool MarkAliveBlocks(BasicBlock *BB, } } - if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) - if (isa<ConstantPointerNull>(SI->getOperand(1)) || - isa<UndefValue>(SI->getOperand(1))) { + if (StoreInst *SI = dyn_cast<StoreInst>(BBI)) { + Value *Ptr = SI->getOperand(1); + + if (isa<UndefValue>(Ptr) || + (isa<ConstantPointerNull>(Ptr) && + cast<PointerType>(Ptr->getType())->getAddressSpace() == 0)) { ChangeToUnreachable(SI); Changed = true; break; } + } } // Turn invokes that call 'nounwind' functions into ordinary calls. diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index 3a991f6..54bd895 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -364,4 +364,15 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, const Type **Tys, #include "llvm/Intrinsics.gen" #undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN + /// hasAddressTaken - returns true if there are any uses of this function + /// other than direct calls or invokes to it. +bool Function::hasAddressTaken() const { + for (Value::use_const_iterator I = use_begin(), E = use_end(); I != E; ++I) { + if (I.getOperandNo() != 0 || + (!isa<CallInst>(*I) && !isa<InvokeInst>(*I))) + return true; + } + return false; +} + // vim: sw=2 ai diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index 7556b8e..e0764e4 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -218,9 +218,12 @@ bool Instruction::isIdenticalTo(const Instruction *I) const { } // isSameOperationAs +// This should be kept in sync with isEquivalentOperation in +// lib/Transforms/IPO/MergeFunctions.cpp. bool Instruction::isSameOperationAs(const Instruction *I) const { - if (getOpcode() != I->getOpcode() || getType() != I->getType() || - getNumOperands() != I->getNumOperands()) + if (getOpcode() != I->getOpcode() || + getNumOperands() != I->getNumOperands() || + getType() != I->getType()) return false; // We have two instructions of identical opcode and #operands. Check to see diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index b1297ff..e9f2acd 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -276,8 +276,8 @@ namespace { int VT, unsigned ArgNo, std::string &Suffix); void VerifyIntrinsicPrototype(Intrinsic::ID ID, Function *F, unsigned RetNum, unsigned ParamNum, ...); - void VerifyAttrs(Attributes Attrs, const Type *Ty, - bool isReturnValue, const Value *V); + void VerifyParameterAttrs(Attributes Attrs, const Type *Ty, + bool isReturnValue, const Value *V); void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs, const Value *V); @@ -437,22 +437,23 @@ void Verifier::visitGlobalAlias(GlobalAlias &GA) { void Verifier::verifyTypeSymbolTable(TypeSymbolTable &ST) { } -// VerifyAttrs - Check the given parameter attributes for an argument or return +// VerifyParameterAttrs - Check the given attributes for an argument or return // value of the specified type. The value V is printed in error messages. 
-void Verifier::VerifyAttrs(Attributes Attrs, const Type *Ty, - bool isReturnValue, const Value *V) { +void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty, + bool isReturnValue, const Value *V) { if (Attrs == Attribute::None) return; + Attributes FnCheckAttr = Attrs & Attribute::FunctionOnly; + Assert1(!FnCheckAttr, "Attribute " + Attribute::getAsString(FnCheckAttr) + + " only applies to the function!", V); + if (isReturnValue) { Attributes RetI = Attrs & Attribute::ParameterOnly; Assert1(!RetI, "Attribute " + Attribute::getAsString(RetI) + " does not apply to return values!", V); } - Attributes FnCheckAttr = Attrs & Attribute::FunctionOnly; - Assert1(!FnCheckAttr, "Attribute " + Attribute::getAsString(FnCheckAttr) + - " only applies to functions!", V); - + for (unsigned i = 0; i < array_lengthof(Attribute::MutuallyIncompatible); ++i) { Attributes MutI = Attrs & Attribute::MutuallyIncompatible[i]; @@ -495,9 +496,9 @@ void Verifier::VerifyFunctionAttrs(const FunctionType *FT, else if (Attr.Index-1 < FT->getNumParams()) Ty = FT->getParamType(Attr.Index-1); else - break; // VarArgs attributes, don't verify. - - VerifyAttrs(Attr.Attrs, Ty, Attr.Index == 0, V); + break; // VarArgs attributes, verified elsewhere. + + VerifyParameterAttrs(Attr.Attrs, Ty, Attr.Index == 0, V); if (Attr.Attrs & Attribute::Nest) { Assert1(!SawNest, "More than one parameter has attribute nest!", V); @@ -509,10 +510,10 @@ void Verifier::VerifyFunctionAttrs(const FunctionType *FT, } Attributes FAttrs = Attrs.getFnAttributes(); - Assert1(!(FAttrs & (~Attribute::FunctionOnly)), - "Attribute " + Attribute::getAsString(FAttrs) + - " does not apply to function!", V); - + Attributes NotFn = FAttrs & (~Attribute::FunctionOnly); + Assert1(!NotFn, "Attribute " + Attribute::getAsString(NotFn) + + " does not apply to the function!", V); + for (unsigned i = 0; i < array_lengthof(Attribute::MutuallyIncompatible); ++i) { Attributes MutI = FAttrs & Attribute::MutuallyIncompatible[i]; @@ -1025,7 +1026,7 @@ void Verifier::VerifyCallSite(CallSite CS) { for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) { Attributes Attr = Attrs.getParamAttributes(Idx); - VerifyAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I); + VerifyParameterAttrs(Attr, CS.getArgument(Idx-1)->getType(), false, I); Attributes VArgI = Attr & Attribute::VarArgsIncompatible; Assert1(!VArgI, "Attribute " + Attribute::getAsString(VArgI) + |