Diffstat (limited to 'lib')
76 files changed, 1506 insertions, 891 deletions
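A recurring change in this diff is the split of the overloaded add/sub/mul IR opcodes into separate integer and floating-point opcodes (add/fadd, sub/fsub, mul/fmul). Readers of the older formats disambiguate by operand type, as GetDecodedBinaryOpcode does in the BitcodeReader.cpp hunk later in this diff. Below is a minimal standalone sketch of that disambiguation pattern; the enum names are hypothetical stand-ins, not the real LLVM types.

#include <cassert>

// Legacy binary-op codes shared by integer and FP operations (hypothetical).
enum OldBinOp { BINOP_ADD, BINOP_SUB, BINOP_MUL };
// Split opcodes after this change (hypothetical stand-in for Instruction::*).
enum NewOpcode { Add, FAdd, Sub, FSub, Mul, FMul };

// Pick the integer or FP opcode from the operand type, mirroring
// GetDecodedBinaryOpcode's use of Ty->isFPOrFPVector().
NewOpcode decodeBinOp(OldBinOp Op, bool OperandIsFP) {
  switch (Op) {
  case BINOP_ADD: return OperandIsFP ? FAdd : Add;
  case BINOP_SUB: return OperandIsFP ? FSub : Sub;
  case BINOP_MUL: return OperandIsFP ? FMul : Mul;
  }
  assert(0 && "unknown legacy binary opcode");
  return Add; // unreachable
}

int main() {
  assert(decodeBinOp(BINOP_MUL, true) == FMul);  // FP mul decodes to fmul
  assert(decodeBinOp(BINOP_MUL, false) == Mul);  // integer mul stays mul
  return 0;
}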
diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index 2dea7b3..2b34ad3 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -19,7 +19,6 @@ #include "llvm/Support/InstVisitor.h" #include "llvm/Support/Streams.h" #include "llvm/ADT/Statistic.h" -#include <ostream> using namespace llvm; STATISTIC(TotalInsts , "Number of instructions (of all types)"); diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index de6480a..a0d3974 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -24,7 +24,6 @@ #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include <algorithm> -#include <ostream> using namespace llvm; char LoopInfo::ID = 0; diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index f7f1849..03c5005 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -80,7 +80,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" -#include <ostream> #include <algorithm> using namespace llvm; @@ -2463,24 +2462,15 @@ void ScalarEvolution::forgetLoopPHIs(const Loop *L) { ScalarEvolution::BackedgeTakenInfo ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { // If the loop has a non-one exit block count, we can't analyze it. - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - if (ExitBlocks.size() != 1) return UnknownValue; + BasicBlock *ExitBlock = L->getExitBlock(); + if (!ExitBlock) + return UnknownValue; // Okay, there is one exit block. Try to find the condition that causes the // loop to be exited. - BasicBlock *ExitBlock = ExitBlocks[0]; - - BasicBlock *ExitingBlock = 0; - for (pred_iterator PI = pred_begin(ExitBlock), E = pred_end(ExitBlock); - PI != E; ++PI) - if (L->contains(*PI)) { - if (ExitingBlock == 0) - ExitingBlock = *PI; - else - return UnknownValue; // More than one block exiting! - } - assert(ExitingBlock && "No exits from loop, something is broken!"); + BasicBlock *ExitingBlock = L->getExitingBlock(); + if (!ExitingBlock) + return UnknownValue; // More than one block exiting! // Okay, we've computed the exiting block. See what condition causes us to // exit. diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 7ba8268..ef77e46 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -644,3 +644,16 @@ Value *SCEVExpander::expand(const SCEV *S) { InsertedExpressions[S] = V; return V; } + +/// getOrInsertCanonicalInductionVariable - This method returns the +/// canonical induction variable of the specified type for the specified +/// loop (inserting one if there is none). A canonical induction variable +/// starts at zero and steps by one on each iteration. +Value * +SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, + const Type *Ty) { + assert(Ty->isInteger() && "Can only insert integer induction variables!"); + SCEVHandle H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty), + SE.getIntegerSCEV(1, Ty), L); + return expand(H); +} diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 29ff8aa..45f97b8 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -771,7 +771,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (I == 0) return false; // (add x, 0.0) is guaranteed to return +0.0, not -0.0. 
- if (I->getOpcode() == Instruction::Add && + if (I->getOpcode() == Instruction::FAdd && isa<ConstantFP>(I->getOperand(1)) && cast<ConstantFP>(I->getOperand(1))->isNullValue()) return true; diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index f2e6890..c5190ef 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -547,6 +547,8 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(optsize); KEYWORD(ssp); KEYWORD(sspreq); + KEYWORD(noredzone); + KEYWORD(noimplicitfloat); KEYWORD(type); KEYWORD(opaque); @@ -590,7 +592,9 @@ lltok::Kind LLLexer::LexIdentifier() { if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \ UIntVal = Instruction::Enum; return lltok::kw_##STR; } - INSTKEYWORD(add, Add); INSTKEYWORD(sub, Sub); INSTKEYWORD(mul, Mul); + INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd); + INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub); + INSTKEYWORD(mul, Mul); INSTKEYWORD(fmul, FMul); INSTKEYWORD(udiv, UDiv); INSTKEYWORD(sdiv, SDiv); INSTKEYWORD(fdiv, FDiv); INSTKEYWORD(urem, URem); INSTKEYWORD(srem, SRem); INSTKEYWORD(frem, FRem); INSTKEYWORD(shl, Shl); INSTKEYWORD(lshr, LShr); INSTKEYWORD(ashr, AShr); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 8db4c71..5c44502 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -712,25 +712,26 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) { return Error(AttrLoc, "invalid use of parameter-only attribute"); return false; - case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break; - case lltok::kw_signext: Attrs |= Attribute::SExt; break; - case lltok::kw_inreg: Attrs |= Attribute::InReg; break; - case lltok::kw_sret: Attrs |= Attribute::StructRet; break; - case lltok::kw_noalias: Attrs |= Attribute::NoAlias; break; - case lltok::kw_nocapture: Attrs |= Attribute::NoCapture; break; - case lltok::kw_byval: Attrs |= Attribute::ByVal; break; - case lltok::kw_nest: Attrs |= Attribute::Nest; break; - - case lltok::kw_noreturn: Attrs |= Attribute::NoReturn; break; - case lltok::kw_nounwind: Attrs |= Attribute::NoUnwind; break; - case lltok::kw_noinline: Attrs |= Attribute::NoInline; break; - case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break; - case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break; - case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break; - case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break; - case lltok::kw_ssp: Attrs |= Attribute::StackProtect; break; - case lltok::kw_sspreq: Attrs |= Attribute::StackProtectReq; break; - + case lltok::kw_zeroext: Attrs |= Attribute::ZExt; break; + case lltok::kw_signext: Attrs |= Attribute::SExt; break; + case lltok::kw_inreg: Attrs |= Attribute::InReg; break; + case lltok::kw_sret: Attrs |= Attribute::StructRet; break; + case lltok::kw_noalias: Attrs |= Attribute::NoAlias; break; + case lltok::kw_nocapture: Attrs |= Attribute::NoCapture; break; + case lltok::kw_byval: Attrs |= Attribute::ByVal; break; + case lltok::kw_nest: Attrs |= Attribute::Nest; break; + + case lltok::kw_noreturn: Attrs |= Attribute::NoReturn; break; + case lltok::kw_nounwind: Attrs |= Attribute::NoUnwind; break; + case lltok::kw_noinline: Attrs |= Attribute::NoInline; break; + case lltok::kw_readnone: Attrs |= Attribute::ReadNone; break; + case lltok::kw_readonly: Attrs |= Attribute::ReadOnly; break; + case lltok::kw_alwaysinline: Attrs |= Attribute::AlwaysInline; break; + case lltok::kw_optsize: Attrs |= Attribute::OptimizeForSize; break; + case lltok::kw_ssp: 
Attrs |= Attribute::StackProtect; break; + case lltok::kw_sspreq: Attrs |= Attribute::StackProtectReq; break; + case lltok::kw_noredzone: Attrs |= Attribute::NoRedZone; break; + case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break; case lltok::kw_align: { unsigned Alignment; @@ -1835,8 +1836,11 @@ bool LLParser::ParseValID(ValID &ID) { // Binary Operators. case lltok::kw_add: + case lltok::kw_fadd: case lltok::kw_sub: + case lltok::kw_fsub: case lltok::kw_mul: + case lltok::kw_fmul: case lltok::kw_udiv: case lltok::kw_sdiv: case lltok::kw_fdiv: @@ -2400,8 +2404,13 @@ bool LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, // Binary Operators. case lltok::kw_add: case lltok::kw_sub: - case lltok::kw_mul: return ParseArithmetic(Inst, PFS, KeywordVal, 0); - + case lltok::kw_mul: + // API compatibility: Accept either integer or floating-point types. + return ParseArithmetic(Inst, PFS, KeywordVal, 0); + case lltok::kw_fadd: + case lltok::kw_fsub: + case lltok::kw_fmul: return ParseArithmetic(Inst, PFS, KeywordVal, 2); + case lltok::kw_udiv: case lltok::kw_sdiv: case lltok::kw_urem: diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index d8bd38a..9335d19 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -80,6 +80,8 @@ namespace lltok { kw_optsize, kw_ssp, kw_sspreq, + kw_noredzone, + kw_noimplicitfloat, kw_type, kw_opaque, @@ -89,7 +91,8 @@ namespace lltok { kw_ueq, kw_une, // Instruction Opcodes (Opcode in UIntVal). - kw_add, kw_sub, kw_mul, kw_udiv, kw_sdiv, kw_fdiv, + kw_add, kw_fadd, kw_sub, kw_fsub, kw_mul, kw_fmul, + kw_udiv, kw_sdiv, kw_fdiv, kw_urem, kw_srem, kw_frem, kw_shl, kw_lshr, kw_ashr, kw_and, kw_or, kw_xor, kw_icmp, kw_fcmp, kw_vicmp, kw_vfcmp, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 1dad04b..3b44f564 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -104,9 +104,12 @@ static int GetDecodedCastOpcode(unsigned Val) { static int GetDecodedBinaryOpcode(unsigned Val, const Type *Ty) { switch (Val) { default: return -1; - case bitc::BINOP_ADD: return Instruction::Add; - case bitc::BINOP_SUB: return Instruction::Sub; - case bitc::BINOP_MUL: return Instruction::Mul; + case bitc::BINOP_ADD: + return Ty->isFPOrFPVector() ? Instruction::FAdd : Instruction::Add; + case bitc::BINOP_SUB: + return Ty->isFPOrFPVector() ? Instruction::FSub : Instruction::Sub; + case bitc::BINOP_MUL: + return Ty->isFPOrFPVector() ? Instruction::FMul : Instruction::Mul; case bitc::BINOP_UDIV: return Instruction::UDiv; case bitc::BINOP_SDIV: return Ty->isFPOrFPVector() ? 
Instruction::FDiv : Instruction::SDiv; diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index bfc029c..9f16728 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -77,9 +77,12 @@ static unsigned GetEncodedCastOpcode(unsigned Opcode) { static unsigned GetEncodedBinaryOpcode(unsigned Opcode) { switch (Opcode) { default: assert(0 && "Unknown binary instruction!"); - case Instruction::Add: return bitc::BINOP_ADD; - case Instruction::Sub: return bitc::BINOP_SUB; - case Instruction::Mul: return bitc::BINOP_MUL; + case Instruction::Add: + case Instruction::FAdd: return bitc::BINOP_ADD; + case Instruction::Sub: + case Instruction::FSub: return bitc::BINOP_SUB; + case Instruction::Mul: + case Instruction::FMul: return bitc::BINOP_MUL; case Instruction::UDiv: return bitc::BINOP_UDIV; case Instruction::FDiv: case Instruction::SDiv: return bitc::BINOP_SDIV; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 25217b0..5a66f4b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -20,7 +20,6 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" -#include <ostream> using namespace llvm; static TimerGroup &getDwarfTimerGroup() { diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp index 45e7dd3..f7ca4f4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -21,7 +21,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include <ostream> using namespace llvm; diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h new file mode 100644 index 0000000..bf43622 --- /dev/null +++ b/lib/CodeGen/ELF.h @@ -0,0 +1,186 @@ +//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header contains common, non-processor-specific data structures and +// constants for the ELF file format. +// +// The details of the ELF32 bits in this file are largely based on +// the Tool Interface Standard (TIS) Executable and Linking Format +// (ELF) Specification Version 1.2, May 1995. The ELF64 stuff is not +// standardized, as far as I can tell. It was largely based on information +// I found in OpenBSD header files. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ELF_H +#define CODEGEN_ELF_H + +#include "llvm/Support/DataTypes.h" +#include <cstring> + +namespace llvm { + class GlobalVariable; + + // Identification Indexes + enum { + EI_MAG0 = 0, + EI_MAG1 = 1, + EI_MAG2 = 2, + EI_MAG3 = 3 + }; + + // File types + enum { + ET_NONE = 0, // No file type + ET_REL = 1, // Relocatable file + ET_EXEC = 2, // Executable file + ET_DYN = 3, // Shared object file + ET_CORE = 4, // Core file + ET_LOPROC = 0xff00, // Beginning of processor-specific codes + ET_HIPROC = 0xffff // Processor-specific + }; + + // Object file classes. + enum { + ELFCLASS32 = 1, // 32-bit object file + ELFCLASS64 = 2 // 64-bit object file + }; + + // Object file byte orderings. 
+ enum { + ELFDATA2LSB = 1, // Little-endian object file + ELFDATA2MSB = 2 // Big-endian object file + }; + + // Versioning + enum { + EV_NONE = 0, + EV_CURRENT = 1 + }; + + /// ELFSection - This struct contains information about each section that is + /// emitted to the file. This is eventually turned into the section header + /// table at the end of the file. + struct ELFSection { + + // ELF specific fields + std::string Name; // Name of the section. + unsigned NameIdx; // Index in .shstrtab of name, once emitted. + unsigned Type; + unsigned Flags; + uint64_t Addr; + unsigned Offset; + unsigned Size; + unsigned Link; + unsigned Info; + unsigned Align; + unsigned EntSize; + + // Section Header Flags + enum { + SHF_WRITE = 1 << 0, // Writable + SHF_ALLOC = 1 << 1, // Mapped into the process addr space + SHF_EXECINSTR = 1 << 2, // Executable + SHF_MERGE = 1 << 4, // Might be merged if equal + SHF_STRINGS = 1 << 5, // Contains null-terminated strings + SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index + SHF_LINK_ORDER = 1 << 7, // Preserve order after combining + SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required + SHF_GROUP = 1 << 9, // Section is a member of a group + SHF_TLS = 1 << 10 // Section holds thread-local data + }; + + // Section Types + enum { + SHT_NULL = 0, // No associated section (inactive entry). + SHT_PROGBITS = 1, // Program-defined contents. + SHT_SYMTAB = 2, // Symbol table. + SHT_STRTAB = 3, // String table. + SHT_RELA = 4, // Relocation entries; explicit addends. + SHT_HASH = 5, // Symbol hash table. + SHT_DYNAMIC = 6, // Information for dynamic linking. + SHT_NOTE = 7, // Information about the file. + SHT_NOBITS = 8, // Data occupies no space in the file. + SHT_REL = 9, // Relocation entries; no explicit addends. + SHT_SHLIB = 10, // Reserved. + SHT_DYNSYM = 11, // Symbol table. + SHT_LOPROC = 0x70000000, // Lowest processor architecture-specific type. + SHT_HIPROC = 0x7fffffff, // Highest processor architecture-specific type. + SHT_LOUSER = 0x80000000, // Lowest type reserved for applications. + SHT_HIUSER = 0xffffffff // Highest type reserved for applications. + }; + + // Special section indices. + enum { + SHN_UNDEF = 0, // Undefined, missing, irrelevant, or meaningless + SHN_LORESERVE = 0xff00, // Lowest reserved index + SHN_LOPROC = 0xff00, // Lowest processor-specific index + SHN_HIPROC = 0xff1f, // Highest processor-specific index + SHN_ABS = 0xfff1, // Symbol has absolute value; does not need relocation + SHN_COMMON = 0xfff2, // FORTRAN COMMON or C external global variables + SHN_HIRESERVE = 0xffff // Highest reserved index + }; + + /// SectionIdx - The number of the section in the Section Table. + unsigned short SectionIdx; + + /// SectionData - The actual data for this section which we are building + /// up for emission to the file. + std::vector<unsigned char> SectionData; + + ELFSection(const std::string &name) + : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0), + Link(0), Info(0), Align(0), EntSize(0) {} + }; + + /// ELFSym - This struct contains information about each symbol that is + /// added to logical symbol table for the module. This is eventually + /// turned into a real symbol table in the file. + struct ELFSym { + const GlobalValue *GV; // The global value this corresponds to. + + // ELF specific fields + unsigned NameIdx; // Index in .strtab of name, once emitted. 
+ uint64_t Value; + unsigned Size; + uint8_t Info; + uint8_t Other; + unsigned short SectionIdx; + + enum { + STB_LOCAL = 0, + STB_GLOBAL = 1, + STB_WEAK = 2 + }; + + enum { + STT_NOTYPE = 0, + STT_OBJECT = 1, + STT_FUNC = 2, + STT_SECTION = 3, + STT_FILE = 4 + }; + + ELFSym(const GlobalValue *gv) : GV(gv), Value(0), + Size(0), Info(0), Other(0), + SectionIdx(ELFSection::SHN_UNDEF) {} + + void SetBind(unsigned X) { + assert(X == (X & 0xF) && "Bind value out of range!"); + Info = (Info & 0x0F) | (X << 4); + } + void SetType(unsigned X) { + assert(X == (X & 0xF) && "Type value out of range!"); + Info = (Info & 0xF0) | X; + } + }; + +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index 0a0245f..9af276b 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "elfce" + #include "ELFCodeEmitter.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/Mangler.h" -#include "llvm/Support/OutputBuffer.h" +#include "llvm/Support/Debug.h" //===----------------------------------------------------------------------===// // ELFCodeEmitter Implementation @@ -27,67 +27,87 @@ namespace llvm { /// startFunction - This callback is invoked when a new machine function is /// about to be emitted. -void ELFCodeEmitter::startFunction(MachineFunction &F) { - // Align the output buffer to the appropriate alignment. - unsigned Align = 16; // FIXME: GENERICIZE!! +void ELFCodeEmitter::startFunction(MachineFunction &MF) { + const TargetData *TD = TM.getTargetData(); + const Function *F = MF.getFunction(); + + // Align the output buffer to the appropriate alignment, power of 2. + unsigned FnAlign = F->getAlignment(); + unsigned TDAlign = TD->getPrefTypeAlignment(F->getType()); + unsigned Align = std::max(FnAlign, TDAlign); + assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); + // Get the ELF Section that this function belongs in. - ES = &EW.getSection(".text", ELFWriter::ELFSection::SHT_PROGBITS, - ELFWriter::ELFSection::SHF_EXECINSTR | - ELFWriter::ELFSection::SHF_ALLOC); - OutBuffer = &ES->SectionData; - cerr << "FIXME: This code needs to be updated for changes in the " - << "CodeEmitter interfaces. In particular, this should set " - << "BufferBegin/BufferEnd/CurBufferPtr, not deal with OutBuffer!"; - abort(); + ES = &EW.getTextSection(); + + // FIXME: better memory management, this will be replaced by BinaryObjects + ES->SectionData.reserve(4096); + BufferBegin = &ES->SectionData[0]; + BufferEnd = BufferBegin + ES->SectionData.capacity(); // Upgrade the section alignment if required. if (ES->Align < Align) ES->Align = Align; - // Add padding zeros to the end of the buffer to make sure that the - // function will start on the correct byte alignment within the section. - OutputBuffer OB(*OutBuffer, - TM.getTargetData()->getPointerSizeInBits() == 64, - TM.getTargetData()->isLittleEndian()); - OB.align(Align); - FnStart = OutBuffer->size(); + // Round the size up to the correct alignment for starting the new function. 
+ ES->Size = (ES->Size + (Align-1)) & (-Align); + + // Sanity check on allocated space for text section + assert( ES->Size < 4096 && "no more space in TextSection" ); + + // FIXME: Using ES->Size directly here instead of calculating it from the + // output buffer size (impossible because the code emitter deals only in raw + // bytes) forces us to manually synchronize size and write padding zero bytes + // to the output buffer for all non-text sections. For text sections, we do + // not synchronize the output buffer, and we just blow up if anyone tries to + // write non-code to it. An assert should probably be added to + // AddSymbolToSection to prevent calling it on the text section. + CurBufferPtr = BufferBegin + ES->Size; + + // Record function start address relative to BufferBegin + FnStartPtr = CurBufferPtr; } /// finishFunction - This callback is invoked after the function is completely /// finished. -bool ELFCodeEmitter::finishFunction(MachineFunction &F) { - // We now know the size of the function, add a symbol to represent it. - ELFWriter::ELFSym FnSym(F.getFunction()); +bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { + // Add a symbol to represent the function. + ELFSym FnSym(MF.getFunction()); // Figure out the binding (linkage) of the symbol. - switch (F.getFunction()->getLinkage()) { + switch (MF.getFunction()->getLinkage()) { default: // appending linkage is illegal for functions. assert(0 && "Unknown linkage type!"); case GlobalValue::ExternalLinkage: - FnSym.SetBind(ELFWriter::ELFSym::STB_GLOBAL); + FnSym.SetBind(ELFSym::STB_GLOBAL); break; case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: - FnSym.SetBind(ELFWriter::ELFSym::STB_WEAK); + FnSym.SetBind(ELFSym::STB_WEAK); break; case GlobalValue::PrivateLinkage: assert (0 && "PrivateLinkage should not be in the symbol table."); case GlobalValue::InternalLinkage: - FnSym.SetBind(ELFWriter::ELFSym::STB_LOCAL); + FnSym.SetBind(ELFSym::STB_LOCAL); break; } - ES->Size = OutBuffer->size(); + // Set the symbol type as a function + FnSym.SetType(ELFSym::STT_FUNC); - FnSym.SetType(ELFWriter::ELFSym::STT_FUNC); FnSym.SectionIdx = ES->SectionIdx; - FnSym.Value = FnStart; // Value = Offset from start of Section. - FnSym.Size = OutBuffer->size()-FnStart; + FnSym.Size = CurBufferPtr-FnStartPtr; + + // Offset from start of Section + FnSym.Value = FnStartPtr-BufferBegin; // Finally, add it to the symtab. EW.SymbolTable.push_back(FnSym); + + // Update Section Size + ES->Size = CurBufferPtr - BufferBegin; return false; } diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h index 11ebcc8..e9ee936 100644 --- a/lib/CodeGen/ELFCodeEmitter.h +++ b/lib/CodeGen/ELFCodeEmitter.h @@ -21,11 +21,10 @@ namespace llvm { class ELFCodeEmitter : public MachineCodeEmitter { ELFWriter &EW; TargetMachine &TM; - ELFWriter::ELFSection *ES; // Section to write to. - std::vector<unsigned char> *OutBuffer; - size_t FnStart; + ELFSection *ES; // Section to write to.
+ uint8_t *FnStartPtr; public: - explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), OutBuffer(0) {} + explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {} void startFunction(MachineFunction &F); bool finishFunction(MachineFunction &F); diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index be8edce..24f12a3 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -33,6 +33,7 @@ #include "ELFWriter.h" #include "ELFCodeEmitter.h" +#include "ELF.h" #include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/DerivedTypes.h" @@ -67,7 +68,8 @@ MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) : MachineFunctionPass(&ID), O(o), TM(tm) { - e_flags = 0; // e_flags defaults to 0, no flags. + e_flags = 0; // e_flags defaults to 0, no flags. + e_machine = TM.getELFWriterInfo()->getEMachine(); is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; isLittleEndian = TM.getTargetData()->isLittleEndian(); @@ -90,24 +92,39 @@ bool ELFWriter::doInitialization(Module &M) { std::vector<unsigned char> &FH = FileHeader; OutputBuffer FHOut(FH, is64Bit, isLittleEndian); - FHOut.outbyte(0x7F); // EI_MAG0 - FHOut.outbyte('E'); // EI_MAG1 - FHOut.outbyte('L'); // EI_MAG2 - FHOut.outbyte('F'); // EI_MAG3 - FHOut.outbyte(is64Bit ? 2 : 1); // EI_CLASS - FHOut.outbyte(isLittleEndian ? 1 : 2); // EI_DATA - FHOut.outbyte(1); // EI_VERSION - FH.resize(16); // EI_PAD up to 16 bytes. - - // This should change for shared objects. - FHOut.outhalf(1); // e_type = ET_REL - FHOut.outhalf(TM.getELFWriterInfo()->getEMachine()); // target-defined - FHOut.outword(1); // e_version = 1 - FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file - FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o - - ELFHeader_e_shoff_Offset = FH.size(); - FHOut.outaddr(0); // e_shoff + unsigned ElfClass = is64Bit ? ELFCLASS64 : ELFCLASS32; + unsigned ElfEndian = isLittleEndian ? ELFDATA2LSB : ELFDATA2MSB; + + // ELF Header + // ---------- + // Fields e_shnum e_shstrndx are only known after all section have + // been emitted. They locations in the ouput buffer are recorded so + // to be patched up later. + // + // Note + // ---- + // FHOut.outaddr method behaves differently for ELF32 and ELF64 writing + // 4 bytes in the former and 8 in the last for *_off and *_addr elf types + + FHOut.outbyte(0x7f); // e_ident[EI_MAG0] + FHOut.outbyte('E'); // e_ident[EI_MAG1] + FHOut.outbyte('L'); // e_ident[EI_MAG2] + FHOut.outbyte('F'); // e_ident[EI_MAG3] + + FHOut.outbyte(ElfClass); // e_ident[EI_CLASS] + FHOut.outbyte(ElfEndian); // e_ident[EI_DATA] + FHOut.outbyte(EV_CURRENT); // e_ident[EI_VERSION] + + FH.resize(16); // e_ident[EI_NIDENT-EI_PAD] + + FHOut.outhalf(ET_REL); // e_type + FHOut.outhalf(e_machine); // e_machine = target + FHOut.outword(EV_CURRENT); // e_version + FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file + FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o + + ELFHdr_e_shoff_Offset = FH.size(); + FHOut.outaddr(0); // e_shoff = sec hdr table off in bytes FHOut.outword(e_flags); // e_flags = whatever the target wants FHOut.outhalf(is64Bit ? 64 : 52); // e_ehsize = ELF header size @@ -115,14 +132,16 @@ bool ELFWriter::doInitialization(Module &M) { FHOut.outhalf(0); // e_phnum = # prog header entries = 0 FHOut.outhalf(is64Bit ? 
64 : 40); // e_shentsize = sect hdr entry size + // e_shnum = # of section header ents + ELFHdr_e_shnum_Offset = FH.size(); + FHOut.outhalf(0); - ELFHeader_e_shnum_Offset = FH.size(); - FHOut.outhalf(0); // e_shnum = # of section header ents - ELFHeader_e_shstrndx_Offset = FH.size(); - FHOut.outhalf(0); // e_shstrndx = Section # of '.shstrtab' + // e_shstrndx = Section # of '.shstrtab' + ELFHdr_e_shstrndx_Offset = FH.size(); + FHOut.outhalf(0); // Add the null section, which is required to be first in the file. - getSection("", 0, 0); + getSection("", ELFSection::SHT_NULL, 0); // Start up the symbol table. The first entry in the symtab is the null // entry. @@ -334,7 +353,7 @@ void ELFWriter::EmitSectionTableStringTable() { // Now that we know which section number is the .shstrtab section, update the // e_shstrndx entry in the ELF header. OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian); - FHOut.fixhalf(SHStrTab.SectionIdx, ELFHeader_e_shstrndx_Offset); + FHOut.fixhalf(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset); // Set the NameIdx of each section in the string table and emit the bytes for // the string table. @@ -386,11 +405,11 @@ void ELFWriter::OutputSectionsAndSectionTable() { // Now that we know where all of the sections will be emitted, set the e_shnum // entry in the ELF header. OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian); - FHOut.fixhalf(NumSections, ELFHeader_e_shnum_Offset); + FHOut.fixhalf(NumSections, ELFHdr_e_shnum_Offset); // Now that we know the offset in the file of the section table, update the // e_shoff address in the ELF header. - FHOut.fixaddr(FileOff, ELFHeader_e_shoff_Offset); + FHOut.fixaddr(FileOff, ELFHdr_e_shoff_Offset); // Now that we know all of the data in the file header, emit it and all of the // sections! diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index 31aa05a..0389185 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -15,6 +15,7 @@ #define ELFWRITER_H #include "llvm/CodeGen/MachineFunctionPass.h" +#include "ELF.h" #include <list> #include <map> @@ -82,10 +83,8 @@ namespace llvm { /// doInitialization - Emit the file header and all of the global variables /// for the module to the ELF file. bool doInitialization(Module &M); - bool runOnMachineFunction(MachineFunction &MF); - /// doFinalization - Now that the module has been completely processed, emit /// the ELF file to 'O'. bool doFinalization(Module &M); @@ -96,53 +95,6 @@ namespace llvm { // as well!). DataBuffer FileHeader; - /// ELFSection - This struct contains information about each section that is - /// emitted to the file. This is eventually turned into the section header - /// table at the end of the file. - struct ELFSection { - std::string Name; // Name of the section. - unsigned NameIdx; // Index in .shstrtab of name, once emitted. - unsigned Type; - unsigned Flags; - uint64_t Addr; - unsigned Offset; - unsigned Size; - unsigned Link; - unsigned Info; - unsigned Align; - unsigned EntSize; - - /// SectionIdx - The number of the section in the Section Table. - /// - unsigned short SectionIdx; - - /// SectionData - The actual data for this section which we are building - /// up for emission to the file. 
- DataBuffer SectionData; - - enum { SHT_NULL = 0, SHT_PROGBITS = 1, SHT_SYMTAB = 2, SHT_STRTAB = 3, - SHT_RELA = 4, SHT_HASH = 5, SHT_DYNAMIC = 6, SHT_NOTE = 7, - SHT_NOBITS = 8, SHT_REL = 9, SHT_SHLIB = 10, SHT_DYNSYM = 11 }; - enum { SHN_UNDEF = 0, SHN_ABS = 0xFFF1, SHN_COMMON = 0xFFF2 }; - enum { // SHF - ELF Section Header Flags - SHF_WRITE = 1 << 0, // Writable - SHF_ALLOC = 1 << 1, // Mapped into the process addr space - SHF_EXECINSTR = 1 << 2, // Executable - SHF_MERGE = 1 << 4, // Might be merged if equal - SHF_STRINGS = 1 << 5, // Contains null-terminated strings - SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index - SHF_LINK_ORDER = 1 << 7, // Preserve order after combining - SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required - SHF_GROUP = 1 << 9, // Section is a member of a group - SHF_TLS = 1 << 10 // Section holds thread-local data - }; - - ELFSection(const std::string &name) - : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0), - Link(0), Info(0), Align(0), EntSize(0) { - } - }; - /// SectionList - This is the list of sections that we have emitted to the /// file. Once the file has been completely built, the section header table /// is constructed from this info. @@ -165,9 +117,15 @@ namespace llvm { SN->SectionIdx = NumSections++; SN->Type = Type; SN->Flags = Flags; + SN->Link = ELFSection::SHN_UNDEF; return *SN; } + ELFSection &getTextSection() { + return getSection(".text", ELFSection::SHT_PROGBITS, + ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC); + } + ELFSection &getDataSection() { return getSection(".data", ELFSection::SHT_PROGBITS, ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); @@ -177,34 +135,6 @@ namespace llvm { ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); } - /// ELFSym - This struct contains information about each symbol that is - /// added to logical symbol table for the module. This is eventually - /// turned into a real symbol table in the file. - struct ELFSym { - const GlobalValue *GV; // The global value this corresponds to. - unsigned NameIdx; // Index in .strtab of name, once emitted. - uint64_t Value; - unsigned Size; - unsigned char Info; - unsigned char Other; - unsigned short SectionIdx; - - enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 }; - enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2, STT_SECTION = 3, - STT_FILE = 4 }; - ELFSym(const GlobalValue *gv) : GV(gv), Value(0), Size(0), Info(0), - Other(0), SectionIdx(0) {} - - void SetBind(unsigned X) { - assert(X == (X & 0xF) && "Bind value out of range!"); - Info = (Info & 0x0F) | (X << 4); - } - void SetType(unsigned X) { - assert(X == (X & 0xF) && "Type value out of range!"); - Info = (Info & 0xF0) | X; - } - }; - /// SymbolTable - This is the list of symbols we have emitted to the file. /// This actually gets rearranged before emission to the file (to put the /// local symbols first in the list). @@ -214,9 +144,9 @@ namespace llvm { // (e.g. the location of the section table). These members keep track of // the offset in ELFHeader of these various pieces to update and other // locations in the file. - unsigned ELFHeader_e_shoff_Offset; // e_shoff in ELF header. - unsigned ELFHeader_e_shstrndx_Offset; // e_shstrndx in ELF header. - unsigned ELFHeader_e_shnum_Offset; // e_shnum in ELF header. + unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header. + unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header. + unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header. 
private: void EmitGlobal(GlobalVariable *GV); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index b8c8563..c351593 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -28,7 +28,6 @@ #include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" -#include <ostream> using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 8520888..804fae5 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -40,7 +40,6 @@ #include <queue> #include <memory> #include <cmath> -#include <iostream> using namespace llvm; @@ -399,7 +398,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { } ++NumCoalesce; - return SrcReg; + return PhysReg; } return Reg; @@ -543,13 +542,37 @@ void RALinScan::linearScan() // Ignore splited live intervals. if (!isPhys && vrm_->getPreSplitReg(cur.reg)) continue; + + // A register defined by an implicit_def can be live out of the def BB and + // live in to a use BB. Add it to the live-in set of the use BBs. + if (!isPhys && cur.empty()) { + if (MachineInstr *DefMI = mri_->getVRegDef(cur.reg)) { + assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF); + MachineBasicBlock *DefMBB = DefMI->getParent(); + SmallPtrSet<MachineBasicBlock*, 4> Seen; + Seen.insert(DefMBB); + for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(cur.reg), + re = mri_->reg_end(); ri != re; ++ri) { + MachineInstr *UseMI = &*ri; + MachineBasicBlock *UseMBB = UseMI->getParent(); + if (Seen.insert(UseMBB)) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Adding a virtual register to livein set?"); + UseMBB->addLiveIn(Reg); + } + } + } + } for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); I != E; ++I) { const LiveRange &LR = *I; if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) { for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i) if (LiveInMBBs[i] != EntryMBB) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Adding a virtual register to livein set?"); LiveInMBBs[i]->addLiveIn(Reg); + } LiveInMBBs.clear(); } } @@ -1192,7 +1215,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // The earliest start of a Spilled interval indicates up to where // in handled we need to roll back - unsigned earliestStart = cur->beginNumber(); LiveInterval *earliestStartInterval = cur; // Spill live intervals of virtual regs mapped to the physical register we @@ -1206,19 +1228,10 @@ LiveInterval *sli = spillIs.back(); spillIs.pop_back(); DOUT << "\t\t\tspilling(a): " << *sli << '\n'; - earliestStart = std::min(earliestStart, sli->beginNumber()); earliestStartInterval = (earliestStartInterval->beginNumber() < sli->beginNumber()) ?
earliestStartInterval : sli; - - if (earliestStartInterval->beginNumber()!=earliestStart) { - epicFail |= true; - std::cerr << "What the 1 - " - << "earliestStart = " << earliestStart - << "earliestStartInterval = " << earliestStartInterval->beginNumber() - << "\n"; - } - + std::vector<LiveInterval*> newIs; if (!NewSpillFramework) { newIs = li_->addIntervalsForSpills(*sli, spillIs, loopInfo, *vrm_); @@ -1229,20 +1242,12 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); spilled.insert(sli->reg); - if (earliestStartInterval->beginNumber()!=earliestStart) { - epicFail |= true; - std::cerr << "What the 2 - " - << "earliestStart = " << earliestStart - << "earliestStartInterval = " << earliestStartInterval->beginNumber() - << "\n"; - } - if (epicFail) { //abort(); } } - earliestStart = earliestStartInterval->beginNumber(); + unsigned earliestStart = earliestStartInterval->beginNumber(); DOUT << "\t\trolling back to: " << earliestStart << '\n'; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4c1710d..609ec82 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3626,30 +3626,29 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); - SDNode *LD1 = getBuildPairElt(N, 0); - if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); + LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) return SDValue(); MVT LD1VT = LD1->getValueType(0); - SDNode *LD2 = getBuildPairElt(N, 1); const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && // If both are volatile this would reduce the number of volatile loads. // If one is volatile it might be ok, but play conservative and bail out. - !cast<LoadSDNode>(LD1)->isVolatile() && - !cast<LoadSDNode>(LD2)->isVolatile() && + !LD1->isVolatile() && + !LD2->isVolatile() && TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { - LoadSDNode *LD = cast<LoadSDNode>(LD1); - unsigned Align = LD->getAlignment(); + unsigned Align = LD1->getAlignment(); unsigned NewAlign = TLI.getTargetData()-> getABITypeAlignment(VT.getTypeForMVT()); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), - false, Align); + return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), + LD1->getBasePtr(), LD1->getSrcValue(), + LD1->getSrcValueOffset(), false, Align); } return SDValue(); @@ -4019,6 +4018,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul A, 0) -> 0 if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) return N1; + // fold (fmul A, 0) -> 0, vector edition. 
+ if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 6becff3..4a7dbeb 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -639,18 +639,18 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { bool FastISel::SelectOperator(User *I, unsigned Opcode) { switch (Opcode) { - case Instruction::Add: { - ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FADD : ISD::ADD; - return SelectBinaryOp(I, Opc); - } - case Instruction::Sub: { - ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FSUB : ISD::SUB; - return SelectBinaryOp(I, Opc); - } - case Instruction::Mul: { - ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FMUL : ISD::MUL; - return SelectBinaryOp(I, Opc); - } + case Instruction::Add: + return SelectBinaryOp(I, ISD::ADD); + case Instruction::FAdd: + return SelectBinaryOp(I, ISD::FADD); + case Instruction::Sub: + return SelectBinaryOp(I, ISD::SUB); + case Instruction::FSub: + return SelectBinaryOp(I, ISD::FSUB); + case Instruction::Mul: + return SelectBinaryOp(I, ISD::MUL); + case Instruction::FMul: + return SelectBinaryOp(I, ISD::FMUL); case Instruction::SDiv: return SelectBinaryOp(I, ISD::SDIV); case Instruction::UDiv: diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2cd67e6..5ae183e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -116,6 +116,8 @@ private: /// result. SDValue LegalizeOp(SDValue O); + SDValue OptimizeFloatStore(StoreSDNode *ST); + /// PerformInsertVectorEltInMemory - Some target cannot handle a variable /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform @@ -165,6 +167,7 @@ private: SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); + SDValue ExpandVectorBuildThroughStack(SDNode* Node); void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -681,6 +684,59 @@ ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) { return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); } +SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // FIXME: We shouldn't do this for TargetConstantFP's. + // FIXME: move this to the DAG Combiner! Note that we can't regress due + // to phase ordering between legalized code and the dag combiner. This + // probably means that we need to integrate dag combiner and legalizer + // together. + // We generally can't do this one for long doubles. + SDValue Tmp1 = ST->getChain(); + SDValue Tmp2 = ST->getBasePtr(); + SDValue Tmp3; + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + DebugLoc dl = ST->getDebugLoc(); + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { + if (CFP->getValueType(0) == MVT::f32 && + getTypeAction(MVT::i32) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF(). 
+ bitcastToAPInt().zextOrTrunc(32), + MVT::i32); + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + } else if (CFP->getValueType(0) == MVT::f64) { + // If this target supports 64-bit registers, do a single 64-bit store. + if (getTypeAction(MVT::i64) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + zextOrTrunc(64), MVT::i64); + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + // Otherwise, if the target supports 32-bit registers, use 2 32-bit + // stores. If the target supports neither 32- nor 64-bits, this + // xform is certainly not worth it. + const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(4)); + Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, + isVolatile, MinAlign(Alignment, 4U)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } + } + } + return SDValue(); +} + /// LegalizeOp - We know that the specified value has a legal type, and /// that its operands are legal. Now ensure that the operation itself /// is legal, recursively ensuring that the operands' operations remain @@ -1293,50 +1349,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { bool isVolatile = ST->isVolatile(); if (!ST->isTruncatingStore()) { - // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' - // FIXME: We shouldn't do this for TargetConstantFP's. - // FIXME: move this to the DAG Combiner! Note that we can't regress due - // to phase ordering between legalized code and the dag combiner. This - // probably means that we need to integrate dag combiner and legalizer - // together. - // We generally can't do this one for long doubles. - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { - if (CFP->getValueType(0) == MVT::f32 && - getTypeAction(MVT::i32) == Legal) { - Tmp3 = DAG.getConstant(CFP->getValueAPF(). - bitcastToAPInt().zextOrTrunc(32), - MVT::i32); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); - break; - } else if (CFP->getValueType(0) == MVT::f64) { - // If this target supports 64-bit registers, do a single 64-bit store. - if (getTypeAction(MVT::i64) == Legal) { - Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). - zextOrTrunc(64), MVT::i64); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); - break; - } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { - // Otherwise, if the target supports 32-bit registers, use 2 32-bit - // stores. If the target supports neither 32- nor 64-bits, this - // xform is certainly not worth it. 
- const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); - SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); - SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); - if (TLI.isBigEndian()) std::swap(Lo, Hi); - - Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(4)); - Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, - isVolatile, MinAlign(Alignment, 4U)); - - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - break; - } - } + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + Result = SDValue(OptStore, 0); + break; } { @@ -1510,6 +1525,46 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); } +SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { + // We can't handle this case efficiently. Allocate a sufficiently + // aligned object on the stack, store each element into it, then load + // the result as a vector. + // Create the stack frame object. + MVT VT = Node->getValueType(0); + MVT OpVT = Node->getOperand(0).getValueType(); + DebugLoc dl = Node->getDebugLoc(); + SDValue FIPtr = DAG.CreateStackTemporary(VT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store of each element to the stack slot. + SmallVector<SDValue, 8> Stores; + unsigned TypeByteSize = OpVT.getSizeInBits() / 8; + // Store (in the right endianness) the elements to memory. + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + // Ignore undef elements. + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + unsigned Offset = TypeByteSize*i; + + SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); + + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), + Idx, SV, Offset)); + } + + SDValue StoreChain; + if (!Stores.empty()) // Not all undef elements? + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + else + StoreChain = DAG.getEntryNode(); + + // Result is a load from the stack slot. + return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0); +} + SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1 = Node->getOperand(0); @@ -1853,40 +1908,8 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { } } - // Otherwise, we can't handle this case efficiently. Allocate a sufficiently - // aligned object on the stack, store each element into it, then load - // the result as a vector. - // Create the stack frame object. - SDValue FIPtr = DAG.CreateStackTemporary(VT); - int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); - - // Emit a store of each element to the stack slot. - SmallVector<SDValue, 8> Stores; - unsigned TypeByteSize = OpVT.getSizeInBits() / 8; - // Store (in the right endianness) the elements to memory. - for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { - // Ignore undef elements. 
- if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; - - unsigned Offset = TypeByteSize*i; - - SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); - Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); - - Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), - Idx, SV, Offset)); - } - - SDValue StoreChain; - if (!Stores.empty()) // Not all undef elements? - StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - else - StoreChain = DAG.getEntryNode(); - - // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0); + // Otherwise, we can't handle this case efficiently. + return ExpandVectorBuildThroughStack(Node); } // ExpandLibCall - Expand a node into a call to a libcall. If the result value @@ -2437,23 +2460,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); break; case ISD::CONCAT_VECTORS: { - // Use extract/insert/build vector for now. We might try to be - // more clever later. - SmallVector<SDValue, 8> Ops; - unsigned NumOperands = Node->getNumOperands(); - for (unsigned i=0; i < NumOperands; ++i) { - SDValue SubOp = Node->getOperand(i); - MVT VVT = SubOp.getNode()->getValueType(0); - MVT EltVT = VVT.getVectorElementType(); - unsigned NumSubElem = VVT.getVectorNumElements(); - for (unsigned j=0; j < NumSubElem; ++j) { - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, - DAG.getIntPtrConstant(j))); - } - } - Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Ops[0], Ops.size()); - Results.push_back(Tmp1); + Results.push_back(ExpandVectorBuildThroughStack(Node)); break; } case ISD::SCALAR_TO_VECTOR: diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index eb9342c..0c826f6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -356,13 +356,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { unsigned NewOpc = N->getOpcode(); DebugLoc dl = N->getDebugLoc(); - // If we're promoting a UINT to a larger size, check to see if the new node - // will be legal. If it isn't, check to see if FP_TO_SINT is legal, since - // we can use that instead. This allows us to generate better code for - // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not - // legal, such as PowerPC. + // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is + // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT + // and SINT conversions are Custom, there is no way to tell which is preferable. + // We choose SINT because that's the right thing on PPC.) 
if (N->getOpcode() == ISD::FP_TO_UINT && - !TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NVT) && + !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; @@ -1747,7 +1746,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i32) + if (VT == MVT::i16) + LC = RTLIB::SDIV_I16; + else if (VT == MVT::i32) LC = RTLIB::SDIV_I32; else if (VT == MVT::i64) LC = RTLIB::SDIV_I64; @@ -1909,7 +1910,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i32) + if (VT == MVT::i16) + LC = RTLIB::SREM_I16; + else if (VT == MVT::i32) LC = RTLIB::SREM_I32; else if (VT == MVT::i64) LC = RTLIB::SREM_I64; @@ -1938,7 +1941,9 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i32) + if (VT == MVT::i16) + LC = RTLIB::UDIV_I16; + else if (VT == MVT::i32) LC = RTLIB::UDIV_I32; else if (VT == MVT::i64) LC = RTLIB::UDIV_I64; @@ -1956,7 +1961,9 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i32) + if (VT == MVT::i16) + LC = RTLIB::UREM_I16; + else if (VT == MVT::i32) LC = RTLIB::UREM_I32; else if (VT == MVT::i64) LC = RTLIB::UREM_I64; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index df9af21..335c73c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -129,6 +129,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (!HasVectorValue) return TranslateLegalizeResults(Op, Result); + MVT QueryType; switch (Op.getOpcode()) { default: return TranslateLegalizeResults(Op, Result); @@ -162,8 +163,6 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::ANY_EXTEND: case ISD::TRUNCATE: case ISD::SIGN_EXTEND: - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FNEG: @@ -183,10 +182,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + QueryType = Node->getValueType(0); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + QueryType = Node->getOperand(0).getValueType(); break; } - switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { + switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { case TargetLowering::Promote: // "Promote" the operation by bitcasting Result = PromoteVectorOp(Op); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 195896e..a9adce8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -154,7 +154,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Do not accept an all-undef vector. if (i == e) return false; - // Do not accept build_vectors that aren't all constants or which have non-~0 + // Do not accept build_vectors that aren't all constants or which have non-0 // elements. 
SDValue Zero = N->getOperand(i); if (isa<ConstantSDNode>(Zero)) { @@ -166,7 +166,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { } else return false; - // Okay, we have at least one ~0 value, check to see if the rest match or are + // Okay, we have at least one 0 value, check to see if the rest match or are // undefs. for (++i; i != e; ++i) if (N->getOperand(i) != Zero && @@ -2807,16 +2807,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, case ISD::ADDC: case ISD::ADDE: case ISD::SUB: - case ISD::FADD: - case ISD::FSUB: - case ISD::FMUL: - case ISD::FDIV: - case ISD::FREM: case ISD::UDIV: case ISD::SDIV: case ISD::UREM: case ISD::SREM: return N2; // fold op(arg1, undef) -> undef + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (UnsafeFPMath) + return N2; + break; case ISD::MUL: case ISD::AND: case ISD::SRL: @@ -3059,7 +3062,7 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, isSrcStr = isMemSrcFromString(Src, Str); bool isSrcConst = isa<ConstantSDNode>(Src); bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(); - MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr); + MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG); if (VT != MVT::iAny) { unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT()); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index 889d7f5..93750d6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -842,20 +842,6 @@ void SelectionDAGLowering::visit(unsigned Opcode, User &I) { } } -void SelectionDAGLowering::visitAdd(User &I) { - if (I.getType()->isFPOrFPVector()) - visitBinary(I, ISD::FADD); - else - visitBinary(I, ISD::ADD); -} - -void SelectionDAGLowering::visitMul(User &I) { - if (I.getType()->isFPOrFPVector()) - visitBinary(I, ISD::FMUL); - else - visitBinary(I, ISD::MUL); -} - SDValue SelectionDAGLowering::getValue(const Value *V) { SDValue &N = NodeMap[V]; if (N.getNode()) return N; @@ -2161,37 +2147,33 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &SI) { } -void SelectionDAGLowering::visitSub(User &I) { +void SelectionDAGLowering::visitFSub(User &I) { // -0.0 - X --> fneg const Type *Ty = I.getType(); if (isa<VectorType>(Ty)) { if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { const VectorType *DestTy = cast<VectorType>(I.getType()); const Type *ElTy = DestTy->getElementType(); - if (ElTy->isFloatingPoint()) { - unsigned VL = DestTy->getNumElements(); - std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); - Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); - if (CV == CNZ) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2)); - return; - } - } - } - } - if (Ty->isFloatingPoint()) { - if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) - if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { + unsigned VL = DestTy->getNumElements(); + std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); + Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); + if (CV == CNZ) { SDValue Op2 = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), Op2.getValueType(), Op2)); return; } + } } + if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) + if 
(CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } - visitBinary(I, Ty->isFPOrFPVector() ? ISD::FSUB : ISD::SUB); + visitBinary(I, ISD::FSUB); } void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h index 578aa591..057c841 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -469,9 +469,12 @@ private: void visitBinary(User &I, unsigned OpCode); void visitShift(User &I, unsigned Opcode); - void visitAdd(User &I); - void visitSub(User &I); - void visitMul(User &I); + void visitAdd(User &I) { visitBinary(I, ISD::ADD); } + void visitFAdd(User &I) { visitBinary(I, ISD::FADD); } + void visitSub(User &I) { visitBinary(I, ISD::SUB); } + void visitFSub(User &I); + void visitMul(User &I) { visitBinary(I, ISD::MUL); } + void visitFMul(User &I) { visitBinary(I, ISD::FMUL); } void visitURem(User &I) { visitBinary(I, ISD::UREM); } void visitSRem(User &I) { visitBinary(I, ISD::SREM); } void visitFRem(User &I) { visitBinary(I, ISD::FREM); } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3334e53..ab4cd51 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2070,13 +2070,13 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, } -/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is -/// loading 'Bytes' bytes from a location that is 'Dist' units away from the -/// location that the 'Base' load is loading from. -bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base, - unsigned Bytes, int Dist, +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load +/// is loading from. 
+bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, + unsigned Bytes, int Dist, const MachineFrameInfo *MFI) const { - if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode()) + if (LD->getChain() != Base->getChain()) return false; MVT VT = LD->getValueType(0); if (VT.getSizeInBits() / 8 != Bytes) @@ -2094,6 +2094,11 @@ bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base, if (FS != BFS || FS != (int)Bytes) return false; return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } + if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1)); + if (V && (V->getSExtValue() == Dist*Bytes)) + return true; + } GlobalValue *GV1 = NULL; GlobalValue *GV2 = NULL; diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index c31f622..bd6584a 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -33,99 +33,21 @@ STATISTIC(NumSUnfold , "Number of stores unfolded"); STATISTIC(NumModRefUnfold, "Number of modref unfolded"); namespace { - enum RewriterName { simple, local, trivial }; + enum RewriterName { local, trivial }; } static cl::opt<RewriterName> RewriterOpt("rewriter", cl::desc("Rewriter to use: (default: local)"), cl::Prefix, - cl::values(clEnumVal(simple, "simple rewriter"), - clEnumVal(local, "local rewriter"), + cl::values(clEnumVal(local, "local rewriter"), clEnumVal(trivial, "trivial rewriter"), clEnumValEnd), cl::init(local)); VirtRegRewriter::~VirtRegRewriter() {} - -// ****************************** // -// Simple Spiller Implementation // -// ****************************** // - -struct VISIBILITY_HIDDEN SimpleRewriter : public VirtRegRewriter { - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs) { - DOUT << "********** REWRITE MACHINE CODE **********\n"; - DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; - const TargetMachine &TM = MF.getTarget(); - const TargetInstrInfo &TII = *TM.getInstrInfo(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - - - // LoadedRegs - Keep track of which vregs are loaded, so that we only load - // each vreg once (in the case where a spilled vreg is used by multiple - // operands). This is always smaller than the number of operands to the - // current machine instr, so it should be small. - std::vector<unsigned> LoadedRegs; - - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - DOUT << MBBI->getBasicBlock()->getName() << ":\n"; - MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); - MII != E; ++MII) { - MachineInstr &MI = *MII; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && MO.getReg()) { - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned VirtReg = MO.getReg(); - unsigned SubIdx = MO.getSubReg(); - unsigned PhysReg = VRM.getPhys(VirtReg); - unsigned RReg = SubIdx ? 
TRI.getSubReg(PhysReg, SubIdx) : PhysReg; - if (!VRM.isAssignedReg(VirtReg)) { - int StackSlot = VRM.getStackSlot(VirtReg); - const TargetRegisterClass* RC = - MF.getRegInfo().getRegClass(VirtReg); - - if (MO.isUse() && - std::find(LoadedRegs.begin(), LoadedRegs.end(), VirtReg) - == LoadedRegs.end()) { - TII.loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC); - MachineInstr *LoadMI = prior(MII); - VRM.addSpillSlotUse(StackSlot, LoadMI); - LoadedRegs.push_back(VirtReg); - ++NumLoads; - DOUT << '\t' << *LoadMI; - } - - if (MO.isDef()) { - TII.storeRegToStackSlot(MBB, next(MII), PhysReg, true, - StackSlot, RC); - MachineInstr *StoreMI = next(MII); - VRM.addSpillSlotUse(StackSlot, StoreMI); - ++NumStores; - } - } - MF.getRegInfo().setPhysRegUsed(RReg); - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - } else { - MF.getRegInfo().setPhysRegUsed(MO.getReg()); - } - } - } - - DOUT << '\t' << MI; - LoadedRegs.clear(); - } - } - return true; - } -}; /// This class is intended for use with the new spilling framework only. It /// rewrites vreg def/uses to use the assigned preg, but does not insert any @@ -2231,8 +2153,6 @@ llvm::VirtRegRewriter* llvm::createVirtRegRewriter() { default: assert(0 && "Unreachable!"); case local: return new LocalRewriter(); - case simple: - return new SimpleRewriter(); case trivial: return new TrivialRewriter(); } diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 29a05bb..a80513f 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -573,8 +573,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { return GV; } case Instruction::Add: + case Instruction::FAdd: case Instruction::Sub: + case Instruction::FSub: case Instruction::Mul: + case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::URem: @@ -605,11 +608,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { case Type::FloatTyID: switch (CE->getOpcode()) { default: assert(0 && "Invalid float opcode"); abort(); - case Instruction::Add: + case Instruction::FAdd: GV.FloatVal = LHS.FloatVal + RHS.FloatVal; break; - case Instruction::Sub: + case Instruction::FSub: GV.FloatVal = LHS.FloatVal - RHS.FloatVal; break; - case Instruction::Mul: + case Instruction::FMul: GV.FloatVal = LHS.FloatVal * RHS.FloatVal; break; case Instruction::FDiv: GV.FloatVal = LHS.FloatVal / RHS.FloatVal; break; @@ -620,11 +623,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { case Type::DoubleTyID: switch (CE->getOpcode()) { default: assert(0 && "Invalid double opcode"); abort(); - case Instruction::Add: + case Instruction::FAdd: GV.DoubleVal = LHS.DoubleVal + RHS.DoubleVal; break; - case Instruction::Sub: + case Instruction::FSub: GV.DoubleVal = LHS.DoubleVal - RHS.DoubleVal; break; - case Instruction::Mul: + case Instruction::FMul: GV.DoubleVal = LHS.DoubleVal * RHS.DoubleVal; break; case Instruction::FDiv: GV.DoubleVal = LHS.DoubleVal / RHS.DoubleVal; break; @@ -638,15 +641,15 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { APFloat apfLHS = APFloat(LHS.IntVal); switch (CE->getOpcode()) { default: assert(0 && "Invalid long double opcode"); abort(); - case Instruction::Add: + case Instruction::FAdd: apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; - case Instruction::Sub: + case Instruction::FSub: apfLHS.subtract(APFloat(RHS.IntVal), 
APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; - case Instruction::Mul: + case Instruction::FMul: apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 765fed2..7dfeae0 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -64,45 +64,35 @@ void Interpreter::initializeExecutionEngine() { Dest.TY##Val = Src1.TY##Val OP Src2.TY##Val; \ break -#define IMPLEMENT_INTEGER_BINOP1(OP, TY) \ - case Type::IntegerTyID: { \ - Dest.IntVal = Src1.IntVal OP Src2.IntVal; \ - break; \ - } - - -static void executeAddInst(GenericValue &Dest, GenericValue Src1, - GenericValue Src2, const Type *Ty) { +static void executeFAddInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { switch (Ty->getTypeID()) { - IMPLEMENT_INTEGER_BINOP1(+, Ty); IMPLEMENT_BINARY_OPERATOR(+, Float); IMPLEMENT_BINARY_OPERATOR(+, Double); default: - cerr << "Unhandled type for Add instruction: " << *Ty << "\n"; + cerr << "Unhandled type for FAdd instruction: " << *Ty << "\n"; abort(); } } -static void executeSubInst(GenericValue &Dest, GenericValue Src1, - GenericValue Src2, const Type *Ty) { +static void executeFSubInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { switch (Ty->getTypeID()) { - IMPLEMENT_INTEGER_BINOP1(-, Ty); IMPLEMENT_BINARY_OPERATOR(-, Float); IMPLEMENT_BINARY_OPERATOR(-, Double); default: - cerr << "Unhandled type for Sub instruction: " << *Ty << "\n"; + cerr << "Unhandled type for FSub instruction: " << *Ty << "\n"; abort(); } } -static void executeMulInst(GenericValue &Dest, GenericValue Src1, - GenericValue Src2, const Type *Ty) { +static void executeFMulInst(GenericValue &Dest, GenericValue Src1, + GenericValue Src2, const Type *Ty) { switch (Ty->getTypeID()) { - IMPLEMENT_INTEGER_BINOP1(*, Ty); IMPLEMENT_BINARY_OPERATOR(*, Float); IMPLEMENT_BINARY_OPERATOR(*, Double); default: - cerr << "Unhandled type for Mul instruction: " << *Ty << "\n"; + cerr << "Unhandled type for FMul instruction: " << *Ty << "\n"; abort(); } } @@ -550,11 +540,14 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { GenericValue R; // Result switch (I.getOpcode()) { - case Instruction::Add: executeAddInst (R, Src1, Src2, Ty); break; - case Instruction::Sub: executeSubInst (R, Src1, Src2, Ty); break; - case Instruction::Mul: executeMulInst (R, Src1, Src2, Ty); break; - case Instruction::FDiv: executeFDivInst (R, Src1, Src2, Ty); break; - case Instruction::FRem: executeFRemInst (R, Src1, Src2, Ty); break; + case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break; + case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break; + case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break; + case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break; + case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break; + case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break; + case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break; + case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break; case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break; case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break; case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break; @@ -1258,18 +1251,21 @@ GenericValue 
Interpreter::getConstantExprValue (ConstantExpr *CE, GenericValue Dest; const Type * Ty = CE->getOperand(0)->getType(); switch (CE->getOpcode()) { - case Instruction::Add: executeAddInst (Dest, Op0, Op1, Ty); break; - case Instruction::Sub: executeSubInst (Dest, Op0, Op1, Ty); break; - case Instruction::Mul: executeMulInst (Dest, Op0, Op1, Ty); break; + case Instruction::Add: Dest.IntVal = Op0.IntVal + Op1.IntVal; break; + case Instruction::Sub: Dest.IntVal = Op0.IntVal - Op1.IntVal; break; + case Instruction::Mul: Dest.IntVal = Op0.IntVal * Op1.IntVal; break; + case Instruction::FAdd: executeFAddInst(Dest, Op0, Op1, Ty); break; + case Instruction::FSub: executeFSubInst(Dest, Op0, Op1, Ty); break; + case Instruction::FMul: executeFMulInst(Dest, Op0, Op1, Ty); break; case Instruction::FDiv: executeFDivInst(Dest, Op0, Op1, Ty); break; case Instruction::FRem: executeFRemInst(Dest, Op0, Op1, Ty); break; case Instruction::SDiv: Dest.IntVal = Op0.IntVal.sdiv(Op1.IntVal); break; case Instruction::UDiv: Dest.IntVal = Op0.IntVal.udiv(Op1.IntVal); break; case Instruction::URem: Dest.IntVal = Op0.IntVal.urem(Op1.IntVal); break; case Instruction::SRem: Dest.IntVal = Op0.IntVal.srem(Op1.IntVal); break; - case Instruction::And: Dest.IntVal = Op0.IntVal.And(Op1.IntVal); break; - case Instruction::Or: Dest.IntVal = Op0.IntVal.Or(Op1.IntVal); break; - case Instruction::Xor: Dest.IntVal = Op0.IntVal.Xor(Op1.IntVal); break; + case Instruction::And: Dest.IntVal = Op0.IntVal & Op1.IntVal; break; + case Instruction::Or: Dest.IntVal = Op0.IntVal | Op1.IntVal; break; + case Instruction::Xor: Dest.IntVal = Op0.IntVal ^ Op1.IntVal; break; case Instruction::Shl: Dest.IntVal = Op0.IntVal.shl(Op1.IntVal.getZExtValue()); break; diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index 89131a0..43f23e4 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -551,7 +551,7 @@ namespace { // When outputting a function stub in the context of some other function, we // save BufferBegin/BufferEnd/CurBufferPtr here. - unsigned char *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr; + uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr; /// Relocations - These are the relocations that the function needs, as /// emitted. @@ -891,8 +891,11 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C, break; } case Instruction::Add: + case Instruction::FAdd: case Instruction::Sub: + case Instruction::FSub: case Instruction::Mul: + case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::URem: @@ -1056,11 +1059,11 @@ bool JITEmitter::finishFunction(MachineFunction &F) { // FnStart is the start of the text, not the start of the constant pool and // other per-function data. - unsigned char *FnStart = - (unsigned char *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction()); + uint8_t *FnStart = + (uint8_t *)TheJIT->getPointerToGlobalIfAvailable(F.getFunction()); // FnEnd is the end of the function's machine code. 
- unsigned char *FnEnd = CurBufferPtr; + uint8_t *FnEnd = CurBufferPtr; if (!Relocations.empty()) { CurFn = F.getFunction(); @@ -1183,7 +1186,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { } else { DOUT << "JIT: Binary code:\n"; DOUT << std::hex; - unsigned char* q = FnStart; + uint8_t* q = FnStart; for (int i = 0; q < FnEnd; q += 4, ++i) { if (i == 4) i = 0; @@ -1221,7 +1224,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) { BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), ActualSize); BufferEnd = BufferBegin+ActualSize; - unsigned char* FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd); + uint8_t* FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd); MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr, FrameRegister); BufferBegin = SavedBufferBegin; @@ -1416,7 +1419,7 @@ void JITEmitter::startGVStub(const GlobalValue* GV, void *Buffer, SavedBufferEnd = BufferEnd; SavedCurBufferPtr = CurBufferPtr; - BufferBegin = CurBufferPtr = (unsigned char *)Buffer; + BufferBegin = CurBufferPtr = (uint8_t *)Buffer; BufferEnd = BufferBegin+StubSize+1; } diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 2819b6d..70ccdcc 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -257,9 +257,9 @@ namespace { // When emitting code into a memory block, this is the block. MemoryRangeHeader *CurBlock; - unsigned char *CurStubPtr, *StubBase; - unsigned char *GOTBase; // Target Specific reserved memory - void *DlsymTable; // Stub external symbol information + uint8_t *CurStubPtr, *StubBase; + uint8_t *GOTBase; // Target Specific reserved memory + void *DlsymTable; // Stub external symbol information // Centralize memory block allocation. sys::MemoryBlock getNewMemoryBlock(unsigned size); @@ -273,12 +273,12 @@ namespace { void AllocateGOT(); void SetDlsymTable(void *); - unsigned char *allocateStub(const GlobalValue* F, unsigned StubSize, - unsigned Alignment); + uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize, + unsigned Alignment); /// startFunctionBody - When a function starts, allocate a block of free /// executable memory, returning a pointer to it and its actual size. - unsigned char *startFunctionBody(const Function *F, uintptr_t &ActualSize) { + uint8_t *startFunctionBody(const Function *F, uintptr_t &ActualSize) { FreeRangeHeader* candidateBlock = FreeMemoryList; FreeRangeHeader* head = FreeMemoryList; @@ -301,18 +301,18 @@ namespace { // Allocate the entire memory block. FreeMemoryList = candidateBlock->AllocateBlock(); ActualSize = CurBlock->BlockSize-sizeof(MemoryRangeHeader); - return (unsigned char *)(CurBlock+1); + return (uint8_t *)(CurBlock+1); } /// endFunctionBody - The function F is now allocated, and takes the memory /// in the range [FunctionStart,FunctionEnd). - void endFunctionBody(const Function *F, unsigned char *FunctionStart, - unsigned char *FunctionEnd) { + void endFunctionBody(const Function *F, uint8_t *FunctionStart, + uint8_t *FunctionEnd) { assert(FunctionEnd > FunctionStart); - assert(FunctionStart == (unsigned char *)(CurBlock+1) && + assert(FunctionStart == (uint8_t *)(CurBlock+1) && "Mismatched function start/end!"); - uintptr_t BlockSize = FunctionEnd - (unsigned char *)CurBlock; + uintptr_t BlockSize = FunctionEnd - (uint8_t *)CurBlock; FunctionBlocks[F] = CurBlock; // Release the memory at the end of this block that isn't needed. 
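The startFunctionBody/endFunctionBody pair above encodes a simple discipline: hand the emitter an entire free block up front, then trim the allocation down to the bytes actually written once emission finishes, returning the tail to the free list (the TrimAllocationToSize call visible in these hunks). A minimal sketch of that discipline, with hypothetical names rather than the manager's real interface:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    struct Block { uint8_t *Base; size_t Size; };

    // Give the emitter the whole block; it may write up to B.Size bytes.
    uint8_t *beginEmit(const Block &B) { return B.Base; }

    // After emission, keep only [Base, End) and return the tail length
    // that can go back on the free list -- the trim-to-size idea.
    size_t endEmit(const Block &B, uint8_t *End) {
      assert(End >= B.Base && (size_t)(End - B.Base) <= B.Size);
      return B.Size - (size_t)(End - B.Base);
    }

The trim happens after the fact because machine code size is unknown until emission completes; over-reserving and shrinking avoids a resize-and-copy mid-emission.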
@@ -320,17 +320,17 @@ namespace { } /// allocateSpace - Allocate a memory block of the given size. - unsigned char *allocateSpace(intptr_t Size, unsigned Alignment) { + uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) { CurBlock = FreeMemoryList; FreeMemoryList = FreeMemoryList->AllocateBlock(); - unsigned char *result = (unsigned char *)CurBlock+1; + uint8_t *result = (uint8_t *)CurBlock+1; if (Alignment == 0) Alignment = 1; - result = (unsigned char*)(((intptr_t)result+Alignment-1) & + result = (uint8_t*)(((intptr_t)result+Alignment-1) & ~(intptr_t)(Alignment-1)); - uintptr_t BlockSize = result + Size - (unsigned char *)CurBlock; + uintptr_t BlockSize = result + Size - (uint8_t *)CurBlock; FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize); return result; @@ -338,28 +338,26 @@ namespace { /// startExceptionTable - Use startFunctionBody to allocate memory for the /// function's exception table. - unsigned char* startExceptionTable(const Function* F, - uintptr_t &ActualSize) { + uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) { return startFunctionBody(F, ActualSize); } /// endExceptionTable - The exception table of F is now allocated, /// and takes the memory in the range [TableStart,TableEnd). - void endExceptionTable(const Function *F, unsigned char *TableStart, - unsigned char *TableEnd, - unsigned char* FrameRegister) { + void endExceptionTable(const Function *F, uint8_t *TableStart, + uint8_t *TableEnd, uint8_t* FrameRegister) { assert(TableEnd > TableStart); - assert(TableStart == (unsigned char *)(CurBlock+1) && + assert(TableStart == (uint8_t *)(CurBlock+1) && "Mismatched table start/end!"); - uintptr_t BlockSize = TableEnd - (unsigned char *)CurBlock; + uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock; TableBlocks[F] = CurBlock; // Release the memory at the end of this block that isn't needed. FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize); } - unsigned char *getGOTBase() const { + uint8_t *getGOTBase() const { return GOTBase; } @@ -433,7 +431,7 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() { sys::MemoryBlock MemBlock = getNewMemoryBlock(16 << 20); #endif - unsigned char *MemBase = static_cast<unsigned char*>(MemBlock.base()); + uint8_t *MemBase = static_cast<uint8_t*>(MemBlock.base()); // Allocate stubs backwards from the base, allocate functions forward // from the base. 
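Both allocators in this file rely on the power-of-two mask idiom seen in allocateSpace above and allocateStub in the next hunk: round a pointer up when allocating forward from the base, round it down when carving stubs backwards from the end. As standalone helpers (a sketch assuming, as the code does, that Alignment is a power of two):

    #include <cstdint>

    // Round up: used when allocating forward from the base.
    static uint8_t *alignUp(uint8_t *P, intptr_t Alignment) {
      return (uint8_t*)(((intptr_t)P + Alignment - 1) & ~(Alignment - 1));
    }

    // Round down: used when allocating stubs backwards from the end.
    static uint8_t *alignDown(uint8_t *P, intptr_t Alignment) {
      return (uint8_t*)((intptr_t)P & ~(Alignment - 1));
    }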
@@ -492,7 +490,7 @@ DefaultJITMemoryManager::DefaultJITMemoryManager() {
 void DefaultJITMemoryManager::AllocateGOT() {
   assert(GOTBase == 0 && "Cannot allocate the got multiple times");
-  GOTBase = new unsigned char[sizeof(void*) * 8192];
+  GOTBase = new uint8_t[sizeof(void*) * 8192];
   HasGOT = true;
 }
@@ -508,12 +506,12 @@ DefaultJITMemoryManager::~DefaultJITMemoryManager() {
   Blocks.clear();
 }
-unsigned char *DefaultJITMemoryManager::allocateStub(const GlobalValue* F,
+uint8_t *DefaultJITMemoryManager::allocateStub(const GlobalValue* F,
                                                 unsigned StubSize,
                                                 unsigned Alignment) {
   CurStubPtr -= StubSize;
-  CurStubPtr = (unsigned char*)(((intptr_t)CurStubPtr) &
-                                ~(intptr_t)(Alignment-1));
+  CurStubPtr = (uint8_t*)(((intptr_t)CurStubPtr) &
+                          ~(intptr_t)(Alignment-1));
   if (CurStubPtr < StubBase) {
     // FIXME: allocate a new block
     fprintf(stderr, "JIT ran out of memory for function stubs!\n");
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 6ac37bc..42e6fda 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Format.h"
 #include "llvm/System/Program.h"
+#include "llvm/System/Process.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Config/config.h"
 #include "llvm/Support/Compiler.h"
@@ -301,6 +302,35 @@ uint64_t raw_fd_ostream::seek(uint64_t off) {
   return pos;
 }
+raw_ostream &raw_fd_ostream::changeColor(enum Colors colors, bool bold,
+                                         bool bg) {
+  if (sys::Process::ColorNeedsFlush())
+    flush();
+  const char *colorcode =
+    (colors == SAVEDCOLOR) ? sys::Process::OutputBold(bg)
+                           : sys::Process::OutputColor(colors, bold, bg);
+  if (colorcode) {
+    unsigned len = strlen(colorcode);
+    write(colorcode, len);
+    // don't count the color code toward output characters
+    pos -= len;
+  }
+  return *this;
+}
+
+raw_ostream &raw_fd_ostream::resetColor() {
+  if (sys::Process::ColorNeedsFlush())
+    flush();
+  const char *colorcode = sys::Process::ResetColor();
+  if (colorcode) {
+    unsigned len = strlen(colorcode);
+    write(colorcode, len);
+    // don't count the color code toward output characters
+    pos -= len;
+  }
+  return *this;
+}
+
 //===----------------------------------------------------------------------===//
 //  raw_stdout/err_ostream
 //===----------------------------------------------------------------------===//
diff --git a/lib/System/Unix/Process.inc b/lib/System/Unix/Process.inc
index 74b9bb8..2da31c9 100644
--- a/lib/System/Unix/Process.inc
+++ b/lib/System/Unix/Process.inc
@@ -235,3 +235,62 @@ unsigned Process::StandardErrColumns() {
   return getColumns(2);
 }
+
+static bool terminalHasColors() {
+  if (const char *term = std::getenv("TERM")) {
+    // Most modern terminals support ANSI escape sequences for colors.
+    // We could check terminfo, or have a list of known terms that support
+    // colors, but that would be overkill.
+    // The user can always ask for no colors by setting TERM to dumb, or
+    // using a commandline flag.
+    return strcmp(term, "dumb") != 0;
+  }
+  return false;
+}
+
+bool Process::StandardOutHasColors() {
+  if (!StandardOutIsDisplayed())
+    return false;
+  return terminalHasColors();
+}
+
+bool Process::StandardErrHasColors() {
+  if (!StandardErrIsDisplayed())
+    return false;
+  return terminalHasColors();
+}
+
+bool Process::ColorNeedsFlush() {
+  // No, we use ANSI escape sequences.
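  // ANSI SGR sequences are plain in-band bytes -- e.g. "\033[0;31m" selects
  // red, "\033[1m" bold, and "\033[0m" resets -- and the terminal applies
  // them in stream order, so no flush is required before emitting them,
  // unlike the Win32 console-attribute API further below.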
+ return false; +} + +#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m" + +#define ALLCOLORS(FGBG,BOLD) {\ + COLOR(FGBG, "0", BOLD),\ + COLOR(FGBG, "1", BOLD),\ + COLOR(FGBG, "2", BOLD),\ + COLOR(FGBG, "3", BOLD),\ + COLOR(FGBG, "4", BOLD),\ + COLOR(FGBG, "5", BOLD),\ + COLOR(FGBG, "6", BOLD),\ + COLOR(FGBG, "7", BOLD)\ + } + +static const char* colorcodes[2][2][8] = { + { ALLCOLORS("3",""), ALLCOLORS("3","1;") }, + { ALLCOLORS("4",""), ALLCOLORS("4","1;") } +}; + +const char *Process::OutputColor(char code, bool bold, bool bg) { + return colorcodes[bg?1:0][bold?1:0][code&7]; +} + +const char *Process::OutputBold(bool bg) { + return "\033[1m"; +} + +const char *Process::ResetColor() { + return "\033[0m"; +} diff --git a/lib/System/Win32/Process.inc b/lib/System/Win32/Process.inc index e1d7a92..cfbe33c 100644 --- a/lib/System/Win32/Process.inc +++ b/lib/System/Win32/Process.inc @@ -147,4 +147,71 @@ unsigned Process::StandardErrColumns() { return Columns; } +// It always has colors. +bool Process::StandardErrHasColors() { + return StandardErrIsDisplayed(); +} + +bool Process::StandardOutHasColors() { + return StandardOutIsDisplayed(); +} + +namespace { +class DefaultColors +{ + private: + WORD defaultColor; + public: + DefaultColors() + :defaultColor(GetCurrentColor()) {} + static unsigned GetCurrentColor() { + CONSOLE_SCREEN_BUFFER_INFO csbi; + if (GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi)) + return csbi.wAttributes; + return 0; + } + WORD operator()() const { return defaultColor; } +}; + +DefaultColors defaultColors; +} + +bool Process::ColorNeedsFlush() { + return true; +} + +const char *Process::OutputBold(bool bg) { + WORD colors = DefaultColors::GetCurrentColor(); + if (bg) + colors |= BACKGROUND_INTENSITY; + else + colors |= FOREGROUND_INTENSITY; + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors); + return 0; +} + +const char *Process::OutputColor(char code, bool bold, bool bg) { + WORD colors; + if (bg) { + colors = ((code&1) ? BACKGROUND_RED : 0) | + ((code&2) ? BACKGROUND_GREEN : 0 ) | + ((code&4) ? BACKGROUND_BLUE : 0); + if (bold) + colors |= BACKGROUND_INTENSITY; + } else { + colors = ((code&1) ? FOREGROUND_RED : 0) | + ((code&2) ? FOREGROUND_GREEN : 0 ) | + ((code&4) ? 
FOREGROUND_BLUE : 0); + if (bold) + colors |= FOREGROUND_INTENSITY; + } + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), colors); + return 0; +} + +const char *Process::ResetColor() { + SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors()); + return 0; +} + } diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index 3a038c9..a75ed3b 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -17,7 +17,6 @@ #include "llvm/Type.h" #include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" -#include <ostream> using namespace llvm; ARMConstantPoolValue::ARMConstantPoolValue(GlobalValue *gv, unsigned id, diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 09b8ce0..963ff0d 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -64,11 +64,15 @@ namespace { typedef SmallVector<MemOpQueueEntry,8> MemOpQueue; typedef MemOpQueue::iterator MemOpQueueIter; - SmallVector<MachineBasicBlock::iterator, 4> - MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, - int Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps); + bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, + int Offset, unsigned Base, bool BaseKill, int Opcode, + ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, + DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs); + void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base, + int Opcode, unsigned Size, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, MemOpQueue &MemOps, + SmallVector<MachineBasicBlock::iterator, 4> &Merges); void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); @@ -108,16 +112,16 @@ static int getLoadStoreMultipleOpcode(int Opcode) { return 0; } -/// mergeOps - Create and insert a LDM or STM with Base as base register and +/// MergeOps - Create and insert a LDM or STM with Base as base register and /// registers in Regs as the register operands that would be loaded / stored. /// It returns true if the transformation is done. -static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - int Offset, unsigned Base, bool BaseKill, int Opcode, - ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch, - SmallVector<std::pair<unsigned, bool>, 8> &Regs, - const TargetInstrInfo *TII) { - // FIXME would it be better to take a DL from one of the loads arbitrarily? - DebugLoc dl = DebugLoc::getUnknownLoc(); +bool +ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + int Offset, unsigned Base, bool BaseKill, + int Opcode, ARMCC::CondCodes Pred, + unsigned PredReg, unsigned Scratch, DebugLoc dl, + SmallVector<std::pair<unsigned, bool>, 8> &Regs) { // Only a single register to load / store. Don't bother. unsigned NumRegs = Regs.size(); if (NumRegs <= 1) @@ -185,20 +189,21 @@ static bool mergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, /// MergeLDR_STR - Merge a number of load / store instructions into one or more /// load / store multiple instructions. 
-SmallVector<MachineBasicBlock::iterator, 4> +void ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, - unsigned Base, int Opcode, unsigned Size, - ARMCC::CondCodes Pred, unsigned PredReg, - unsigned Scratch, MemOpQueue &MemOps) { - SmallVector<MachineBasicBlock::iterator, 4> Merges; + unsigned Base, int Opcode, unsigned Size, + ARMCC::CondCodes Pred, unsigned PredReg, + unsigned Scratch, MemOpQueue &MemOps, + SmallVector<MachineBasicBlock::iterator, 4> &Merges) { bool isAM4 = Opcode == ARM::LDR || Opcode == ARM::STR; int Offset = MemOps[SIndex].Offset; int SOffset = Offset; unsigned Pos = MemOps[SIndex].Position; MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI; - unsigned PReg = MemOps[SIndex].MBBI->getOperand(0).getReg(); + DebugLoc dl = Loc->getDebugLoc(); + unsigned PReg = Loc->getOperand(0).getReg(); unsigned PRegNum = ARMRegisterInfo::getRegisterNumbering(PReg); - bool isKill = MemOps[SIndex].MBBI->getOperand(0).isKill(); + bool isKill = Loc->getOperand(0).isKill(); SmallVector<std::pair<unsigned,bool>, 8> Regs; Regs.push_back(std::make_pair(PReg, isKill)); @@ -216,18 +221,17 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, PRegNum = RegNum; } else { // Can't merge this in. Try merge the earlier ones first. - if (mergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg, - Scratch, Regs, TII)) { + if (MergeOps(MBB, ++Loc, SOffset, Base, false, Opcode, Pred, PredReg, + Scratch, dl, Regs)) { Merges.push_back(prior(Loc)); for (unsigned j = SIndex; j < i; ++j) { MBB.erase(MemOps[j].MBBI); MemOps[j].Merged = true; } } - SmallVector<MachineBasicBlock::iterator, 4> Merges2 = - MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,MemOps); - Merges.append(Merges2.begin(), Merges2.end()); - return Merges; + MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch, + MemOps, Merges); + return; } if (MemOps[i].Position > Pos) { @@ -237,8 +241,8 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, } bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1; - if (mergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg, - Scratch, Regs, TII)) { + if (MergeOps(MBB, ++Loc, SOffset, Base, BaseKill, Opcode, Pred, PredReg, + Scratch, dl, Regs)) { Merges.push_back(prior(Loc)); for (unsigned i = SIndex, e = MemOps.size(); i != e; ++i) { MBB.erase(MemOps[i].MBBI); @@ -246,7 +250,7 @@ ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, } } - return Merges; + return; } /// getInstrPredicate - If instruction is predicated, returns its predicate @@ -530,7 +534,7 @@ static bool mergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (isAM2) // STR_PRE, STR_POST; BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base) - .addReg(MO.getReg(), getKillRegState(BaseKill)) + .addReg(MO.getReg(), getKillRegState(MO.isKill())) .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg); else // FSTMS, FSTMD @@ -590,6 +594,7 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { ARMCC::CondCodes CurrPred = ARMCC::AL; unsigned CurrPredReg = 0; unsigned Position = 0; + SmallVector<MachineBasicBlock::iterator,4> Merges; RS->enterBasicBlock(&MBB); MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); @@ -689,16 +694,16 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { RS->forward(prior(MBBI)); // Merge ops. 
-      SmallVector<MachineBasicBlock::iterator,4> MBBII =
-        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
-                     CurrPred, CurrPredReg, Scratch, MemOps);
+      Merges.clear();
+      MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
+                   CurrPred, CurrPredReg, Scratch, MemOps, Merges);
 
       // Try folding preceding/trailing base inc/dec into the generated
       // LDM/STM ops.
-      for (unsigned i = 0, e = MBBII.size(); i < e; ++i)
-        if (mergeBaseUpdateLSMultiple(MBB, MBBII[i], Advance, MBBI))
+      for (unsigned i = 0, e = Merges.size(); i < e; ++i)
+        if (mergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
           ++NumMerges;
-      NumMerges += MBBII.size();
+      NumMerges += Merges.size();
 
       // Try folding preceding/trailing base inc/dec into those load/store
       // that were not merged to form LDM/STM ops.
@@ -709,6 +714,13 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
         // RS may be pointing to an instruction that's deleted.
         RS->skipTo(prior(MBBI));
+      } else if (NumMemOps == 1) {
+        // Try folding preceding/trailing base inc/dec into the single
+        // load/store.
+        if (mergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
+          ++NumMerges;
+          RS->forward(prior(MBBI));
+        }
       }
 
       CurrBase = 0;
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index 6662be1..0b0e289 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -100,7 +100,7 @@ public:
     GPRCS1Frames(0), GPRCS2Frames(0), DPRCSFrames(0),
     JumpTableUId(0), ConstPoolEntryUId(0) {}
 
-  ARMFunctionInfo(MachineFunction &MF) :
+  explicit ARMFunctionInfo(MachineFunction &MF) :
     isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()),
     Align(isThumb ? 1U : 2U),
     VarArgsRegSaveSize(0), HasStackFrame(false),
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index e8daf74..b95d1f9 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -103,28 +103,28 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
   let MethodBodies = [{
     // FP is R11, R9 is available.
     static const unsigned ARM_GPR_AO_1[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6, ARM::R7,
       ARM::R8, ARM::R9, ARM::R10, ARM::R11 };
     // FP is R11, R9 is not available.
     static const unsigned ARM_GPR_AO_2[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6, ARM::R7,
       ARM::R8, ARM::R10, ARM::R11 };
     // FP is R7, R9 is available.
     static const unsigned ARM_GPR_AO_3[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6,
       ARM::R8, ARM::R9, ARM::R10,ARM::R11,
       ARM::R7 };
     // FP is R7, R9 is not available.
     static const unsigned ARM_GPR_AO_4[] = {
-      ARM::R3, ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2, ARM::R3,
       ARM::R12,ARM::LR,
       ARM::R4, ARM::R5, ARM::R6,
       ARM::R8, ARM::R10,ARM::R11,
@@ -186,7 +186,7 @@ def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
   // scavenging.
   let MethodBodies = [{
     static const unsigned THUMB_tGPR_AO[] = {
-      ARM::R2, ARM::R1, ARM::R0,
+      ARM::R0, ARM::R1, ARM::R2,
       ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
 
     // FP is R7, only low registers available.
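A plausible reading of the two ARM changes above, taken together: LDM/STM encode an ascending register list for ascending addresses, so MergeLDR_STR can only fold the next load/store into a multiple when its register number is above the previous one, and handing out R0, R1, R2, ... in ascending allocation order makes that the common case. A toy version of that mergeability test, simplified from the pass's real data structures:

    #include <vector>

    struct MemOp { unsigned RegNum; int Offset; };

    // A run of same-base loads/stores can become one LDM/STM when offsets
    // advance by the access size and register numbers strictly ascend,
    // roughly the condition MergeLDR_STR checks before calling MergeOps.
    static bool canFormMultiple(const std::vector<MemOp> &Ops, int Size) {
      if (Ops.empty())
        return false;
      for (size_t i = 1, e = Ops.size(); i != e; ++i)
        if (Ops[i].Offset != Ops[i-1].Offset + Size ||
            Ops[i].RegNum <= Ops[i-1].RegNum)
          return false;
      return true;
    }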
diff --git a/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/lib/Target/Alpha/AlphaMachineFunctionInfo.h index 47de5df..8221fc7 100644 --- a/lib/Target/Alpha/AlphaMachineFunctionInfo.h +++ b/lib/Target/Alpha/AlphaMachineFunctionInfo.h @@ -33,8 +33,8 @@ class AlphaMachineFunctionInfo : public MachineFunctionInfo { public: AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0) {} - AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0), - GlobalRetAddr(0) {} + explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0), + GlobalRetAddr(0) {} unsigned getGlobalBaseReg() const { return GlobalBaseReg; } void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 4d7b545..5814d27 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -1000,8 +1000,11 @@ void CWriter::printConstant(Constant *CPV, bool Static) { Out << ')'; return; case Instruction::Add: + case Instruction::FAdd: case Instruction::Sub: + case Instruction::FSub: case Instruction::Mul: + case Instruction::FMul: case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv: @@ -1020,9 +1023,12 @@ void CWriter::printConstant(Constant *CPV, bool Static) { bool NeedsClosingParens = printConstExprCast(CE, Static); printConstantWithCast(CE->getOperand(0), CE->getOpcode()); switch (CE->getOpcode()) { - case Instruction::Add: Out << " + "; break; - case Instruction::Sub: Out << " - "; break; - case Instruction::Mul: Out << " * "; break; + case Instruction::Add: + case Instruction::FAdd: Out << " + "; break; + case Instruction::Sub: + case Instruction::FSub: Out << " - "; break; + case Instruction::Mul: + case Instruction::FMul: Out << " * "; break; case Instruction::URem: case Instruction::SRem: case Instruction::FRem: Out << " % "; break; @@ -1322,8 +1328,6 @@ bool CWriter::printConstExprCast(const ConstantExpr* CE, bool Static) { case Instruction::Mul: // We need to cast integer arithmetic so that it is always performed // as unsigned, to avoid undefined behavior on overflow. - if (!Ty->isIntOrIntVector()) break; - // FALL THROUGH case Instruction::LShr: case Instruction::URem: case Instruction::UDiv: NeedsExplicitCast = true; break; @@ -1387,8 +1391,6 @@ void CWriter::printConstantWithCast(Constant* CPV, unsigned Opcode) { case Instruction::Mul: // We need to cast integer arithmetic so that it is always performed // as unsigned, to avoid undefined behavior on overflow. - if (!OpTy->isIntOrIntVector()) break; - // FALL THROUGH case Instruction::LShr: case Instruction::UDiv: case Instruction::URem: @@ -1505,8 +1507,6 @@ bool CWriter::writeInstructionCast(const Instruction &I) { case Instruction::Mul: // We need to cast integer arithmetic so that it is always performed // as unsigned, to avoid undefined behavior on overflow. - if (!Ty->isIntOrIntVector()) break; - // FALL THROUGH case Instruction::LShr: case Instruction::URem: case Instruction::UDiv: @@ -1552,8 +1552,6 @@ void CWriter::writeOperandWithCast(Value* Operand, unsigned Opcode) { case Instruction::Mul: // We need to cast integer arithmetic so that it is always performed // as unsigned, to avoid undefined behavior on overflow. 
- if (!OpTy->isIntOrIntVector()) break; - // FALL THROUGH case Instruction::LShr: case Instruction::UDiv: case Instruction::URem: // Cast to unsigned first @@ -2606,6 +2604,10 @@ void CWriter::visitBinaryOperator(Instruction &I) { Out << "-("; writeOperand(BinaryOperator::getNegArgument(cast<BinaryOperator>(&I))); Out << ")"; + } else if (BinaryOperator::isFNeg(&I)) { + Out << "-("; + writeOperand(BinaryOperator::getFNegArgument(cast<BinaryOperator>(&I))); + Out << ")"; } else if (I.getOpcode() == Instruction::FRem) { // Output a call to fmod/fmodf instead of emitting a%b if (I.getType() == Type::FloatTy) @@ -2630,9 +2632,12 @@ void CWriter::visitBinaryOperator(Instruction &I) { writeOperandWithCast(I.getOperand(0), I.getOpcode()); switch (I.getOpcode()) { - case Instruction::Add: Out << " + "; break; - case Instruction::Sub: Out << " - "; break; - case Instruction::Mul: Out << " * "; break; + case Instruction::Add: + case Instruction::FAdd: Out << " + "; break; + case Instruction::Sub: + case Instruction::FSub: Out << " - "; break; + case Instruction::Mul: + case Instruction::FMul: Out << " * "; break; case Instruction::URem: case Instruction::SRem: case Instruction::FRem: Out << " % "; break; diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 4082989..04a6829 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -865,8 +865,11 @@ namespace { Out << "Constant* " << constName << " = ConstantExpr::"; switch (CE->getOpcode()) { case Instruction::Add: Out << "getAdd("; break; + case Instruction::FAdd: Out << "getFAdd("; break; case Instruction::Sub: Out << "getSub("; break; + case Instruction::FSub: Out << "getFSub("; break; case Instruction::Mul: Out << "getMul("; break; + case Instruction::FMul: Out << "getFMul("; break; case Instruction::UDiv: Out << "getUDiv("; break; case Instruction::SDiv: Out << "getSDiv("; break; case Instruction::FDiv: Out << "getFDiv("; break; @@ -1159,8 +1162,11 @@ namespace { break; } case Instruction::Add: + case Instruction::FAdd: case Instruction::Sub: + case Instruction::FSub: case Instruction::Mul: + case Instruction::FMul: case Instruction::UDiv: case Instruction::SDiv: case Instruction::FDiv: @@ -1176,8 +1182,11 @@ namespace { Out << "BinaryOperator* " << iName << " = BinaryOperator::Create("; switch (I->getOpcode()) { case Instruction::Add: Out << "Instruction::Add"; break; + case Instruction::FAdd: Out << "Instruction::FAdd"; break; case Instruction::Sub: Out << "Instruction::Sub"; break; + case Instruction::FSub: Out << "Instruction::FSub"; break; case Instruction::Mul: Out << "Instruction::Mul"; break; + case Instruction::FMul: Out << "Instruction::FMul"; break; case Instruction::UDiv:Out << "Instruction::UDiv"; break; case Instruction::SDiv:Out << "Instruction::SDiv"; break; case Instruction::FDiv:Out << "Instruction::FDiv"; break; diff --git a/lib/Target/IA64/IA64MachineFunctionInfo.h b/lib/Target/IA64/IA64MachineFunctionInfo.h index fb93056..e6254d6 100644 --- a/lib/Target/IA64/IA64MachineFunctionInfo.h +++ b/lib/Target/IA64/IA64MachineFunctionInfo.h @@ -24,7 +24,7 @@ public: // by this machinefunction? 
(used to compute the appropriate // entry in the 'alloc' instruction at the top of the // machinefunction) - IA64FunctionInfo(MachineFunction& MF) { outRegsUsed=0; }; + explicit IA64FunctionInfo(MachineFunction& MF) { outRegsUsed=0; }; }; diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp index ada851d..37e5b1e 100644 --- a/lib/Target/MSIL/MSILWriter.cpp +++ b/lib/Target/MSIL/MSILWriter.cpp @@ -1060,12 +1060,15 @@ void MSILWriter::printInstruction(const Instruction* Inst) { break; // Binary case Instruction::Add: + case Instruction::FAdd: printBinaryInstruction("add",Left,Right); break; case Instruction::Sub: + case Instruction::FSub: printBinaryInstruction("sub",Left,Right); break; - case Instruction::Mul: + case Instruction::Mul: + case Instruction::FMul: printBinaryInstruction("mul",Left,Right); break; case Instruction::UDiv: @@ -1322,12 +1325,15 @@ void MSILWriter::printConstantExpr(const ConstantExpr* CE) { printSelectInstruction(CE->getOperand(0),CE->getOperand(1),CE->getOperand(2)); break; case Instruction::Add: + case Instruction::FAdd: printBinaryInstruction("add",left,right); break; case Instruction::Sub: + case Instruction::FSub: printBinaryInstruction("sub",left,right); break; case Instruction::Mul: + case Instruction::FMul: printBinaryInstruction("mul",left,right); break; case Instruction::UDiv: diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h index b94d7e4..1d26ae3 100644 --- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h +++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h @@ -28,7 +28,8 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo { public: MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {} - MSP430MachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {} + explicit MSP430MachineFunctionInfo(MachineFunction &MF) + : CalleeSavedFrameSize(0) {} unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index 0f83fd2..ac9a143 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -46,6 +46,16 @@ static const char *getIntrinsicName(unsigned opcode) { case PIC16ISD::MUL_I8: Basename = "mul.i8"; break; case RTLIB::MUL_I16: Basename = "mul.i16"; break; case RTLIB::MUL_I32: Basename = "mul.i32"; break; + + case RTLIB::SDIV_I16: Basename = "sdiv.i16"; break; + case RTLIB::SDIV_I32: Basename = "sdiv.i32"; break; + case RTLIB::UDIV_I16: Basename = "udiv.i16"; break; + case RTLIB::UDIV_I32: Basename = "udiv.i32"; break; + + case RTLIB::SREM_I16: Basename = "srem.i16"; break; + case RTLIB::SREM_I32: Basename = "srem.i32"; break; + case RTLIB::UREM_I16: Basename = "urem.i16"; break; + case RTLIB::UREM_I32: Basename = "urem.i32"; break; } std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL); @@ -90,6 +100,20 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) setLibcallName(RTLIB::MUL_I16, getIntrinsicName(RTLIB::MUL_I16)); setLibcallName(RTLIB::MUL_I32, getIntrinsicName(RTLIB::MUL_I32)); + // Signed division lib call names + setLibcallName(RTLIB::SDIV_I16, getIntrinsicName(RTLIB::SDIV_I16)); + setLibcallName(RTLIB::SDIV_I32, getIntrinsicName(RTLIB::SDIV_I32)); + // Unsigned division lib call names + setLibcallName(RTLIB::UDIV_I16, getIntrinsicName(RTLIB::UDIV_I16)); + setLibcallName(RTLIB::UDIV_I32, 
getIntrinsicName(RTLIB::UDIV_I32)); + + // Signed remainder lib call names + setLibcallName(RTLIB::SREM_I16, getIntrinsicName(RTLIB::SREM_I16)); + setLibcallName(RTLIB::SREM_I32, getIntrinsicName(RTLIB::SREM_I32)); + // Unsigned remainder lib call names + setLibcallName(RTLIB::UREM_I16, getIntrinsicName(RTLIB::UREM_I16)); + setLibcallName(RTLIB::UREM_I32, getIntrinsicName(RTLIB::UREM_I32)); + setOperationAction(ISD::GlobalAddress, MVT::i16, Custom); setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom); @@ -105,6 +129,7 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM) setOperationAction(ISD::ADDC, MVT::i8, Custom); setOperationAction(ISD::SUBE, MVT::i8, Custom); setOperationAction(ISD::SUBC, MVT::i8, Custom); + setOperationAction(ISD::SUB, MVT::i8, Custom); setOperationAction(ISD::ADD, MVT::i8, Custom); setOperationAction(ISD::ADD, MVT::i16, Custom); @@ -354,21 +379,11 @@ SDValue PIC16TargetLowering::ExpandFrameIndex(SDNode *N, SelectionDAG &DAG) { FrameIndexSDNode *FR = dyn_cast<FrameIndexSDNode>(SDValue(N,0)); // FIXME there isn't really debug info here DebugLoc dl = FR->getDebugLoc(); - // FIXME: Not used. - // int Index = FR->getIndex(); // Expand FrameIndex like GlobalAddress and ExternalSymbol // Also use Offset field for lo and hi parts. The default // offset is zero. - /* - SDValue Offset = DAG.getConstant(0, MVT::i8); - SDValue FI = DAG.getTargetFrameIndex(Index, MVT::i8); - SDValue Lo = DAG.getNode(PIC16ISD::Lo, dl, MVT::i8, FI, Offset); - SDValue Hi = DAG.getNode(PIC16ISD::Hi, dl, MVT::i8, FI, Offset); - return DAG.getNode(ISD::BUILD_PAIR, dl, N->getValueType(0), Lo, Hi); - */ - SDValue ES; int FrameOffset; SDValue FI = SDValue(N,0); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index a7744b8..87f8fb0b4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -227,15 +227,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + // This is just the low 32 bits of a (signed) fp->i64 conversion. + // We cannot do this with Promote because i64 is not a legal type. + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); // FIXME: disable this lowered code. This generates 64-bit register values, // and we don't model the fact that the top part is clobbered by calls. We // need to flag these together so that the value isn't live across a call. //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); - - // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); @@ -2858,7 +2857,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { } // FIXME: Split this code up when LegalizeDAGTypes lands. 
-SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
+SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                            DebugLoc dl) {
   assert(Op.getOperand(0).getValueType().isFloatingPoint());
   SDValue Src = Op.getOperand(0);
@@ -2867,9 +2866,11 @@ SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
   SDValue Tmp;
   switch (Op.getValueType().getSimpleVT()) {
-  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
+  default: assert(0 && "Unhandled FP_TO_INT type in custom expander!");
   case MVT::i32:
-    Tmp = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Src);
+    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
+                                                        PPCISD::FCTIDZ,
+                      dl, MVT::f64, Src);
     break;
   case MVT::i64:
     Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
@@ -3740,7 +3741,8 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
     return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
   case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
-  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG,
+  case ISD::FP_TO_UINT:
+  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG,
                                                        Op.getDebugLoc());
   case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
   case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
@@ -3834,7 +3836,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
     return;
   }
   case ISD::FP_TO_SINT:
-    Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG, dl));
+    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
     return;
   }
 }
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 7946474..b6d046f 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -377,7 +377,7 @@ namespace llvm {
     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
                                     const PPCSubtarget &Subtarget);
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
-    SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
+    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl);
     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG);
     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
     SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 42883d7..b359dd3 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -59,7 +59,7 @@ private:
   bool HasFastCall;
 
 public:
-  PPCFunctionInfo(MachineFunction &MF)
+  explicit PPCFunctionInfo(MachineFunction &MF)
     : FramePointerSaveIndex(0),
       ReturnAddrSaveIndex(0),
       SpillsCR(false),
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 5d5beeb..cb31506 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -908,6 +908,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const {
   // If we are a leaf function, and use up to 224 bytes of stack space,
   // don't have a frame pointer, calls, or dynamic alloca, then we do not
   // need to adjust the stack pointer (we fit in the Red Zone).
+  bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone);
   if (!DisableRedZone &&
       FrameSize <= 224 &&                          // Fits in red zone.
       !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
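The red-zone gate added here reads more clearly as one predicate over the conditions the comment names; a sketch with hypothetical parameters standing in for the MachineFrameInfo queries:

    #include <cstdint>

    // A function can skip stack-pointer adjustment when its whole frame
    // fits in the 224-byte red zone and nothing else forces a real frame.
    static bool fitsInRedZone(bool DisableRedZone, uint64_t FrameSize,
                              bool HasVarSizedObjects, bool HasCalls,
                              bool HasFramePointer) {
      return !DisableRedZone &&
             FrameSize <= 224 &&      // fits in the red zone
             !HasVarSizedObjects &&   // no dynamic alloca
             !HasCalls &&             // leaf function
             !HasFramePointer;        // no frame pointer
    }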
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 1b042dd..dea293b 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -41,7 +41,6 @@ namespace llvm { bool RealignStack; bool DisableJumpTables; bool StrongPHIElim; - bool DisableRedZone; bool AsmVerbosityDefault(false); } @@ -86,11 +85,6 @@ GenerateSoftFloatCalls("soft-float", cl::location(UseSoftFloat), cl::init(false)); static cl::opt<bool, true> -GenerateNoImplicitFloats("no-implicit-float", - cl::desc("Don't generate implicit floating point instructions (x86-only)"), - cl::location(NoImplicitFloat), - cl::init(false)); -static cl::opt<bool, true> DontPlaceZerosInBSS("nozero-initialized-in-bss", cl::desc("Don't place zero-initialized symbols into bss section"), cl::location(NoZerosInBSS), @@ -163,11 +157,6 @@ EnableStrongPHIElim(cl::Hidden, "strong-phi-elim", cl::desc("Use strong PHI elimination."), cl::location(StrongPHIElim), cl::init(false)); -static cl::opt<bool, true> -DisableRedZoneOption("disable-red-zone", - cl::desc("Do not emit code that uses the red zone."), - cl::location(DisableRedZone), - cl::init(false)); //--------------------------------------------------------------------------- // TargetMachine Class diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 4c3cc82..2604741 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -14,5 +14,6 @@ #include "X86ELFWriterInfo.h" using namespace llvm; -X86ELFWriterInfo::X86ELFWriterInfo() : TargetELFWriterInfo(EM_386) {} +X86ELFWriterInfo::X86ELFWriterInfo(bool is64Bit) : + TargetELFWriterInfo(is64Bit ? EM_X86_64 : EM_386) {} X86ELFWriterInfo::~X86ELFWriterInfo() {} diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index 06e051a..acfa501 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -20,7 +20,7 @@ namespace llvm { class X86ELFWriterInfo : public TargetELFWriterInfo { public: - X86ELFWriterInfo(); + X86ELFWriterInfo(bool is64Bit); virtual ~X86ELFWriterInfo(); }; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1f507c3..ef60ff5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -126,7 +126,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote); setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote); - if (!UseSoftFloat && !NoImplicitFloat) { + if (!UseSoftFloat) { // SSE has no i16 to fp conversion, only i32 if (X86ScalarSSEf32) { setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote); @@ -550,6 +550,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::FLOG10, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FEXP, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::FEXP2, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FP_TO_UINT, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand); } // FIXME: In order to prevent SSE instructions being expanded to MMX ones @@ -734,6 +738,12 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT, MVT::v2f64, Custom); setOperationAction(ISD::SELECT, MVT::v2i64, Custom); + 
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); + if (!DisableMMX && Subtarget->hasMMX()) { + setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); + } } if (Subtarget->hasSSE41()) { @@ -868,11 +878,14 @@ unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const { /// determining it. MVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned Align, - bool isSrcConst, bool isSrcStr) const { + bool isSrcConst, bool isSrcStr, + SelectionDAG &DAG) const { // FIXME: This turns off use of xmm stores for memset/memcpy on targets like // linux. This is because the stack realignment code can't handle certain // cases like PR2962. This should be removed when PR2962 is fixed. - if (!NoImplicitFloat && Subtarget->getStackAlignment() >= 16) { + const Function *F = DAG.getMachineFunction().getFunction(); + bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); + if (!NoImplicitFloatOps && Subtarget->getStackAlignment() >= 16) { if ((isSrcConst || isSrcStr) && Subtarget->hasSSE2() && Size >= 16) return MVT::v4i32; if ((isSrcConst || isSrcStr) && Subtarget->hasSSE1() && Size >= 16) @@ -1404,11 +1417,12 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) { unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, TotalNumXMMRegs); + bool NoImplicitFloatOps = Fn->hasFnAttr(Attribute::NoImplicitFloat); assert(!(NumXMMRegs && !Subtarget->hasSSE1()) && "SSE register cannot be used when SSE is disabled!"); - assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloat) && + assert(!(NumXMMRegs && UseSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"); - if (UseSoftFloat || NoImplicitFloat || !Subtarget->hasSSE1()) + if (UseSoftFloat || NoImplicitFloatOps || !Subtarget->hasSSE1()) // Kernel mode asks for SSE to be disabled, so don't push them // on the stack. TotalNumXMMRegs = 0; @@ -2414,9 +2428,10 @@ bool X86::isUNPCKH_v_undef_Mask(ShuffleVectorSDNode *N) { /// specifies a shuffle of elements that is suitable for input to MOVSS, /// MOVSD, and MOVD, i.e. setting the lowest element. static bool isMOVLMask(const SmallVectorImpl<int> &Mask, MVT VT) { - int NumElts = VT.getVectorNumElements(); - if (NumElts != 2 && NumElts != 4) + if (VT.getVectorElementType().getSizeInBits() < 32) return false; + + int NumElts = VT.getVectorNumElements(); if (!isUndefOrEqual(Mask[0], NumElts)) return false; @@ -3068,7 +3083,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { } // Special case for single non-zero, non-undef, element. - if (NumNonZero == 1 && NumElems <= 4) { + if (NumNonZero == 1) { unsigned Idx = CountTrailingZeros_32(NonZeros); SDValue Item = Op.getOperand(Idx); @@ -3109,15 +3124,24 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { // If we have a constant or non-constant insertion into the low element of // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into // the rest of the elements. This will be matched as movd/movq/movss/movsd - // depending on what the source datatype is. Because we can only get here - // when NumElems <= 4, this only needs to handle i32/f32/i64/f64. - if (Idx == 0 && - // Don't do this for i64 values on x86-32. - (EVT != MVT::i64 || Subtarget->is64Bit())) { - Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); - // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. 
- return getShuffleVectorZeroOrUndef(Item, 0, NumZero > 0, - Subtarget->hasSSE2(), DAG); + // depending on what the source datatype is. + if (Idx == 0) { + if (NumZero == 0) { + return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); + } else if (EVT == MVT::i32 || EVT == MVT::f32 || EVT == MVT::f64 || + (EVT == MVT::i64 && Subtarget->is64Bit())) { + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); + // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector. + return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget->hasSSE2(), + DAG); + } else if (EVT == MVT::i16 || EVT == MVT::i8) { + Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); + MVT MiddleVT = VT.getSizeInBits() == 64 ? MVT::v2i32 : MVT::v4i32; + Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MiddleVT, Item); + Item = getShuffleVectorZeroOrUndef(Item, 0, true, + Subtarget->hasSSE2(), DAG); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Item); + } } // Is it a vector logical left shift? @@ -4248,7 +4272,7 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue N1 = Op.getOperand(1); SDValue N2 = Op.getOperand(2); - if (EVT.getSizeInBits() == 16) { + if (EVT.getSizeInBits() == 16 && isa<ConstantSDNode>(N2)) { // Transform it so it match pinsrw which expects a 16-bit value in a GR32 // as its second argument. if (N1.getValueType() != MVT::i32) @@ -4554,6 +4578,14 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) { SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { MVT SrcVT = Op.getOperand(0).getValueType(); + + if (SrcVT.isVector()) { + if (SrcVT == MVT::v2i32 && Op.getValueType() == MVT::v2f64) { + return Op; + } + return SDValue(); + } + assert(SrcVT.getSimpleVT() <= MVT::i64 && SrcVT.getSimpleVT() >= MVT::i16 && "Unknown SINT_TO_FP to lower!"); @@ -4845,6 +4877,14 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { } SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { + if (Op.getValueType().isVector()) { + if (Op.getValueType() == MVT::v2i32 && + Op.getOperand(0).getValueType() == MVT::v2f64) { + return Op; + } + return SDValue(); + } + std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG, true); SDValue FIST = Vals.first, StackSlot = Vals.second; // If FP_TO_INTHelper failed, the node is actually supposed to be Legal. @@ -7675,8 +7715,9 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems, if (Elt.getOpcode() == ISD::UNDEF) continue; - if (!TLI.isConsecutiveLoad(Elt.getNode(), Base, - EVT.getSizeInBits()/8, i, MFI)) + LoadSDNode *LD = cast<LoadSDNode>(Elt); + LoadSDNode *LDBase = cast<LoadSDNode>(Base); + if (!TLI.isConsecutiveLoad(LD, LDBase, EVT.getSizeInBits()/8, i, MFI)) return false; } return true; @@ -7751,44 +7792,82 @@ static SDValue PerformBuildVectorCombine(SDNode *N, SelectionDAG &DAG, MVT VT = N->getValueType(0); MVT EVT = VT.getVectorElementType(); - if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit()) - // We are looking for load i64 and zero extend. We want to transform - // it before legalizer has a chance to expand it. Also look for i64 - // BUILD_PAIR bit casted to f64. - return SDValue(); - // This must be an insertion into a zero vector. - SDValue HighElt = N->getOperand(1); - if (!isZeroNode(HighElt)) - return SDValue(); + + // Before or during type legalization, we want to try and convert a + // build_vector of an i64 load and a zero value into vzext_movl before the + // legalizer can break it up. 
+ // FIXME: does the case below remove the need to do this? + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) { + if ((EVT != MVT::i64 && EVT != MVT::f64) || Subtarget->is64Bit()) + return SDValue(); + + // This must be an insertion into a zero vector. + SDValue HighElt = N->getOperand(1); + if (!isZeroNode(HighElt)) + return SDValue(); + + // Value must be a load. + SDNode *Base = N->getOperand(0).getNode(); + if (!isa<LoadSDNode>(Base)) { + if (Base->getOpcode() != ISD::BIT_CONVERT) + return SDValue(); + Base = Base->getOperand(0).getNode(); + if (!isa<LoadSDNode>(Base)) + return SDValue(); + } + + // Transform it into VZEXT_LOAD addr. + LoadSDNode *LD = cast<LoadSDNode>(Base); + + // Load must not be an extload. + if (LD->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + + // Load type should be a legal type so we don't have to legalize it. + if (!TLI.isTypeLegal(VT)) + return SDValue(); + + SDVTList Tys = DAG.getVTList(VT, MVT::Other); + SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; + SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); + TargetLowering::TargetLoweringOpt TLO(DAG); + TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1)); + DCI.CommitTargetLoweringOpt(TLO); + return ResNode; + } + + // The type legalizer will have broken apart the v2i64 build_vector created + // during widening before the code which handles that case is run. Look for a + // build_vector of (load, load + 4, 0/undef, 0/undef). + if (VT == MVT::v4i32 || VT == MVT::v4f32) { + LoadSDNode *LD0 = dyn_cast<LoadSDNode>(N->getOperand(0)); + LoadSDNode *LD1 = dyn_cast<LoadSDNode>(N->getOperand(1)); + if (!LD0 || !LD1) + return SDValue(); + if (LD0->getExtensionType() != ISD::NON_EXTLOAD || + LD1->getExtensionType() != ISD::NON_EXTLOAD) + return SDValue(); + // Make sure the second elt is a consecutive load. + if (!TLI.isConsecutiveLoad(LD1, LD0, EVT.getSizeInBits()/8, 1, + DAG.getMachineFunction().getFrameInfo())) + return SDValue(); - // Value must be a load. - SDNode *Base = N->getOperand(0).getNode(); - if (!isa<LoadSDNode>(Base)) { - if (Base->getOpcode() != ISD::BIT_CONVERT) + SDValue N2 = N->getOperand(2); + SDValue N3 = N->getOperand(3); + if (!isZeroNode(N2) && N2.getOpcode() != ISD::UNDEF) return SDValue(); - Base = Base->getOperand(0).getNode(); - if (!isa<LoadSDNode>(Base)) + if (!isZeroNode(N3) && N3.getOpcode() != ISD::UNDEF) return SDValue(); + + SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); + SDValue Ops[] = { LD0->getChain(), LD0->getBasePtr() }; + SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); + TargetLowering::TargetLoweringOpt TLO(DAG); + TLO.CombineTo(SDValue(LD0, 1), ResNode.getValue(1)); + DCI.CommitTargetLoweringOpt(TLO); + return DAG.getNode(ISD::BIT_CONVERT, dl, VT, ResNode); } - - // Transform it into VZEXT_LOAD addr. - LoadSDNode *LD = cast<LoadSDNode>(Base); - - // Load must not be an extload. - if (LD->getExtensionType() != ISD::NON_EXTLOAD) - return SDValue(); - - // Load type should legal type so we don't have to legalize it. - if (!TLI.isTypeLegal(VT)) - return SDValue(); - - SDVTList Tys = DAG.getVTList(VT, MVT::Other); - SDValue Ops[] = { LD->getChain(), LD->getBasePtr() }; - SDValue ResNode = DAG.getNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, 2); - TargetLowering::TargetLoweringOpt TLO(DAG); - TLO.CombineTo(SDValue(Base, 1), ResNode.getValue(1)); - DCI.CommitTargetLoweringOpt(TLO); - return ResNode; + return SDValue(); } /// PerformSELECTCombine - Do target-specific dag combines on SELECT nodes.
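A scalar analogue may make the new second case of PerformBuildVectorCombine easier to read: after type legalization has widened a 64-bit vector operation, the zero-extended 64-bit load reappears as a four-element build_vector, and the combine folds it back into a single 8-byte load whose upper lanes are zero (X86ISD::VZEXT_LOAD). The following is an illustrative C++ sketch only, not code from the commit, and assumes a little-endian x86 target:

#include <cstring>

struct V4i32 { int e[4]; };

// The shape the combine matches: build_vector (load, load + 4, 0, 0),
// i.e. two consecutive 32-bit loads plus zero/undef upper elements.
V4i32 beforeCombine(const int *p) {
  V4i32 v = { { p[0], p[1], 0, 0 } };
  return v;
}

// The shape it produces: one 64-bit load with the upper 64 bits of the
// vector implicitly zero, matching a movq-style vzext_load.
V4i32 afterCombine(const int *p) {
  long long lo;
  std::memcpy(&lo, p, 8);                       // one load instead of two
  V4i32 v = { { (int)lo, (int)(lo >> 32), 0, 0 } };
  return v;
}

Note how the chain result of the original load (SDValue(LD0, 1)) is redirected to the new node via CombineTo, so later users of the load's side effects keep a valid dependency.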
@@ -8242,7 +8321,10 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, if (VT.getSizeInBits() != 64) return SDValue(); - bool F64IsLegal = !UseSoftFloat && !NoImplicitFloat && Subtarget->hasSSE2(); + const Function *F = DAG.getMachineFunction().getFunction(); + bool NoImplicitFloatOps = F->hasFnAttr(Attribute::NoImplicitFloat); + bool F64IsLegal = !UseSoftFloat && !NoImplicitFloatOps + && Subtarget->hasSSE2(); if ((VT.isVector() || (VT == MVT::i64 && F64IsLegal && !Subtarget->is64Bit())) && isa<LoadSDNode>(St->getValue()) && diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 550f8bd..fb4eb68 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -378,7 +378,8 @@ namespace llvm { /// determining it. virtual MVT getOptimalMemOpType(uint64_t Size, unsigned Align, - bool isSrcConst, bool isSrcStr) const; + bool isSrcConst, bool isSrcStr, + SelectionDAG &DAG) const; /// LowerOperation - Provide custom lowering hooks for some operations. /// diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 2cd3733..8a9b7c9 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2009,16 +2009,24 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - X86FI->setCalleeSavedFrameSize(CSI.size() * SlotSize); + unsigned CalleeFrameSize = 0; unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); + const TargetRegisterClass *RegClass = CSI[i-1].getRegClass(); // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); - BuildMI(MBB, MI, DL, get(Opc)) - .addReg(Reg, RegState::Kill); + if (RegClass != &X86::VR128RegClass) { + CalleeFrameSize += SlotSize; + BuildMI(MBB, MI, DL, get(Opc)) + .addReg(Reg, RegState::Kill); + } else { + storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); + } } + + X86FI->setCalleeSavedFrameSize(CalleeFrameSize); return true; } @@ -2036,7 +2044,12 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, unsigned Opc = is64Bit ? 
X86::POP64r : X86::POP32r; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); - BuildMI(MBB, MI, DL, get(Opc), Reg); + const TargetRegisterClass *RegClass = CSI[i].getRegClass(); + if (RegClass != &X86::VR128RegClass) { + BuildMI(MBB, MI, DL, get(Opc), Reg); + } else { + loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); + } } return true; } diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td index 8f287e1..43fadc2 100644 --- a/lib/Target/X86/X86InstrMMX.td +++ b/lib/Target/X86/X86InstrMMX.td @@ -577,41 +577,17 @@ def : Pat<(f64 (bitconvert (v4i16 VR64:$src))), def : Pat<(f64 (bitconvert (v8i8 VR64:$src))), (MMX_MOVQ2FR64rr VR64:$src)>; -// Move scalar to MMX zero-extended -// movd to MMX register zero-extends -let AddedComplexity = 15 in { - def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))))), - (MMX_MOVZDI2PDIrr GR32:$src)>; - def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))))), - (MMX_MOVZDI2PDIrr GR32:$src)>; -} - let AddedComplexity = 20 in { - def : Pat<(v8i8 (X86vzmovl (bc_v8i8 (load_mmx addr:$src)))), - (MMX_MOVZDI2PDIrm addr:$src)>; - def : Pat<(v4i16 (X86vzmovl (bc_v4i16 (load_mmx addr:$src)))), - (MMX_MOVZDI2PDIrm addr:$src)>; def : Pat<(v2i32 (X86vzmovl (bc_v2i32 (load_mmx addr:$src)))), (MMX_MOVZDI2PDIrm addr:$src)>; } // Clear top half. let AddedComplexity = 15 in { - def : Pat<(v8i8 (X86vzmovl VR64:$src)), - (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>; - def : Pat<(v4i16 (X86vzmovl VR64:$src)), - (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>; def : Pat<(v2i32 (X86vzmovl VR64:$src)), (MMX_PUNPCKLDQrr VR64:$src, (MMX_V_SET0))>; } -// Scalar to v4i16 / v8i8. The source may be a GR32, but only the lower -// 8 or 16-bits matter. -def : Pat<(bc_v8i8 (v2i32 (scalar_to_vector GR32:$src))), - (MMX_MOVD64rr GR32:$src)>; -def : Pat<(bc_v4i16 (v2i32 (scalar_to_vector GR32:$src))), - (MMX_MOVD64rr GR32:$src)>; - // Patterns to perform canonical versions of vector shuffling. 
let AddedComplexity = 10 in { def : Pat<(v8i8 (mmx_unpckl_undef VR64:$src, (undef))), diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 1fafa46..b44c7a6 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3447,7 +3447,7 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> { } defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; -defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovsxbq", int_x86_sse41_pmovzxbq>; +defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; // Common patterns involving scalar load def : Pat<(int_x86_sse41_pmovsxbq diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h index 8a5ac2c..fafcf7e 100644 --- a/lib/Target/X86/X86MachineFunctionInfo.h +++ b/lib/Target/X86/X86MachineFunctionInfo.h @@ -73,14 +73,15 @@ public: SRetReturnReg(0), GlobalBaseReg(0) {} - X86MachineFunctionInfo(MachineFunction &MF) : ForceFramePointer(false), - CalleeSavedFrameSize(0), - BytesToPopOnReturn(0), - DecorationStyle(None), - ReturnAddrIndex(0), - TailCallReturnAddrDelta(0), - SRetReturnReg(0), - GlobalBaseReg(0) {} + explicit X86MachineFunctionInfo(MachineFunction &MF) + : ForceFramePointer(false), + CalleeSavedFrameSize(0), + BytesToPopOnReturn(0), + DecorationStyle(None), + ReturnAddrIndex(0), + TailCallReturnAddrDelta(0), + SRetReturnReg(0), + GlobalBaseReg(0) {} bool getForceFramePointer() const { return ForceFramePointer;} void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 5af1fb1..c733f26 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -751,10 +751,12 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { // function, and use up to 128 bytes of stack space, don't have a frame // pointer, calls, or dynamic alloca then we do not need to adjust the // stack pointer (we fit in the Red Zone). + bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone); if (Is64Bit && !DisableRedZone && !needsStackRealignment(MF) && !MFI->hasVarSizedObjects() && // No dynamic alloca. - !MFI->hasCalls()) { // No calls. + !MFI->hasCalls() && // No calls. + !Subtarget->isTargetWin64()) { // Win64 has no Red Zone uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); if (hasFP(MF)) MinSize += SlotSize; StackSize = std::max(MinSize, @@ -820,13 +822,6 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); } - unsigned ReadyLabelId = 0; - if (needsFrameMoves) { - // Mark effective beginning of when frame pointer is ready. - ReadyLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); - } - // Skip the callee-saved push instructions. while (MBBI != MBB.end() && (MBBI->getOpcode() == X86::PUSH32r || @@ -836,20 +831,20 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { if (MBBI != MBB.end()) DL = MBBI->getDebugLoc(); - if (NumBytes) { // adjust stack pointer: ESP -= numbytes + if (NumBytes) { // Adjust stack pointer: ESP -= numbytes. if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) { - // Check, whether EAX is livein for this function + // Check whether EAX is live-in for this function.
bool isEAXAlive = false; for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(), EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) { unsigned Reg = II->first; isEAXAlive = (Reg == X86::EAX || Reg == X86::AX || - Reg == X86::AH || Reg == X86::AL); + Reg == X86::AH || Reg == X86::AL); } - // Function prologue calls _alloca to probe the stack when allocating - // more than 4k bytes in one go. Touching the stack at 4K increments is + // Function prologue calls _alloca to probe the stack when allocating more + // than 4k bytes in one go. Touching the stack at 4K increments is // necessary to ensure that the guard pages used by the OS virtual memory // manager are allocated in correct sequence. if (!isEAXAlive) { @@ -861,12 +856,14 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { // Save EAX BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) .addReg(X86::EAX, RegState::Kill); + // Allocate NumBytes-4 bytes on stack. We'll also use 4 already // allocated bytes for EAX. - BuildMI(MBB, MBBI, DL, - TII.get(X86::MOV32ri), X86::EAX).addImm(NumBytes-4); + BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) + .addImm(NumBytes-4); BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32)) .addExternalSymbol("_alloca"); + // Restore EAX MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), @@ -878,6 +875,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { // merge the two. This can be the case when tail call elimination is // enabled and the callee has more arguments than the caller. NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true); + // If there is an ADD32ri or SUB32ri of ESP immediately after this // instruction, merge the two instructions. mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes); @@ -887,8 +885,13 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const { } } - if (needsFrameMoves) + if (needsFrameMoves) { + // Mark effective beginning of when frame pointer is ready. + unsigned ReadyLabelId = 0; + ReadyLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(ReadyLabelId); emitFrameMoves(MF, FrameLabelId, ReadyLabelId); + } } void X86RegisterInfo::emitEpilogue(MachineFunction &MF, diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 8264462..88ab247 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -133,7 +133,8 @@ X86TargetMachine::X86TargetMachine(const Module &M, const std::string &FS, DataLayout(Subtarget.getDataLayout()), FrameInfo(TargetFrameInfo::StackGrowsDown, Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4), - InstrInfo(*this), JITInfo(*this), TLInfo(*this) { + InstrInfo(*this), JITInfo(*this), TLInfo(*this), + ELFWriterInfo(Subtarget.is64Bit()) { DefRelocModel = getRelocationModel(); // FIXME: Correctly select PIC model for Win64 stuff if (getRelocationModel() == Reloc::Default) { @@ -213,6 +214,13 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM, CodeGenOpt::Level OptLevel, bool Verbose, raw_ostream &Out) { + // FIXME: Move this somewhere else! + // On Darwin, override 64-bit static relocation to pic_ since the + // assembler doesn't support it.
+ if (DefRelocModel == Reloc::Static && + Subtarget.isTargetDarwin() && Subtarget.is64Bit()) + setRelocationModel(Reloc::PIC_); + assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); diff --git a/lib/Target/XCore/XCoreMachineFunctionInfo.h b/lib/Target/XCore/XCoreMachineFunctionInfo.h index 43adb0f..124a011 100644 --- a/lib/Target/XCore/XCoreMachineFunctionInfo.h +++ b/lib/Target/XCore/XCoreMachineFunctionInfo.h @@ -40,7 +40,7 @@ public: FPSpillSlot(0), VarArgsFrameIndex(0) {} - XCoreFunctionInfo(MachineFunction &MF) : + explicit XCoreFunctionInfo(MachineFunction &MF) : UsesLR(false), LRSpillSlot(0), FPSpillSlot(0), diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 733dfa9..673d38b 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -59,7 +59,8 @@ cl::opt<bool> EnableLoadPRE("enable-load-pre", cl::init(true)); /// two values. namespace { struct VISIBILITY_HIDDEN Expression { - enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, + enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL, + UDIV, SDIV, FDIV, UREM, SREM, FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, @@ -200,8 +201,11 @@ Expression::ExpressionOpcode ValueTable::getOpcode(BinaryOperator* BO) { default: // THIS SHOULD NEVER HAPPEN assert(0 && "Binary operator with unknown opcode?"); case Instruction::Add: return Expression::ADD; + case Instruction::FAdd: return Expression::FADD; case Instruction::Sub: return Expression::SUB; + case Instruction::FSub: return Expression::FSUB; case Instruction::Mul: return Expression::MUL; + case Instruction::FMul: return Expression::FMUL; case Instruction::UDiv: return Expression::UDIV; case Instruction::SDiv: return Expression::SDIV; case Instruction::FDiv: return Expression::FDIV; diff --git a/lib/Transforms/Scalar/GVNPRE.cpp b/lib/Transforms/Scalar/GVNPRE.cpp index e3b0937..0f3153f 100644 --- a/lib/Transforms/Scalar/GVNPRE.cpp +++ b/lib/Transforms/Scalar/GVNPRE.cpp @@ -55,7 +55,8 @@ namespace { /// two values. 
struct Expression { - enum ExpressionOpcode { ADD, SUB, MUL, UDIV, SDIV, FDIV, UREM, SREM, + enum ExpressionOpcode { ADD, FADD, SUB, FSUB, MUL, FMUL, + UDIV, SDIV, FDIV, UREM, SREM, FREM, SHL, LSHR, ASHR, AND, OR, XOR, ICMPEQ, ICMPNE, ICMPUGT, ICMPUGE, ICMPULT, ICMPULE, ICMPSGT, ICMPSGE, ICMPSLT, ICMPSLE, FCMPOEQ, @@ -202,10 +203,16 @@ Expression::ExpressionOpcode switch(BO->getOpcode()) { case Instruction::Add: return Expression::ADD; + case Instruction::FAdd: + return Expression::FADD; case Instruction::Sub: return Expression::SUB; + case Instruction::FSub: + return Expression::FSUB; case Instruction::Mul: return Expression::MUL; + case Instruction::FMul: + return Expression::FMUL; case Instruction::UDiv: return Expression::UDIV; case Instruction::SDiv: diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index af61eae..83503fd 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -754,7 +754,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) { BinaryOperator *Incr = dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge)); if (!Incr) return; - if (Incr->getOpcode() != Instruction::Add) return; + if (Incr->getOpcode() != Instruction::FAdd) return; ConstantFP *IncrValue = NULL; unsigned IncrVIndex = 1; if (Incr->getOperand(1) == PH) diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index e6f854f..97bd34c 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -167,8 +167,11 @@ namespace { // otherwise - Change was made, replace I with returned instruction // Instruction *visitAdd(BinaryOperator &I); + Instruction *visitFAdd(BinaryOperator &I); Instruction *visitSub(BinaryOperator &I); + Instruction *visitFSub(BinaryOperator &I); Instruction *visitMul(BinaryOperator &I); + Instruction *visitFMul(BinaryOperator &I); Instruction *visitURem(BinaryOperator &I); Instruction *visitSRem(BinaryOperator &I); Instruction *visitFRem(BinaryOperator &I); @@ -403,7 +406,8 @@ X("instcombine", "Combine redundant instructions"); // 0 -> undef, 1 -> Const, 2 -> Other, 3 -> Arg, 3 -> Unary, 4 -> OtherInst static unsigned getComplexity(Value *V) { if (isa<Instruction>(V)) { - if (BinaryOperator::isNeg(V) || BinaryOperator::isNot(V)) + if (BinaryOperator::isNeg(V) || BinaryOperator::isFNeg(V) || + BinaryOperator::isNot(V)) return 3; return 4; } @@ -576,6 +580,25 @@ static inline Value *dyn_castNegVal(Value *V) { return 0; } +// dyn_castFNegVal - Given a 'fsub' instruction, return the RHS of the +// instruction if the LHS is a constant negative zero (which is the 'negate' +// form). +// +static inline Value *dyn_castFNegVal(Value *V) { + if (BinaryOperator::isFNeg(V)) + return BinaryOperator::getFNegArgument(V); + + // Constants can be considered to be negated values if they can be folded. 
+ if (ConstantFP *C = dyn_cast<ConstantFP>(V)) + return ConstantExpr::getFNeg(C); + + if (ConstantVector *C = dyn_cast<ConstantVector>(V)) + if (C->getType()->getElementType()->isFloatingPoint()) + return ConstantExpr::getFNeg(C); + + return 0; +} + static inline Value *dyn_castNotVal(Value *V) { if (BinaryOperator::isNot(V)) return BinaryOperator::getNotArgument(V); @@ -1733,12 +1756,12 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, default: assert(0 && "Case stmts out of sync!"); case Intrinsic::x86_sse_sub_ss: case Intrinsic::x86_sse2_sub_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateSub(LHS, RHS, + TmpV = InsertNewInstBefore(BinaryOperator::CreateFSub(LHS, RHS, II->getName()), *II); break; case Intrinsic::x86_sse_mul_ss: case Intrinsic::x86_sse2_mul_sd: - TmpV = InsertNewInstBefore(BinaryOperator::CreateMul(LHS, RHS, + TmpV = InsertNewInstBefore(BinaryOperator::CreateFMul(LHS, RHS, II->getName()), *II); break; } @@ -2052,14 +2075,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return ReplaceInstUsesWith(I, RHS); // X + 0 --> X - if (!I.getType()->isFPOrFPVector()) { // NOTE: -0 + +0 = +0. - if (RHSC->isNullValue()) - return ReplaceInstUsesWith(I, LHS); - } else if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { - if (CFP->isExactlyValue(ConstantFP::getNegativeZero - (I.getType())->getValueAPF())) - return ReplaceInstUsesWith(I, LHS); - } + if (RHSC->isNullValue()) + return ReplaceInstUsesWith(I, LHS); if (ConstantInt *CI = dyn_cast<ConstantInt>(RHSC)) { // X + (signbit) --> X ^ signbit @@ -2317,11 +2334,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return SelectInst::Create(SI->getCondition(), A, N); } } - - // Check for X+0.0. Simplify it to X if we know X is not -0.0. - if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) - if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) - return ReplaceInstUsesWith(I, LHS); // Check for (add (sext x), y), see if we can merge this into an // integer add followed by a sext. @@ -2359,7 +2371,42 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } } - + + return Changed ? &I : 0; +} + +Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + if (Constant *RHSC = dyn_cast<Constant>(RHS)) { + // X + 0 --> X + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) { + if (CFP->isExactlyValue(ConstantFP::getNegativeZero + (I.getType())->getValueAPF())) + return ReplaceInstUsesWith(I, LHS); + } + + if (isa<PHINode>(LHS)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + // -A + B --> B - A + // -A + -B --> -(A + B) + if (Value *LHSV = dyn_castFNegVal(LHS)) + return BinaryOperator::CreateFSub(RHS, LHSV); + + // A + -B --> A - B + if (!isa<Constant>(RHS)) + if (Value *V = dyn_castFNegVal(RHS)) + return BinaryOperator::CreateFSub(LHS, V); + + // Check for X+0.0. Simplify it to X if we know X is not -0.0. + if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) + if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS)) + return ReplaceInstUsesWith(I, LHS); + // Check for (add double (sitofp x), y), see if we can merge this into an // integer add followed by a promotion. 
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { @@ -2407,8 +2454,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { Instruction *InstCombiner::visitSub(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); - if (Op0 == Op1 && // sub X, X -> 0 - !I.getType()->isFPOrFPVector()) + if (Op0 == Op1) // sub X, X -> 0 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // If this is a 'B = x-(-A)', change to B = x+A... @@ -2469,8 +2515,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return BinaryOperator::CreateXor(Op0, Op1); if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { - if (Op1I->getOpcode() == Instruction::Add && - !Op0->getType()->isFPOrFPVector()) { + if (Op1I->getOpcode() == Instruction::Add) { if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y return BinaryOperator::CreateNeg(Op1I->getOperand(1), I.getName()); else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y @@ -2487,8 +2532,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression // is not used by anyone else... // - if (Op1I->getOpcode() == Instruction::Sub && - !Op1I->getType()->isFPOrFPVector()) { + if (Op1I->getOpcode() == Instruction::Sub) { // Swap the two operands of the subexpr... Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); Op1I->setOperand(0, IIOp1); @@ -2526,18 +2570,17 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { } } - if (!Op0->getType()->isFPOrFPVector()) - if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { - if (Op0I->getOpcode() == Instruction::Add) { - if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(1)); - else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X - return ReplaceInstUsesWith(I, Op0I->getOperand(0)); - } else if (Op0I->getOpcode() == Instruction::Sub) { - if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y - return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName()); - } + if (BinaryOperator *Op0I = dyn_cast<BinaryOperator>(Op0)) { + if (Op0I->getOpcode() == Instruction::Add) { + if (Op0I->getOperand(0) == Op1) // (Y+X)-Y == X + return ReplaceInstUsesWith(I, Op0I->getOperand(1)); + else if (Op0I->getOperand(1) == Op1) // (X+Y)-Y == X + return ReplaceInstUsesWith(I, Op0I->getOperand(0)); + } else if (Op0I->getOpcode() == Instruction::Sub) { + if (Op0I->getOperand(0) == Op1) // (X-Y)-X == -Y + return BinaryOperator::CreateNeg(Op0I->getOperand(1), I.getName()); } + } ConstantInt *C1; if (Value *X = dyn_castFoldableMul(Op0, C1)) { @@ -2551,6 +2594,40 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return 0; } +Instruction *InstCombiner::visitFSub(BinaryOperator &I) { + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + + // If this is a 'B = x-(-A)', change to B = x+A... + if (Value *V = dyn_castFNegVal(Op1)) + return BinaryOperator::CreateFAdd(Op0, V); + + if (BinaryOperator *Op1I = dyn_cast<BinaryOperator>(Op1)) { + if (Op1I->getOpcode() == Instruction::FAdd) { + if (Op1I->getOperand(0) == Op0) // X-(X+Y) == -Y + return BinaryOperator::CreateFNeg(Op1I->getOperand(1), I.getName()); + else if (Op1I->getOperand(1) == Op0) // X-(Y+X) == -Y + return BinaryOperator::CreateFNeg(Op1I->getOperand(0), I.getName()); + } + + if (Op1I->hasOneUse()) { + // Replace (x - (y - z)) with (x + (z - y)) if the (y - z) subexpression + // is not used by anyone else... 
+ // + if (Op1I->getOpcode() == Instruction::FSub) { + // Swap the two operands of the subexpr... + Value *IIOp0 = Op1I->getOperand(0), *IIOp1 = Op1I->getOperand(1); + Op1I->setOperand(0, IIOp1); + Op1I->setOperand(1, IIOp0); + + // Create the new top level fadd instruction... + return BinaryOperator::CreateFAdd(Op0, Op1); + } + } + } + + return 0; +} + /// isSignBitCheck - Given an exploded icmp instruction, return true if the /// comparison only checks the sign bit. If it only checks the sign bit, set /// TrueIfSigned if the result of the comparison is true when the input value is @@ -2585,7 +2662,9 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { bool Changed = SimplifyCommutative(I); Value *Op0 = I.getOperand(0); - if (isa<UndefValue>(I.getOperand(1))) // undef * X -> 0 + // TODO: If Op1 is undef and Op0 is finite, return zero. + if (!I.getType()->isFPOrFPVector() && + isa<UndefValue>(I.getOperand(1))) // undef * X -> 0 return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType())); // Simplify mul instructions with a constant RHS... @@ -2611,17 +2690,8 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return BinaryOperator::CreateShl(Op0, ConstantInt::get(Op0->getType(), Val.logBase2())); } - } else if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) { - if (Op1F->isNullValue()) - return ReplaceInstUsesWith(I, Op1); - - // "In IEEE floating point, x*1 is not equivalent to x for nans. However, - // ANSI says we can drop signals, so we can do this anyway." (from GCC) - if (Op1F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' } else if (isa<VectorType>(Op1->getType())) { - if (isa<ConstantAggregateZero>(Op1)) - return ReplaceInstUsesWith(I, Op1); + // TODO: If Op1 is all zeros and Op0 is all finite, return all zeros. if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { if (Op1V->isAllOnesValue()) // X * -1 == 0 - X @@ -2629,9 +2699,6 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { // As above, vector X*splat(1.0) -> X in all defined cases. if (Constant *Splat = Op1V->getSplatValue()) { - if (ConstantFP *F = dyn_cast<ConstantFP>(Splat)) - if (F->isExactlyValue(1.0)) - return ReplaceInstUsesWith(I, Op0); if (ConstantInt *CI = dyn_cast<ConstantInt>(Splat)) if (CI->equalsInt(1)) return ReplaceInstUsesWith(I, Op0); @@ -2755,6 +2822,45 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { return Changed ? &I : 0; } +Instruction *InstCombiner::visitFMul(BinaryOperator &I) { + bool Changed = SimplifyCommutative(I); + Value *Op0 = I.getOperand(0); + + // Simplify mul instructions with a constant RHS... + if (Constant *Op1 = dyn_cast<Constant>(I.getOperand(1))) { + if (ConstantFP *Op1F = dyn_cast<ConstantFP>(Op1)) { + // "In IEEE floating point, x*1 is not equivalent to x for nans. However, + // ANSI says we can drop signals, so we can do this anyway." (from GCC) + if (Op1F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0' + } else if (isa<VectorType>(Op1->getType())) { + if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1)) { + // As above, vector X*splat(1.0) -> X in all defined cases. + if (Constant *Splat = Op1V->getSplatValue()) { + if (ConstantFP *F = dyn_cast<ConstantFP>(Splat)) + if (F->isExactlyValue(1.0)) + return ReplaceInstUsesWith(I, Op0); + } + } + } + + // Try to fold constant mul into select arguments. 
+ if (SelectInst *SI = dyn_cast<SelectInst>(Op0)) + if (Instruction *R = FoldOpIntoSelect(I, SI, this)) + return R; + + if (isa<PHINode>(Op0)) + if (Instruction *NV = FoldOpIntoPhi(I)) + return NV; + } + + if (Value *Op0v = dyn_castFNegVal(Op0)) // -X * -Y = X*Y + if (Value *Op1v = dyn_castFNegVal(I.getOperand(1))) + return BinaryOperator::CreateFMul(Op0v, Op1v); + + return Changed ? &I : 0; +} + /// SimplifyDivRemOfSelect - Try to fold a divide or remainder of a select /// instruction. bool InstCombiner::SimplifyDivRemOfSelect(BinaryOperator &I) { @@ -8562,17 +8668,17 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) { if (Instruction *I = commonCastTransforms(CI)) return I; - // If we have fptrunc(add (fpextend x), (fpextend y)), where x and y are + // If we have fptrunc(fadd (fpextend x), (fpextend y)), where x and y are // smaller than the destination type, we can eliminate the truncate by doing - // the add as the smaller type. This applies to add/sub/mul/div as well as + // the add as the smaller type. This applies to fadd/fsub/fmul/fdiv as well as // many builtins (sqrt, etc). BinaryOperator *OpI = dyn_cast<BinaryOperator>(CI.getOperand(0)); if (OpI && OpI->hasOneUse()) { switch (OpI->getOpcode()) { default: break; - case Instruction::Add: - case Instruction::Sub: - case Instruction::Mul: + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: case Instruction::FDiv: case Instruction::FRem: const Type *SrcTy = OpI->getType(); @@ -9322,11 +9428,15 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // Turn select C, (X+Y), (X-Y) --> (X+(select C, Y, (-Y))). This is // even legal for FP. - if (TI->getOpcode() == Instruction::Sub && - FI->getOpcode() == Instruction::Add) { + if ((TI->getOpcode() == Instruction::Sub && + FI->getOpcode() == Instruction::Add) || + (TI->getOpcode() == Instruction::FSub && + FI->getOpcode() == Instruction::FAdd)) { AddOp = FI; SubOp = TI; - } else if (FI->getOpcode() == Instruction::Sub && - TI->getOpcode() == Instruction::Add) { + } else if ((FI->getOpcode() == Instruction::Sub && + TI->getOpcode() == Instruction::Add) || + (FI->getOpcode() == Instruction::FSub && + TI->getOpcode() == Instruction::FAdd)) { AddOp = TI; SubOp = FI; } diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 92270b5..944f409 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -2268,7 +2268,8 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) { /* create new increment. '++d' in above example. */ ConstantFP *CFP = ConstantFP::get(DestTy, C->getZExtValue()); BinaryOperator *NewIncr = - BinaryOperator::Create(Incr->getOpcode(), + BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? + Instruction::FAdd : Instruction::FSub, NewPH, CFP, "IV.S.next.", Incr); NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); @@ -2424,24 +2425,14 @@ void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) { // Get the terminating condition for the loop if possible (this isn't // necessarily in the latch, or a block that's a predecessor of the header). - SmallVector<BasicBlock*, 8> ExitBlocks; - L->getExitBlocks(ExitBlocks); - if (ExitBlocks.size() != 1) return; + if (!L->getExitBlock()) + return; // More than one loop exit blocks. // Okay, there is one exit block. Try to find the condition that causes the // loop to be exited. 
- BasicBlock *ExitBlock = ExitBlocks[0]; - - BasicBlock *ExitingBlock = 0; - for (pred_iterator PI = pred_begin(ExitBlock), E = pred_end(ExitBlock); - PI != E; ++PI) - if (L->contains(*PI)) { - if (ExitingBlock == 0) - ExitingBlock = *PI; - else - return; // More than one block exiting! - } - assert(ExitingBlock && "No exits from loop, something is broken!"); + BasicBlock *ExitingBlock = L->getExitingBlock(); + if (!ExitingBlock) + return; // More than one block exiting! // Okay, we've computed the exiting block. See what condition causes us to // exit. diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 4b00640..59989c9 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -1009,7 +1009,7 @@ struct VISIBILITY_HIDDEN PowOpt : public LibCallOptimization { if (Op2C->isExactlyValue(1.0)) // pow(x, 1.0) -> x return Op1; if (Op2C->isExactlyValue(2.0)) // pow(x, 2.0) -> x*x - return B.CreateMul(Op1, Op1, "pow2"); + return B.CreateFMul(Op1, Op1, "pow2"); if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); return 0; diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 2cde765..bcc6b81 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -419,9 +419,6 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, case Instruction::LShr: case Instruction::AShr: case Instruction::ICmp: - case Instruction::FCmp: - if (I->getOperand(0)->getType()->isFPOrFPVector()) - return false; // FP arithmetic might trap. break; // These are all cheap and non-trapping instructions. } @@ -1012,9 +1009,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) { default: return false; // Not safe / profitable to hoist. case Instruction::Add: case Instruction::Sub: - // FP arithmetic might trap. Not worth doing for vector ops. - if (HInst->getType()->isFloatingPoint() - || isa<VectorType>(HInst->getType())) + // Not worth doing for vector ops. 
+ if (isa<VectorType>(HInst->getType())) return false; break; case Instruction::And: diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index 5a8fad9..8dfbd1d 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -59,6 +59,10 @@ std::string Attribute::getAsString(Attributes Attrs) { Result += "ssp "; if (Attrs & Attribute::StackProtectReq) Result += "sspreq "; + if (Attrs & Attribute::NoRedZone) + Result += "noredzone "; + if (Attrs & Attribute::NoImplicitFloat) + Result += "noimplicitfloat "; if (Attrs & Attribute::Alignment) { Result += "align "; Result += utostr(Attribute::getAlignmentFromAttrs(Attrs)); diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp index 7e4902f..1d293cc 100644 --- a/lib/VMCore/ConstantFold.cpp +++ b/lib/VMCore/ConstantFold.cpp @@ -602,10 +602,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, return Constant::getNullValue(C1->getType()); case Instruction::UDiv: case Instruction::SDiv: - case Instruction::FDiv: case Instruction::URem: case Instruction::SRem: - case Instruction::FRem: if (!isa<UndefValue>(C2)) // undef / X -> 0 return Constant::getNullValue(C1->getType()); return const_cast<Constant*>(C2); // X / undef -> undef @@ -783,13 +781,13 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, switch (Opcode) { default: break; - case Instruction::Add: + case Instruction::FAdd: (void)C3V.add(C2V, APFloat::rmNearestTiesToEven); return ConstantFP::get(C3V); - case Instruction::Sub: + case Instruction::FSub: (void)C3V.subtract(C2V, APFloat::rmNearestTiesToEven); return ConstantFP::get(C3V); - case Instruction::Mul: + case Instruction::FMul: (void)C3V.multiply(C2V, APFloat::rmNearestTiesToEven); return ConstantFP::get(C3V); case Instruction::FDiv: @@ -808,12 +806,18 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, switch (Opcode) { default: break; - case Instruction::Add: + case Instruction::Add: return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getAdd); - case Instruction::Sub: + case Instruction::FAdd: + return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFAdd); + case Instruction::Sub: return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getSub); - case Instruction::Mul: + case Instruction::FSub: + return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFSub); + case Instruction::Mul: return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getMul); + case Instruction::FMul: + return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getFMul); case Instruction::UDiv: return EvalVectorOp(CP1, CP2, VTy, ConstantExpr::getUDiv); case Instruction::SDiv: @@ -851,7 +855,9 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, // other way if possible. switch (Opcode) { case Instruction::Add: + case Instruction::FAdd: case Instruction::Mul: + case Instruction::FMul: case Instruction::And: case Instruction::Or: case Instruction::Xor: @@ -862,6 +868,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode, case Instruction::LShr: case Instruction::AShr: case Instruction::Sub: + case Instruction::FSub: case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv: diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index 97f3ac9..69c503d 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -775,26 +775,46 @@ const SmallVector<unsigned, 4> &ConstantExpr::getIndices() const { /// specify the full Instruction::OPCODE identifier. 
/// Constant *ConstantExpr::getNeg(Constant *C) { + // API compatibility: Adjust integer opcodes to floating-point opcodes. + if (C->getType()->isFPOrFPVector()) + return getFNeg(C); + assert(C->getType()->isIntOrIntVector() && + "Cannot NEG a nonintegral value!"); return get(Instruction::Sub, ConstantExpr::getZeroValueForNegationExpr(C->getType()), C); } +Constant *ConstantExpr::getFNeg(Constant *C) { + assert(C->getType()->isFPOrFPVector() && + "Cannot FNEG a non-floating-point value!"); + return get(Instruction::FSub, + ConstantExpr::getZeroValueForNegationExpr(C->getType()), + C); +} Constant *ConstantExpr::getNot(Constant *C) { - assert((isa<IntegerType>(C->getType()) || - cast<VectorType>(C->getType())->getElementType()->isInteger()) && - "Cannot NOT a nonintegral value!"); + assert(C->getType()->isIntOrIntVector() && + "Cannot NOT a nonintegral value!"); return get(Instruction::Xor, C, Constant::getAllOnesValue(C->getType())); } Constant *ConstantExpr::getAdd(Constant *C1, Constant *C2) { return get(Instruction::Add, C1, C2); } +Constant *ConstantExpr::getFAdd(Constant *C1, Constant *C2) { + return get(Instruction::FAdd, C1, C2); +} Constant *ConstantExpr::getSub(Constant *C1, Constant *C2) { return get(Instruction::Sub, C1, C2); } +Constant *ConstantExpr::getFSub(Constant *C1, Constant *C2) { + return get(Instruction::FSub, C1, C2); +} Constant *ConstantExpr::getMul(Constant *C1, Constant *C2) { return get(Instruction::Mul, C1, C2); } +Constant *ConstantExpr::getFMul(Constant *C1, Constant *C2) { + return get(Instruction::FMul, C1, C2); +} Constant *ConstantExpr::getUDiv(Constant *C1, Constant *C2) { return get(Instruction::UDiv, C1, C2); } @@ -2142,15 +2162,28 @@ Constant *ConstantExpr::getCompareTy(unsigned short predicate, } Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2) { + // API compatibility: Adjust integer opcodes to floating-point opcodes. + if (C1->getType()->isFPOrFPVector()) { + if (Opcode == Instruction::Add) Opcode = Instruction::FAdd; + else if (Opcode == Instruction::Sub) Opcode = Instruction::FSub; + else if (Opcode == Instruction::Mul) Opcode = Instruction::FMul; + } #ifndef NDEBUG switch (Opcode) { - case Instruction::Add: + case Instruction::Add: case Instruction::Sub: - case Instruction::Mul: + case Instruction::Mul: assert(C1->getType() == C2->getType() && "Op types should be identical!"); - assert((C1->getType()->isInteger() || C1->getType()->isFloatingPoint() || - isa<VectorType>(C1->getType())) && - "Tried to create an arithmetic operation on a non-arithmetic type!"); + assert(C1->getType()->isIntOrIntVector() && + "Tried to create an integer operation on a non-integer type!"); + break; + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + assert(C1->getType() == C2->getType() && "Op types should be identical!"); + assert(C1->getType()->isFPOrFPVector() && + "Tried to create a floating-point operation on a " + "non-floating-point type!"); break; case Instruction::UDiv: case Instruction::SDiv: diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp index 9e030b7..7556b8e 100644 --- a/lib/VMCore/Instruction.cpp +++ b/lib/VMCore/Instruction.cpp @@ -101,8 +101,11 @@ const char *Instruction::getOpcodeName(unsigned OpCode) { // Standard binary operators... 
case Add: return "add"; + case FAdd: return "fadd"; case Sub: return "sub"; + case FSub: return "fsub"; case Mul: return "mul"; + case FMul: return "fmul"; case UDiv: return "udiv"; case SDiv: return "sdiv"; case FDiv: return "fdiv"; @@ -330,19 +333,13 @@ bool Instruction::mayThrow() const { /// isAssociative - Return true if the instruction is associative: /// -/// Associative operators satisfy: x op (y op z) === (x op y) op z) +/// Associative operators satisfy: x op (y op z) === (x op y) op z /// -/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative, when not -/// applied to floating point types. +/// In LLVM, the Add, Mul, And, Or, and Xor operators are associative. /// bool Instruction::isAssociative(unsigned Opcode, const Type *Ty) { - if (Opcode == And || Opcode == Or || Opcode == Xor) - return true; - - // Add/Mul reassociate unless they are FP or FP vectors. - if (Opcode == Add || Opcode == Mul) - return !Ty->isFPOrFPVector(); - return 0; + return Opcode == And || Opcode == Or || Opcode == Xor || + Opcode == Add || Opcode == Mul; } /// isCommutative - Return true if the instruction is commutative: @@ -355,7 +352,9 @@ bool Instruction::isAssociative(unsigned Opcode, const Type *Ty) { bool Instruction::isCommutative(unsigned op) { switch (op) { case Add: + case FAdd: case Mul: + case FMul: case And: case Or: case Xor: diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index fe30271..4c228fe 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -1502,29 +1502,43 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg, // BinaryOperator Class //===----------------------------------------------------------------------===// +/// AdjustIType - Map Add, Sub, and Mul to FAdd, FSub, and FMul when the +/// type is floating-point, to help provide compatibility with an older API. +/// +static BinaryOperator::BinaryOps AdjustIType(BinaryOperator::BinaryOps iType, + const Type *Ty) { + // API compatibility: Adjust integer opcodes to floating-point opcodes. 
+ if (Ty->isFPOrFPVector()) { + if (iType == BinaryOperator::Add) iType = BinaryOperator::FAdd; + else if (iType == BinaryOperator::Sub) iType = BinaryOperator::FSub; + else if (iType == BinaryOperator::Mul) iType = BinaryOperator::FMul; + } + return iType; +} + BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty, const std::string &Name, Instruction *InsertBefore) - : Instruction(Ty, iType, + : Instruction(Ty, AdjustIType(iType, Ty), OperandTraits<BinaryOperator>::op_begin(this), OperandTraits<BinaryOperator>::operands(this), InsertBefore) { Op<0>() = S1; Op<1>() = S2; - init(iType); + init(AdjustIType(iType, Ty)); setName(Name); } BinaryOperator::BinaryOperator(BinaryOps iType, Value *S1, Value *S2, const Type *Ty, const std::string &Name, BasicBlock *InsertAtEnd) - : Instruction(Ty, iType, + : Instruction(Ty, AdjustIType(iType, Ty), OperandTraits<BinaryOperator>::op_begin(this), OperandTraits<BinaryOperator>::operands(this), InsertAtEnd) { Op<0>() = S1; Op<1>() = S2; - init(iType); + init(AdjustIType(iType, Ty)); setName(Name); } @@ -1537,12 +1551,19 @@ void BinaryOperator::init(BinaryOps iType) { #ifndef NDEBUG switch (iType) { case Add: case Sub: - case Mul: + case Mul: + assert(getType() == LHS->getType() && + "Arithmetic operation should return same type as operands!"); + assert(getType()->isIntOrIntVector() && + "Tried to create an integer operation on a non-integer type!"); + break; + case FAdd: case FSub: + case FMul: assert(getType() == LHS->getType() && "Arithmetic operation should return same type as operands!"); - assert((getType()->isInteger() || getType()->isFloatingPoint() || - isa<VectorType>(getType())) && - "Tried to create an arithmetic operation on a non-arithmetic type!"); + assert(getType()->isFPOrFPVector() && + "Tried to create a floating-point operation on a " + "non-floating-point type!"); break; case UDiv: case SDiv: @@ -1631,6 +1652,22 @@ BinaryOperator *BinaryOperator::CreateNeg(Value *Op, const std::string &Name, Op->getType(), Name, InsertAtEnd); } +BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const std::string &Name, + Instruction *InsertBefore) { + Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType()); + return new BinaryOperator(Instruction::FSub, + zero, Op, + Op->getType(), Name, InsertBefore); +} + +BinaryOperator *BinaryOperator::CreateFNeg(Value *Op, const std::string &Name, + BasicBlock *InsertAtEnd) { + Value *zero = ConstantExpr::getZeroValueForNegationExpr(Op->getType()); + return new BinaryOperator(Instruction::FSub, + zero, Op, + Op->getType(), Name, InsertAtEnd); +} + BinaryOperator *BinaryOperator::CreateNot(Value *Op, const std::string &Name, Instruction *InsertBefore) { Constant *C; @@ -1679,6 +1716,14 @@ bool BinaryOperator::isNeg(const Value *V) { return false; } +bool BinaryOperator::isFNeg(const Value *V) { + if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V)) + if (Bop->getOpcode() == Instruction::FSub) + return Bop->getOperand(0) == + ConstantExpr::getZeroValueForNegationExpr(Bop->getType()); + return false; +} + bool BinaryOperator::isNot(const Value *V) { if (const BinaryOperator *Bop = dyn_cast<BinaryOperator>(V)) return (Bop->getOpcode() == Instruction::Xor && @@ -1696,6 +1741,15 @@ const Value *BinaryOperator::getNegArgument(const Value *BinOp) { return getNegArgument(const_cast<Value*>(BinOp)); } +Value *BinaryOperator::getFNegArgument(Value *BinOp) { + assert(isFNeg(BinOp) && "getFNegArgument from non-'fneg' instruction!"); + return 
cast<BinaryOperator>(BinOp)->getOperand(1); } + +const Value *BinaryOperator::getFNegArgument(const Value *BinOp) { + return getFNegArgument(const_cast<Value*>(BinOp)); +} + Value *BinaryOperator::getNotArgument(Value *BinOp) { assert(isNot(BinOp) && "getNotArgument on non-'not' instruction!"); BinaryOperator *BO = cast<BinaryOperator>(BinOp); diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp index 59ec3be..b047d0c 100644 --- a/lib/VMCore/Verifier.cpp +++ b/lib/VMCore/Verifier.cpp @@ -1069,13 +1069,40 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { "Both operands to a binary operator are not of the same type!", &B); switch (B.getOpcode()) { + // Check that integer arithmetic operators are only used with + // integral operands. + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::SDiv: + case Instruction::UDiv: + case Instruction::SRem: + case Instruction::URem: + Assert1(B.getType()->isIntOrIntVector(), + "Integer arithmetic operators only work with integral types!", &B); + Assert1(B.getType() == B.getOperand(0)->getType(), + "Integer arithmetic operators must have same type " + "for operands and result!", &B); + break; + // Check that floating-point arithmetic operators are only used with + // floating-point operands. + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + Assert1(B.getType()->isFPOrFPVector(), + "Floating-point arithmetic operators only work with " + "floating-point types!", &B); + Assert1(B.getType() == B.getOperand(0)->getType(), + "Floating-point arithmetic operators must have same type " + "for operands and result!", &B); + break; // Check that logical operators are only used with integral operands. case Instruction::And: case Instruction::Or: case Instruction::Xor: - Assert1(B.getType()->isInteger() || - (isa<VectorType>(B.getType()) && - cast<VectorType>(B.getType())->getElementType()->isInteger()), + Assert1(B.getType()->isIntOrIntVector(), "Logical operators only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Logical operators must have same type for operands and result!", @@ -1084,22 +1111,13 @@ void Verifier::visitBinaryOperator(BinaryOperator &B) { case Instruction::Shl: case Instruction::LShr: case Instruction::AShr: - Assert1(B.getType()->isInteger() || - (isa<VectorType>(B.getType()) && - cast<VectorType>(B.getType())->getElementType()->isInteger()), + Assert1(B.getType()->isIntOrIntVector(), "Shifts only work with integral types!", &B); Assert1(B.getType() == B.getOperand(0)->getType(), "Shift return type must be same as operands!", &B); - /* FALL THROUGH */ - default: - // Arithmetic operators only work on integer or fp values - Assert1(B.getType() == B.getOperand(0)->getType(), - "Arithmetic operators must have same type for operands and result!", - &B); - Assert1(B.getType()->isInteger() || B.getType()->isFloatingPoint() || - isa<VectorType>(B.getType()), - "Arithmetic operators must have integer, fp, or vector type!", &B); break; + default: + assert(0 && "Unknown BinaryOperator opcode!"); } visitInstruction(B);
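The thread running through the GVN, GVNPRE, InstCombine, LoopStrengthReduce, ConstantFold, Instruction, and Verifier hunks is the split of the overloaded Add/Sub/Mul opcodes into separate integer and floating-point forms. Folds such as sub X, X -> 0, undef * X -> 0, and free reassociation, which the old code had to guard with isFPOrFPVector() checks, are unsound under IEEE arithmetic; with FAdd/FSub/FMul as distinct opcodes the guards disappear and the Verifier can reject mixed usage outright. A standalone C++ illustration of the failure modes (ordinary host arithmetic, not code from the commit):

#include <cstdio>
#include <limits>

int main() {
  double nan = std::numeric_limits<double>::quiet_NaN();
  double big = 1e308;

  // 'sub X, X -> 0' is invalid for FP: NaN - NaN is NaN, not 0.
  std::printf("%g\n", nan - nan);                 // nan

  // FP add is not associative: grouping changes the result.
  std::printf("%g\n", (big + big) + -big);        // inf (big + big overflows)
  std::printf("%g\n", big + (big + -big));        // 1e308

  // FP negation needs -0.0 as its identity, which is why isFNeg above
  // matches 'fsub -0.0, X': subtracting from +0.0 fails to negate zero.
  std::printf("%g %g\n", 0.0 - 0.0, -0.0 - 0.0);  // 0 -0
  return 0;
}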