diff options
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- | lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1 | ||||
-rw-r--r-- | lib/CodeGen/AsmPrinter/DwarfPrinter.cpp | 1 | ||||
-rw-r--r-- | lib/CodeGen/ELF.h | 186 | ||||
-rw-r--r-- | lib/CodeGen/ELFCodeEmitter.cpp | 84 | ||||
-rw-r--r-- | lib/CodeGen/ELFCodeEmitter.h | 7 | ||||
-rw-r--r-- | lib/CodeGen/ELFWriter.cpp | 73 | ||||
-rw-r--r-- | lib/CodeGen/ELFWriter.h | 90 | ||||
-rw-r--r-- | lib/CodeGen/MachineInstr.cpp | 1 | ||||
-rw-r--r-- | lib/CodeGen/RegAllocLinearScan.cpp | 51 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 22 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/FastISel.cpp | 24 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 197 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 27 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 10 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 19 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp | 46 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/SelectionDAGBuild.h | 9 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/TargetLowering.cpp | 17 | ||||
-rw-r--r-- | lib/CodeGen/VirtRegRewriter.cpp | 84 |
19 files changed, 519 insertions, 430 deletions
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 25217b0..5a66f4b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -20,7 +20,6 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" -#include <ostream> using namespace llvm; static TimerGroup &getDwarfTimerGroup() { diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp index 45e7dd3..f7ca4f4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp @@ -21,7 +21,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include <ostream> using namespace llvm; diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h new file mode 100644 index 0000000..bf43622 --- /dev/null +++ b/lib/CodeGen/ELF.h @@ -0,0 +1,186 @@ +//===-- lib/CodeGen/ELF.h - ELF constants and data structures ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header contains common, non-processor-specific data structures and +// constants for the ELF file format. +// +// The details of the ELF32 bits in this file are largely based on +// the Tool Interface Standard (TIS) Executable and Linking Format +// (ELF) Specification Version 1.2, May 1995. The ELF64 stuff is not +// standardized, as far as I can tell. It was largely based on information +// I found in OpenBSD header files. 
+// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ELF_H +#define CODEGEN_ELF_H + +#include "llvm/Support/DataTypes.h" +#include <cstring> + +namespace llvm { + class GlobalVariable; + + // Identification Indexes + enum { + EI_MAG0 = 0, + EI_MAG1 = 1, + EI_MAG2 = 2, + EI_MAG3 = 3 + }; + + // File types + enum { + ET_NONE = 0, // No file type + ET_REL = 1, // Relocatable file + ET_EXEC = 2, // Executable file + ET_DYN = 3, // Shared object file + ET_CORE = 4, // Core file + ET_LOPROC = 0xff00, // Beginning of processor-specific codes + ET_HIPROC = 0xffff // Processor-specific + }; + + // Object file classes. + enum { + ELFCLASS32 = 1, // 32-bit object file + ELFCLASS64 = 2 // 64-bit object file + }; + + // Object file byte orderings. + enum { + ELFDATA2LSB = 1, // Little-endian object file + ELFDATA2MSB = 2 // Big-endian object file + }; + + // Versioning + enum { + EV_NONE = 0, + EV_CURRENT = 1 + }; + + /// ELFSection - This struct contains information about each section that is + /// emitted to the file. This is eventually turned into the section header + /// table at the end of the file. + struct ELFSection { + + // ELF specific fields + std::string Name; // Name of the section. + unsigned NameIdx; // Index in .shstrtab of name, once emitted. 
+ unsigned Type; + unsigned Flags; + uint64_t Addr; + unsigned Offset; + unsigned Size; + unsigned Link; + unsigned Info; + unsigned Align; + unsigned EntSize; + + // Section Header Flags + enum { + SHF_WRITE = 1 << 0, // Writable + SHF_ALLOC = 1 << 1, // Mapped into the process addr space + SHF_EXECINSTR = 1 << 2, // Executable + SHF_MERGE = 1 << 4, // Might be merged if equal + SHF_STRINGS = 1 << 5, // Contains null-terminated strings + SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index + SHF_LINK_ORDER = 1 << 7, // Preserve order after combining + SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required + SHF_GROUP = 1 << 9, // Section is a member of a group + SHF_TLS = 1 << 10 // Section holds thread-local data + }; + + // Section Types + enum { + SHT_NULL = 0, // No associated section (inactive entry). + SHT_PROGBITS = 1, // Program-defined contents. + SHT_SYMTAB = 2, // Symbol table. + SHT_STRTAB = 3, // String table. + SHT_RELA = 4, // Relocation entries; explicit addends. + SHT_HASH = 5, // Symbol hash table. + SHT_DYNAMIC = 6, // Information for dynamic linking. + SHT_NOTE = 7, // Information about the file. + SHT_NOBITS = 8, // Data occupies no space in the file. + SHT_REL = 9, // Relocation entries; no explicit addends. + SHT_SHLIB = 10, // Reserved. + SHT_DYNSYM = 11, // Symbol table. + SHT_LOPROC = 0x70000000, // Lowest processor architecture-specific type. + SHT_HIPROC = 0x7fffffff, // Highest processor architecture-specific type. + SHT_LOUSER = 0x80000000, // Lowest type reserved for applications. + SHT_HIUSER = 0xffffffff // Highest type reserved for applications. + }; + + // Special section indices. 
+ enum { + SHN_UNDEF = 0, // Undefined, missing, irrelevant, or meaningless + SHN_LORESERVE = 0xff00, // Lowest reserved index + SHN_LOPROC = 0xff00, // Lowest processor-specific index + SHN_HIPROC = 0xff1f, // Highest processor-specific index + SHN_ABS = 0xfff1, // Symbol has absolute value; does not need relocation + SHN_COMMON = 0xfff2, // FORTRAN COMMON or C external global variables + SHN_HIRESERVE = 0xffff // Highest reserved index + }; + + /// SectionIdx - The number of the section in the Section Table. + unsigned short SectionIdx; + + /// SectionData - The actual data for this section which we are building + /// up for emission to the file. + std::vector<unsigned char> SectionData; + + ELFSection(const std::string &name) + : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0), + Link(0), Info(0), Align(0), EntSize(0) {} + }; + + /// ELFSym - This struct contains information about each symbol that is + /// added to logical symbol table for the module. This is eventually + /// turned into a real symbol table in the file. + struct ELFSym { + const GlobalValue *GV; // The global value this corresponds to. + + // ELF specific fields + unsigned NameIdx; // Index in .strtab of name, once emitted. 
+ uint64_t Value; + unsigned Size; + uint8_t Info; + uint8_t Other; + unsigned short SectionIdx; + + enum { + STB_LOCAL = 0, + STB_GLOBAL = 1, + STB_WEAK = 2 + }; + + enum { + STT_NOTYPE = 0, + STT_OBJECT = 1, + STT_FUNC = 2, + STT_SECTION = 3, + STT_FILE = 4 + }; + + ELFSym(const GlobalValue *gv) : GV(gv), Value(0), + Size(0), Info(0), Other(0), + SectionIdx(ELFSection::SHN_UNDEF) {} + + void SetBind(unsigned X) { + assert(X == (X & 0xF) && "Bind value out of range!"); + Info = (Info & 0x0F) | (X << 4); + } + void SetType(unsigned X) { + assert(X == (X & 0xF) && "Type value out of range!"); + Info = (Info & 0xF0) | X; + } + }; + +} // end namespace llvm + +#endif diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index 0a0245f..9af276b 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -7,17 +7,17 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "elfce" + #include "ELFCodeEmitter.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/Mangler.h" -#include "llvm/Support/OutputBuffer.h" +#include "llvm/Support/Debug.h" //===----------------------------------------------------------------------===// // ELFCodeEmitter Implementation @@ -27,67 +27,87 @@ namespace llvm { /// startFunction - This callback is invoked when a new machine function is /// about to be emitted. -void ELFCodeEmitter::startFunction(MachineFunction &F) { - // Align the output buffer to the appropriate alignment. - unsigned Align = 16; // FIXME: GENERICIZE!! 
+void ELFCodeEmitter::startFunction(MachineFunction &MF) { + const TargetData *TD = TM.getTargetData(); + const Function *F = MF.getFunction(); + + // Align the output buffer to the appropriate alignment, power of 2. + unsigned FnAlign = F->getAlignment(); + unsigned TDAlign = TD->getPrefTypeAlignment(F->getType()); + unsigned Align = std::max(FnAlign, TDAlign); + assert(!(Align & (Align-1)) && "Alignment is not a power of two!"); + // Get the ELF Section that this function belongs in. - ES = &EW.getSection(".text", ELFWriter::ELFSection::SHT_PROGBITS, - ELFWriter::ELFSection::SHF_EXECINSTR | - ELFWriter::ELFSection::SHF_ALLOC); - OutBuffer = &ES->SectionData; - cerr << "FIXME: This code needs to be updated for changes in the " - << "CodeEmitter interfaces. In particular, this should set " - << "BufferBegin/BufferEnd/CurBufferPtr, not deal with OutBuffer!"; - abort(); + ES = &EW.getTextSection(); + + // FIXME: better memory management, this will be replaced by BinaryObjects + ES->SectionData.reserve(4096); + BufferBegin = &ES->SectionData[0]; + BufferEnd = BufferBegin + ES->SectionData.capacity(); // Upgrade the section alignment if required. if (ES->Align < Align) ES->Align = Align; - // Add padding zeros to the end of the buffer to make sure that the - // function will start on the correct byte alignment within the section. - OutputBuffer OB(*OutBuffer, - TM.getTargetData()->getPointerSizeInBits() == 64, - TM.getTargetData()->isLittleEndian()); - OB.align(Align); - FnStart = OutBuffer->size(); + // Round the size up to the correct alignment for starting the new function. 
+ ES->Size = (ES->Size + (Align-1)) & (-Align); + + // Sanity check on allocated space for text section + assert( ES->Size < 4096 && "no more space in TextSection" ); + + // FIXME: Using ES->Size directly here instead of calculating it from the + // output buffer size (impossible because the code emitter deals only in raw + // bytes) forces us to manually synchronize size and write padding zero bytes + // to the output buffer for all non-text sections. For text sections, we do + // not synchronize the output buffer, and we just blow up if anyone tries to + // write non-code to it. An assert should probably be added to + // AddSymbolToSection to prevent calling it on the text section. + CurBufferPtr = BufferBegin + ES->Size; + + // Record function start address relative to BufferBegin + FnStartPtr = CurBufferPtr; } /// finishFunction - This callback is invoked after the function is completely /// finished. -bool ELFCodeEmitter::finishFunction(MachineFunction &F) { - // We now know the size of the function, add a symbol to represent it. - ELFWriter::ELFSym FnSym(F.getFunction()); +bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { + // Add a symbol to represent the function. + ELFSym FnSym(MF.getFunction()); // Figure out the binding (linkage) of the symbol. - switch (F.getFunction()->getLinkage()) { + switch (MF.getFunction()->getLinkage()) { default: // appending linkage is illegal for functions. 
assert(0 && "Unknown linkage type!"); case GlobalValue::ExternalLinkage: - FnSym.SetBind(ELFWriter::ELFSym::STB_GLOBAL); + FnSym.SetBind(ELFSym::STB_GLOBAL); break; case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: - FnSym.SetBind(ELFWriter::ELFSym::STB_WEAK); + FnSym.SetBind(ELFSym::STB_WEAK); break; case GlobalValue::PrivateLinkage: assert (0 && "PrivateLinkage should not be in the symbol table."); case GlobalValue::InternalLinkage: - FnSym.SetBind(ELFWriter::ELFSym::STB_LOCAL); + FnSym.SetBind(ELFSym::STB_LOCAL); break; } - ES->Size = OutBuffer->size(); + // Set the symbol type as a function + FnSym.SetType(ELFSym::STT_FUNC); - FnSym.SetType(ELFWriter::ELFSym::STT_FUNC); FnSym.SectionIdx = ES->SectionIdx; - FnSym.Value = FnStart; // Value = Offset from start of Section. - FnSym.Size = OutBuffer->size()-FnStart; + FnSym.Size = CurBufferPtr-FnStartPtr; + + // Offset from start of Section + FnSym.Value = FnStartPtr-BufferBegin; // Finally, add it to the symtab. EW.SymbolTable.push_back(FnSym); + + // Update Section Size + ES->Size = CurBufferPtr - BufferBegin; return false; } diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h index 11ebcc8..e9ee936 100644 --- a/lib/CodeGen/ELFCodeEmitter.h +++ b/lib/CodeGen/ELFCodeEmitter.h @@ -21,11 +21,10 @@ namespace llvm { class ELFCodeEmitter : public MachineCodeEmitter { ELFWriter &EW; TargetMachine &TM; - ELFWriter::ELFSection *ES; // Section to write to. - std::vector<unsigned char> *OutBuffer; - size_t FnStart; + ELFSection *ES; // Section to write to. 
+ uint8_t *FnStartPtr; public: - explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM), OutBuffer(0) {} + explicit ELFCodeEmitter(ELFWriter &ew) : EW(ew), TM(EW.TM) {} void startFunction(MachineFunction &F); bool finishFunction(MachineFunction &F); diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp index be8edce..24f12a3 100644 --- a/lib/CodeGen/ELFWriter.cpp +++ b/lib/CodeGen/ELFWriter.cpp @@ -33,6 +33,7 @@ #include "ELFWriter.h" #include "ELFCodeEmitter.h" +#include "ELF.h" #include "llvm/Module.h" #include "llvm/PassManager.h" #include "llvm/DerivedTypes.h" @@ -67,7 +68,8 @@ MachineCodeEmitter *llvm::AddELFWriter(PassManagerBase &PM, ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm) : MachineFunctionPass(&ID), O(o), TM(tm) { - e_flags = 0; // e_flags defaults to 0, no flags. + e_flags = 0; // e_flags defaults to 0, no flags. + e_machine = TM.getELFWriterInfo()->getEMachine(); is64Bit = TM.getTargetData()->getPointerSizeInBits() == 64; isLittleEndian = TM.getTargetData()->isLittleEndian(); @@ -90,24 +92,39 @@ bool ELFWriter::doInitialization(Module &M) { std::vector<unsigned char> &FH = FileHeader; OutputBuffer FHOut(FH, is64Bit, isLittleEndian); - FHOut.outbyte(0x7F); // EI_MAG0 - FHOut.outbyte('E'); // EI_MAG1 - FHOut.outbyte('L'); // EI_MAG2 - FHOut.outbyte('F'); // EI_MAG3 - FHOut.outbyte(is64Bit ? 2 : 1); // EI_CLASS - FHOut.outbyte(isLittleEndian ? 1 : 2); // EI_DATA - FHOut.outbyte(1); // EI_VERSION - FH.resize(16); // EI_PAD up to 16 bytes. - - // This should change for shared objects. - FHOut.outhalf(1); // e_type = ET_REL - FHOut.outhalf(TM.getELFWriterInfo()->getEMachine()); // target-defined - FHOut.outword(1); // e_version = 1 - FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file - FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o - - ELFHeader_e_shoff_Offset = FH.size(); - FHOut.outaddr(0); // e_shoff + unsigned ElfClass = is64Bit ? ELFCLASS64 : ELFCLASS32; + unsigned ElfEndian = isLittleEndian ? 
ELFDATA2LSB : ELFDATA2MSB; + + // ELF Header + // ---------- + // Fields e_shnum and e_shstrndx are only known after all sections have + // been emitted. Their locations in the output buffer are recorded so + // they can be patched up later. + // + // Note + // ---- + // FHOut.outaddr method behaves differently for ELF32 and ELF64, writing + // 4 bytes in the former and 8 in the latter for *_off and *_addr elf types + + FHOut.outbyte(0x7f); // e_ident[EI_MAG0] + FHOut.outbyte('E'); // e_ident[EI_MAG1] + FHOut.outbyte('L'); // e_ident[EI_MAG2] + FHOut.outbyte('F'); // e_ident[EI_MAG3] + + FHOut.outbyte(ElfClass); // e_ident[EI_CLASS] + FHOut.outbyte(ElfEndian); // e_ident[EI_DATA] + FHOut.outbyte(EV_CURRENT); // e_ident[EI_VERSION] + + FH.resize(16); // e_ident[EI_NIDENT-EI_PAD] + + FHOut.outhalf(ET_REL); // e_type + FHOut.outhalf(e_machine); // e_machine = target + FHOut.outword(EV_CURRENT); // e_version + FHOut.outaddr(0); // e_entry = 0 -> no entry point in .o file + FHOut.outaddr(0); // e_phoff = 0 -> no program header for .o + + ELFHdr_e_shoff_Offset = FH.size(); + FHOut.outaddr(0); // e_shoff = sec hdr table off in bytes FHOut.outword(e_flags); // e_flags = whatever the target wants FHOut.outhalf(is64Bit ? 64 : 52); // e_ehsize = ELF header size @@ -115,14 +132,16 @@ bool ELFWriter::doInitialization(Module &M) { FHOut.outhalf(0); // e_phnum = # prog header entries = 0 FHOut.outhalf(is64Bit ? 64 : 40); // e_shentsize = sect hdr entry size + // e_shnum = # of section header ents + ELFHdr_e_shnum_Offset = FH.size(); + FHOut.outhalf(0); - ELFHeader_e_shnum_Offset = FH.size(); - FHOut.outhalf(0); // e_shnum = # of section header ents - ELFHeader_e_shstrndx_Offset = FH.size(); - FHOut.outhalf(0); // e_shstrndx = Section # of '.shstrtab' + // e_shstrndx = Section # of '.shstrtab' + ELFHdr_e_shstrndx_Offset = FH.size(); + FHOut.outhalf(0); // Add the null section, which is required to be first in the file. 
- getSection("", 0, 0); + getSection("", ELFSection::SHT_NULL, 0); // Start up the symbol table. The first entry in the symtab is the null // entry. @@ -334,7 +353,7 @@ void ELFWriter::EmitSectionTableStringTable() { // Now that we know which section number is the .shstrtab section, update the // e_shstrndx entry in the ELF header. OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian); - FHOut.fixhalf(SHStrTab.SectionIdx, ELFHeader_e_shstrndx_Offset); + FHOut.fixhalf(SHStrTab.SectionIdx, ELFHdr_e_shstrndx_Offset); // Set the NameIdx of each section in the string table and emit the bytes for // the string table. @@ -386,11 +405,11 @@ void ELFWriter::OutputSectionsAndSectionTable() { // Now that we know where all of the sections will be emitted, set the e_shnum // entry in the ELF header. OutputBuffer FHOut(FileHeader, is64Bit, isLittleEndian); - FHOut.fixhalf(NumSections, ELFHeader_e_shnum_Offset); + FHOut.fixhalf(NumSections, ELFHdr_e_shnum_Offset); // Now that we know the offset in the file of the section table, update the // e_shoff address in the ELF header. - FHOut.fixaddr(FileOff, ELFHeader_e_shoff_Offset); + FHOut.fixaddr(FileOff, ELFHdr_e_shoff_Offset); // Now that we know all of the data in the file header, emit it and all of the // sections! diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h index 31aa05a..0389185 100644 --- a/lib/CodeGen/ELFWriter.h +++ b/lib/CodeGen/ELFWriter.h @@ -15,6 +15,7 @@ #define ELFWRITER_H #include "llvm/CodeGen/MachineFunctionPass.h" +#include "ELF.h" #include <list> #include <map> @@ -82,10 +83,8 @@ namespace llvm { /// doInitialization - Emit the file header and all of the global variables /// for the module to the ELF file. bool doInitialization(Module &M); - bool runOnMachineFunction(MachineFunction &MF); - /// doFinalization - Now that the module has been completely processed, emit /// the ELF file to 'O'. bool doFinalization(Module &M); @@ -96,53 +95,6 @@ namespace llvm { // as well!). 
DataBuffer FileHeader; - /// ELFSection - This struct contains information about each section that is - /// emitted to the file. This is eventually turned into the section header - /// table at the end of the file. - struct ELFSection { - std::string Name; // Name of the section. - unsigned NameIdx; // Index in .shstrtab of name, once emitted. - unsigned Type; - unsigned Flags; - uint64_t Addr; - unsigned Offset; - unsigned Size; - unsigned Link; - unsigned Info; - unsigned Align; - unsigned EntSize; - - /// SectionIdx - The number of the section in the Section Table. - /// - unsigned short SectionIdx; - - /// SectionData - The actual data for this section which we are building - /// up for emission to the file. - DataBuffer SectionData; - - enum { SHT_NULL = 0, SHT_PROGBITS = 1, SHT_SYMTAB = 2, SHT_STRTAB = 3, - SHT_RELA = 4, SHT_HASH = 5, SHT_DYNAMIC = 6, SHT_NOTE = 7, - SHT_NOBITS = 8, SHT_REL = 9, SHT_SHLIB = 10, SHT_DYNSYM = 11 }; - enum { SHN_UNDEF = 0, SHN_ABS = 0xFFF1, SHN_COMMON = 0xFFF2 }; - enum { // SHF - ELF Section Header Flags - SHF_WRITE = 1 << 0, // Writable - SHF_ALLOC = 1 << 1, // Mapped into the process addr space - SHF_EXECINSTR = 1 << 2, // Executable - SHF_MERGE = 1 << 4, // Might be merged if equal - SHF_STRINGS = 1 << 5, // Contains null-terminated strings - SHF_INFO_LINK = 1 << 6, // 'sh_info' contains SHT index - SHF_LINK_ORDER = 1 << 7, // Preserve order after combining - SHF_OS_NONCONFORMING = 1 << 8, // nonstandard OS support required - SHF_GROUP = 1 << 9, // Section is a member of a group - SHF_TLS = 1 << 10 // Section holds thread-local data - }; - - ELFSection(const std::string &name) - : Name(name), Type(0), Flags(0), Addr(0), Offset(0), Size(0), - Link(0), Info(0), Align(0), EntSize(0) { - } - }; - /// SectionList - This is the list of sections that we have emitted to the /// file. Once the file has been completely built, the section header table /// is constructed from this info. 
@@ -165,9 +117,15 @@ namespace llvm { SN->SectionIdx = NumSections++; SN->Type = Type; SN->Flags = Flags; + SN->Link = ELFSection::SHN_UNDEF; return *SN; } + ELFSection &getTextSection() { + return getSection(".text", ELFSection::SHT_PROGBITS, + ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC); + } + ELFSection &getDataSection() { return getSection(".data", ELFSection::SHT_PROGBITS, ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); @@ -177,34 +135,6 @@ namespace llvm { ELFSection::SHF_WRITE | ELFSection::SHF_ALLOC); } - /// ELFSym - This struct contains information about each symbol that is - /// added to logical symbol table for the module. This is eventually - /// turned into a real symbol table in the file. - struct ELFSym { - const GlobalValue *GV; // The global value this corresponds to. - unsigned NameIdx; // Index in .strtab of name, once emitted. - uint64_t Value; - unsigned Size; - unsigned char Info; - unsigned char Other; - unsigned short SectionIdx; - - enum { STB_LOCAL = 0, STB_GLOBAL = 1, STB_WEAK = 2 }; - enum { STT_NOTYPE = 0, STT_OBJECT = 1, STT_FUNC = 2, STT_SECTION = 3, - STT_FILE = 4 }; - ELFSym(const GlobalValue *gv) : GV(gv), Value(0), Size(0), Info(0), - Other(0), SectionIdx(0) {} - - void SetBind(unsigned X) { - assert(X == (X & 0xF) && "Bind value out of range!"); - Info = (Info & 0x0F) | (X << 4); - } - void SetType(unsigned X) { - assert(X == (X & 0xF) && "Type value out of range!"); - Info = (Info & 0xF0) | X; - } - }; - /// SymbolTable - This is the list of symbols we have emitted to the file. /// This actually gets rearranged before emission to the file (to put the /// local symbols first in the list). @@ -214,9 +144,9 @@ namespace llvm { // (e.g. the location of the section table). These members keep track of // the offset in ELFHeader of these various pieces to update and other // locations in the file. - unsigned ELFHeader_e_shoff_Offset; // e_shoff in ELF header. - unsigned ELFHeader_e_shstrndx_Offset; // e_shstrndx in ELF header. 
- unsigned ELFHeader_e_shnum_Offset; // e_shnum in ELF header. + unsigned ELFHdr_e_shoff_Offset; // e_shoff in ELF header. + unsigned ELFHdr_e_shstrndx_Offset; // e_shstrndx in ELF header. + unsigned ELFHdr_e_shnum_Offset; // e_shnum in ELF header. private: void EmitGlobal(GlobalVariable *GV); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index b8c8563..c351593 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -28,7 +28,6 @@ #include "llvm/Support/Streams.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" -#include <ostream> using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 8520888..804fae5 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -40,7 +40,6 @@ #include <queue> #include <memory> #include <cmath> -#include <iostream> using namespace llvm; @@ -399,7 +398,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { } ++NumCoalesce; - return SrcReg; + return PhysReg; } return Reg; @@ -543,13 +542,37 @@ void RALinScan::linearScan() // Ignore splited live intervals. if (!isPhys && vrm_->getPreSplitReg(cur.reg)) continue; + + // A register defined by an implicit_def can be liveout the def BB and livein + // to a use BB. Add it to the livein set of the use BB's. 
+ if (!isPhys && cur.empty()) { + if (MachineInstr *DefMI = mri_->getVRegDef(cur.reg)) { + assert(DefMI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF); + MachineBasicBlock *DefMBB = DefMI->getParent(); + SmallPtrSet<MachineBasicBlock*, 4> Seen; + Seen.insert(DefMBB); + for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(cur.reg), + re = mri_->reg_end(); ri != re; ++ri) { + MachineInstr *UseMI = &*ri; + MachineBasicBlock *UseMBB = UseMI->getParent(); + if (Seen.insert(UseMBB)) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Adding a virtual register to livein set?"); + UseMBB->addLiveIn(Reg); + } + } + } + } for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); I != E; ++I) { const LiveRange &LR = *I; if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) { for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i) - if (LiveInMBBs[i] != EntryMBB) + if (LiveInMBBs[i] != EntryMBB) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg) && + "Adding a virtual register to livein set?"); LiveInMBBs[i]->addLiveIn(Reg); + } LiveInMBBs.clear(); } } @@ -1192,7 +1215,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // The earliest start of a Spilled interval indicates up to where // in handled we need to roll back - unsigned earliestStart = cur->beginNumber(); LiveInterval *earliestStartInterval = cur; // Spill live intervals of virtual regs mapped to the physical register we @@ -1206,19 +1228,10 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) LiveInterval *sli = spillIs.back(); spillIs.pop_back(); DOUT << "\t\t\tspilling(a): " << *sli << '\n'; - earliestStart = std::min(earliestStart, sli->beginNumber()); earliestStartInterval = (earliestStartInterval->beginNumber() < sli->beginNumber()) ? 
earliestStartInterval : sli; - - if (earliestStartInterval->beginNumber()!=earliestStart) { - epicFail |= true; - std::cerr << "What the 1 - " - << "earliestStart = " << earliestStart - << "earliestStartInterval = " << earliestStartInterval->beginNumber() - << "\n"; - } - + std::vector<LiveInterval*> newIs; if (!NewSpillFramework) { newIs = li_->addIntervalsForSpills(*sli, spillIs, loopInfo, *vrm_); @@ -1229,20 +1242,12 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); spilled.insert(sli->reg); - if (earliestStartInterval->beginNumber()!=earliestStart) { - epicFail |= true; - std::cerr << "What the 2 - " - << "earliestStart = " << earliestStart - << "earliestStartInterval = " << earliestStartInterval->beginNumber() - << "\n"; - } - if (epicFail) { //abort(); } } - earliestStart = earliestStartInterval->beginNumber(); + unsigned earliestStart = earliestStartInterval->beginNumber(); DOUT << "\t\trolling back to: " << earliestStart << '\n'; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 4c1710d..609ec82 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3626,30 +3626,29 @@ static SDNode *getBuildPairElt(SDNode *N, unsigned i) { SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, MVT VT) { assert(N->getOpcode() == ISD::BUILD_PAIR); - SDNode *LD1 = getBuildPairElt(N, 0); - if (!ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) + LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); + LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); + if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse()) return SDValue(); MVT LD1VT = LD1->getValueType(0); - SDNode *LD2 = getBuildPairElt(N, 1); const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() && // If both are volatile this would reduce the 
number of volatile loads. // If one is volatile it might be ok, but play conservative and bail out. - !cast<LoadSDNode>(LD1)->isVolatile() && - !cast<LoadSDNode>(LD2)->isVolatile() && + !LD1->isVolatile() && + !LD2->isVolatile() && TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) { - LoadSDNode *LD = cast<LoadSDNode>(LD1); - unsigned Align = LD->getAlignment(); + unsigned Align = LD1->getAlignment(); unsigned NewAlign = TLI.getTargetData()-> getABITypeAlignment(VT.getTypeForMVT()); if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, N->getDebugLoc(), LD->getChain(), LD->getBasePtr(), - LD->getSrcValue(), LD->getSrcValueOffset(), - false, Align); + return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), + LD1->getBasePtr(), LD1->getSrcValue(), + LD1->getSrcValueOffset(), false, Align); } return SDValue(); @@ -4019,6 +4018,9 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul A, 0) -> 0 if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) return N1; + // fold (fmul A, 0) -> 0, vector edition. + if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 6becff3..4a7dbeb 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -639,18 +639,18 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { bool FastISel::SelectOperator(User *I, unsigned Opcode) { switch (Opcode) { - case Instruction::Add: { - ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FADD : ISD::ADD; - return SelectBinaryOp(I, Opc); - } - case Instruction::Sub: { - ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? 
ISD::FSUB : ISD::SUB; - return SelectBinaryOp(I, Opc); - } - case Instruction::Mul: { - ISD::NodeType Opc = I->getType()->isFPOrFPVector() ? ISD::FMUL : ISD::MUL; - return SelectBinaryOp(I, Opc); - } + case Instruction::Add: + return SelectBinaryOp(I, ISD::ADD); + case Instruction::FAdd: + return SelectBinaryOp(I, ISD::FADD); + case Instruction::Sub: + return SelectBinaryOp(I, ISD::SUB); + case Instruction::FSub: + return SelectBinaryOp(I, ISD::FSUB); + case Instruction::Mul: + return SelectBinaryOp(I, ISD::MUL); + case Instruction::FMul: + return SelectBinaryOp(I, ISD::FMUL); case Instruction::SDiv: return SelectBinaryOp(I, ISD::SDIV); case Instruction::UDiv: diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 2cd67e6..5ae183e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -116,6 +116,8 @@ private: /// result. SDValue LegalizeOp(SDValue O); + SDValue OptimizeFloatStore(StoreSDNode *ST); + /// PerformInsertVectorEltInMemory - Some target cannot handle a variable /// insertion index for the INSERT_VECTOR_ELT instruction. In this case, it /// is necessary to spill the vector being inserted into to memory, perform @@ -165,6 +167,7 @@ private: SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl); SDValue ExpandExtractFromVectorThroughStack(SDValue Op); + SDValue ExpandVectorBuildThroughStack(SDNode* Node); void ExpandNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); void PromoteNode(SDNode *Node, SmallVectorImpl<SDValue> &Results); @@ -681,6 +684,59 @@ ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) { return PerformInsertVectorEltInMemory(Vec, Val, Idx, dl); } +SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { + // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' + // FIXME: We shouldn't do this for TargetConstantFP's. + // FIXME: move this to the DAG Combiner! 
Note that we can't regress due + // to phase ordering between legalized code and the dag combiner. This + // probably means that we need to integrate dag combiner and legalizer + // together. + // We generally can't do this one for long doubles. + SDValue Tmp1 = ST->getChain(); + SDValue Tmp2 = ST->getBasePtr(); + SDValue Tmp3; + int SVOffset = ST->getSrcValueOffset(); + unsigned Alignment = ST->getAlignment(); + bool isVolatile = ST->isVolatile(); + DebugLoc dl = ST->getDebugLoc(); + if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { + if (CFP->getValueType(0) == MVT::f32 && + getTypeAction(MVT::i32) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF(). + bitcastToAPInt().zextOrTrunc(32), + MVT::i32); + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + } else if (CFP->getValueType(0) == MVT::f64) { + // If this target supports 64-bit registers, do a single 64-bit store. + if (getTypeAction(MVT::i64) == Legal) { + Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). + zextOrTrunc(64), MVT::i64); + return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { + // Otherwise, if the target supports 32-bit registers, use 2 32-bit + // stores. If the target supports neither 32- nor 64-bits, this + // xform is certainly not worth it. 
+ const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); + SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); + SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); + if (TLI.isBigEndian()) std::swap(Lo, Hi); + + Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), + SVOffset, isVolatile, Alignment); + Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, + DAG.getIntPtrConstant(4)); + Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, + isVolatile, MinAlign(Alignment, 4U)); + + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + } + } + } + return SDValue(); +} + /// LegalizeOp - We know that the specified value has a legal type, and /// that its operands are legal. Now ensure that the operation itself /// is legal, recursively ensuring that the operands' operations remain @@ -1293,50 +1349,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { bool isVolatile = ST->isVolatile(); if (!ST->isTruncatingStore()) { - // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' - // FIXME: We shouldn't do this for TargetConstantFP's. - // FIXME: move this to the DAG Combiner! Note that we can't regress due - // to phase ordering between legalized code and the dag combiner. This - // probably means that we need to integrate dag combiner and legalizer - // together. - // We generally can't do this one for long doubles. - if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) { - if (CFP->getValueType(0) == MVT::f32 && - getTypeAction(MVT::i32) == Legal) { - Tmp3 = DAG.getConstant(CFP->getValueAPF(). - bitcastToAPInt().zextOrTrunc(32), - MVT::i32); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); - break; - } else if (CFP->getValueType(0) == MVT::f64) { - // If this target supports 64-bit registers, do a single 64-bit store. - if (getTypeAction(MVT::i64) == Legal) { - Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 
- zextOrTrunc(64), MVT::i64); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); - break; - } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) { - // Otherwise, if the target supports 32-bit registers, use 2 32-bit - // stores. If the target supports neither 32- nor 64-bits, this - // xform is certainly not worth it. - const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt(); - SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32); - SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32); - if (TLI.isBigEndian()) std::swap(Lo, Hi); - - Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(), - SVOffset, isVolatile, Alignment); - Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, - DAG.getIntPtrConstant(4)); - Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4, - isVolatile, MinAlign(Alignment, 4U)); - - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - break; - } - } + if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { + Result = SDValue(OptStore, 0); + break; } { @@ -1510,6 +1525,46 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0); } +SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { + // We can't handle this case efficiently. Allocate a sufficiently + // aligned object on the stack, store each element into it, then load + // the result as a vector. + // Create the stack frame object. + MVT VT = Node->getValueType(0); + MVT OpVT = Node->getOperand(0).getValueType(); + DebugLoc dl = Node->getDebugLoc(); + SDValue FIPtr = DAG.CreateStackTemporary(VT); + int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); + const Value *SV = PseudoSourceValue::getFixedStack(FI); + + // Emit a store of each element to the stack slot. 
+ SmallVector<SDValue, 8> Stores; + unsigned TypeByteSize = OpVT.getSizeInBits() / 8; + // Store (in the right endianness) the elements to memory. + for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { + // Ignore undef elements. + if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; + + unsigned Offset = TypeByteSize*i; + + SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); + Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); + + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), + Idx, SV, Offset)); + } + + SDValue StoreChain; + if (!Stores.empty()) // Not all undef elements? + StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + &Stores[0], Stores.size()); + else + StoreChain = DAG.getEntryNode(); + + // Result is a load from the stack slot. + return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0); +} + SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1 = Node->getOperand(0); @@ -1853,40 +1908,8 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { } } - // Otherwise, we can't handle this case efficiently. Allocate a sufficiently - // aligned object on the stack, store each element into it, then load - // the result as a vector. - // Create the stack frame object. - SDValue FIPtr = DAG.CreateStackTemporary(VT); - int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex(); - const Value *SV = PseudoSourceValue::getFixedStack(FI); - - // Emit a store of each element to the stack slot. - SmallVector<SDValue, 8> Stores; - unsigned TypeByteSize = OpVT.getSizeInBits() / 8; - // Store (in the right endianness) the elements to memory. - for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) { - // Ignore undef elements. 
- if (Node->getOperand(i).getOpcode() == ISD::UNDEF) continue; - - unsigned Offset = TypeByteSize*i; - - SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType()); - Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx); - - Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), - Idx, SV, Offset)); - } - - SDValue StoreChain; - if (!Stores.empty()) // Not all undef elements? - StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &Stores[0], Stores.size()); - else - StoreChain = DAG.getEntryNode(); - - // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0); + // Otherwise, we can't handle this case efficiently. + return ExpandVectorBuildThroughStack(Node); } // ExpandLibCall - Expand a node into a call to a libcall. If the result value @@ -2437,23 +2460,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0))); break; case ISD::CONCAT_VECTORS: { - // Use extract/insert/build vector for now. We might try to be - // more clever later. 
- SmallVector<SDValue, 8> Ops; - unsigned NumOperands = Node->getNumOperands(); - for (unsigned i=0; i < NumOperands; ++i) { - SDValue SubOp = Node->getOperand(i); - MVT VVT = SubOp.getNode()->getValueType(0); - MVT EltVT = VVT.getVectorElementType(); - unsigned NumSubElem = VVT.getVectorNumElements(); - for (unsigned j=0; j < NumSubElem; ++j) { - Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp, - DAG.getIntPtrConstant(j))); - } - } - Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Ops[0], Ops.size()); - Results.push_back(Tmp1); + Results.push_back(ExpandVectorBuildThroughStack(Node)); break; } case ISD::SCALAR_TO_VECTOR: diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index eb9342c..0c826f6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -356,13 +356,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { unsigned NewOpc = N->getOpcode(); DebugLoc dl = N->getDebugLoc(); - // If we're promoting a UINT to a larger size, check to see if the new node - // will be legal. If it isn't, check to see if FP_TO_SINT is legal, since - // we can use that instead. This allows us to generate better code for - // FP_TO_UINT for small destination sizes on targets where FP_TO_UINT is not - // legal, such as PowerPC. + // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is + // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT + // and SINT conversions are Custom, there is no way to tell which is preferable. + // We choose SINT because that's the right thing on PPC.) 
if (N->getOpcode() == ISD::FP_TO_UINT && - !TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NVT) && + !TLI.isOperationLegal(ISD::FP_TO_UINT, NVT) && TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) NewOpc = ISD::FP_TO_SINT; @@ -1747,7 +1746,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i32) + if (VT == MVT::i16) + LC = RTLIB::SDIV_I16; + else if (VT == MVT::i32) LC = RTLIB::SDIV_I32; else if (VT == MVT::i64) LC = RTLIB::SDIV_I64; @@ -1909,7 +1910,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i32) + if (VT == MVT::i16) + LC = RTLIB::SREM_I16; + else if (VT == MVT::i32) LC = RTLIB::SREM_I32; else if (VT == MVT::i64) LC = RTLIB::SREM_I64; @@ -1938,7 +1941,9 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i32) + if (VT == MVT::i16) + LC = RTLIB::UDIV_I16; + else if (VT == MVT::i32) LC = RTLIB::UDIV_I32; else if (VT == MVT::i64) LC = RTLIB::UDIV_I64; @@ -1956,7 +1961,9 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, DebugLoc dl = N->getDebugLoc(); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - if (VT == MVT::i32) + if (VT == MVT::i16) + LC = RTLIB::UREM_I16; + else if (VT == MVT::i32) LC = RTLIB::UREM_I32; else if (VT == MVT::i64) LC = RTLIB::UREM_I64; diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index df9af21..335c73c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -129,6 +129,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { if (!HasVectorValue) return TranslateLegalizeResults(Op, Result); + MVT QueryType; switch (Op.getOpcode()) { default: return TranslateLegalizeResults(Op, Result); @@ -162,8 +163,6 @@ SDValue 
VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::ANY_EXTEND: case ISD::TRUNCATE: case ISD::SIGN_EXTEND: - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FNEG: @@ -183,10 +182,15 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + QueryType = Node->getValueType(0); + break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + QueryType = Node->getOperand(0).getValueType(); break; } - switch (TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0))) { + switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { case TargetLowering::Promote: // "Promote" the operation by bitcasting Result = PromoteVectorOp(Op); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 195896e..a9adce8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -154,7 +154,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { // Do not accept an all-undef vector. if (i == e) return false; - // Do not accept build_vectors that aren't all constants or which have non-~0 + // Do not accept build_vectors that aren't all constants or which have non-0 // elements. SDValue Zero = N->getOperand(i); if (isa<ConstantSDNode>(Zero)) { @@ -166,7 +166,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { } else return false; - // Okay, we have at least one ~0 value, check to see if the rest match or are + // Okay, we have at least one 0 value, check to see if the rest match or are // undefs. 
for (++i; i != e; ++i) if (N->getOperand(i) != Zero && @@ -2807,16 +2807,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, MVT VT, case ISD::ADDC: case ISD::ADDE: case ISD::SUB: - case ISD::FADD: - case ISD::FSUB: - case ISD::FMUL: - case ISD::FDIV: - case ISD::FREM: case ISD::UDIV: case ISD::SDIV: case ISD::UREM: case ISD::SREM: return N2; // fold op(arg1, undef) -> undef + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + if (UnsafeFPMath) + return N2; + break; case ISD::MUL: case ISD::AND: case ISD::SRL: @@ -3059,7 +3062,7 @@ bool MeetsMaxMemopRequirement(std::vector<MVT> &MemOps, isSrcStr = isMemSrcFromString(Src, Str); bool isSrcConst = isa<ConstantSDNode>(Src); bool AllowUnalign = TLI.allowsUnalignedMemoryAccesses(); - MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr); + MVT VT = TLI.getOptimalMemOpType(Size, Align, isSrcConst, isSrcStr, DAG); if (VT != MVT::iAny) { unsigned NewAlign = (unsigned) TLI.getTargetData()->getABITypeAlignment(VT.getTypeForMVT()); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index 889d7f5..93750d6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -842,20 +842,6 @@ void SelectionDAGLowering::visit(unsigned Opcode, User &I) { } } -void SelectionDAGLowering::visitAdd(User &I) { - if (I.getType()->isFPOrFPVector()) - visitBinary(I, ISD::FADD); - else - visitBinary(I, ISD::ADD); -} - -void SelectionDAGLowering::visitMul(User &I) { - if (I.getType()->isFPOrFPVector()) - visitBinary(I, ISD::FMUL); - else - visitBinary(I, ISD::MUL); -} - SDValue SelectionDAGLowering::getValue(const Value *V) { SDValue &N = NodeMap[V]; if (N.getNode()) return N; @@ -2161,37 +2147,33 @@ void SelectionDAGLowering::visitSwitch(SwitchInst &SI) { } -void SelectionDAGLowering::visitSub(User &I) { +void SelectionDAGLowering::visitFSub(User &I) { // -0.0 - X --> 
fneg const Type *Ty = I.getType(); if (isa<VectorType>(Ty)) { if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) { const VectorType *DestTy = cast<VectorType>(I.getType()); const Type *ElTy = DestTy->getElementType(); - if (ElTy->isFloatingPoint()) { - unsigned VL = DestTy->getNumElements(); - std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); - Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); - if (CV == CNZ) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), - Op2.getValueType(), Op2)); - return; - } - } - } - } - if (Ty->isFloatingPoint()) { - if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) - if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { + unsigned VL = DestTy->getNumElements(); + std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy)); + Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size()); + if (CV == CNZ) { SDValue Op2 = getValue(I.getOperand(1)); setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), Op2.getValueType(), Op2)); return; } + } } + if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0))) + if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) { + SDValue Op2 = getValue(I.getOperand(1)); + setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + Op2.getValueType(), Op2)); + return; + } - visitBinary(I, Ty->isFPOrFPVector() ? 
ISD::FSUB : ISD::SUB); + visitBinary(I, ISD::FSUB); } void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h index 578aa591..057c841 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h @@ -469,9 +469,12 @@ private: void visitBinary(User &I, unsigned OpCode); void visitShift(User &I, unsigned Opcode); - void visitAdd(User &I); - void visitSub(User &I); - void visitMul(User &I); + void visitAdd(User &I) { visitBinary(I, ISD::ADD); } + void visitFAdd(User &I) { visitBinary(I, ISD::FADD); } + void visitSub(User &I) { visitBinary(I, ISD::SUB); } + void visitFSub(User &I); + void visitMul(User &I) { visitBinary(I, ISD::MUL); } + void visitFMul(User &I) { visitBinary(I, ISD::FMUL); } void visitURem(User &I) { visitBinary(I, ISD::UREM); } void visitSRem(User &I) { visitBinary(I, ISD::SREM); } void visitFRem(User &I) { visitBinary(I, ISD::FREM); } diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3334e53..ab4cd51 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2070,13 +2070,13 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA, } -/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is -/// loading 'Bytes' bytes from a location that is 'Dist' units away from the -/// location that the 'Base' load is loading from. -bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base, - unsigned Bytes, int Dist, +/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a +/// location that is 'Dist' units away from the location that the 'Base' load +/// is loading from. 
+bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, + unsigned Bytes, int Dist, const MachineFrameInfo *MFI) const { - if (LD->getOperand(0).getNode() != Base->getOperand(0).getNode()) + if (LD->getChain() != Base->getChain()) return false; MVT VT = LD->getValueType(0); if (VT.getSizeInBits() / 8 != Bytes) @@ -2094,6 +2094,11 @@ bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base, if (FS != BFS || FS != (int)Bytes) return false; return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); } + if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) { + ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1)); + if (V && (V->getSExtValue() == Dist*Bytes)) + return true; + } GlobalValue *GV1 = NULL; GlobalValue *GV2 = NULL; diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index c31f622..bd6584a 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -33,99 +33,21 @@ STATISTIC(NumSUnfold , "Number of stores unfolded"); STATISTIC(NumModRefUnfold, "Number of modref unfolded"); namespace { - enum RewriterName { simple, local, trivial }; + enum RewriterName { local, trivial }; } static cl::opt<RewriterName> RewriterOpt("rewriter", cl::desc("Rewriter to use: (default: local)"), cl::Prefix, - cl::values(clEnumVal(simple, "simple rewriter"), - clEnumVal(local, "local rewriter"), + cl::values(clEnumVal(local, "local rewriter"), clEnumVal(trivial, "trivial rewriter"), clEnumValEnd), cl::init(local)); VirtRegRewriter::~VirtRegRewriter() {} - -// ****************************** // -// Simple Spiller Implementation // -// ****************************** // - -struct VISIBILITY_HIDDEN SimpleRewriter : public VirtRegRewriter { - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs) { - DOUT << "********** REWRITE MACHINE CODE **********\n"; - DOUT << "********** Function: " << MF.getFunction()->getName() << '\n'; - const 
TargetMachine &TM = MF.getTarget(); - const TargetInstrInfo &TII = *TM.getInstrInfo(); - const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); - - - // LoadedRegs - Keep track of which vregs are loaded, so that we only load - // each vreg once (in the case where a spilled vreg is used by multiple - // operands). This is always smaller than the number of operands to the - // current machine instr, so it should be small. - std::vector<unsigned> LoadedRegs; - - for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); - MBBI != E; ++MBBI) { - DOUT << MBBI->getBasicBlock()->getName() << ":\n"; - MachineBasicBlock &MBB = *MBBI; - for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); - MII != E; ++MII) { - MachineInstr &MI = *MII; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (MO.isReg() && MO.getReg()) { - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned VirtReg = MO.getReg(); - unsigned SubIdx = MO.getSubReg(); - unsigned PhysReg = VRM.getPhys(VirtReg); - unsigned RReg = SubIdx ? 
TRI.getSubReg(PhysReg, SubIdx) : PhysReg; - if (!VRM.isAssignedReg(VirtReg)) { - int StackSlot = VRM.getStackSlot(VirtReg); - const TargetRegisterClass* RC = - MF.getRegInfo().getRegClass(VirtReg); - - if (MO.isUse() && - std::find(LoadedRegs.begin(), LoadedRegs.end(), VirtReg) - == LoadedRegs.end()) { - TII.loadRegFromStackSlot(MBB, &MI, PhysReg, StackSlot, RC); - MachineInstr *LoadMI = prior(MII); - VRM.addSpillSlotUse(StackSlot, LoadMI); - LoadedRegs.push_back(VirtReg); - ++NumLoads; - DOUT << '\t' << *LoadMI; - } - - if (MO.isDef()) { - TII.storeRegToStackSlot(MBB, next(MII), PhysReg, true, - StackSlot, RC); - MachineInstr *StoreMI = next(MII); - VRM.addSpillSlotUse(StackSlot, StoreMI); - ++NumStores; - } - } - MF.getRegInfo().setPhysRegUsed(RReg); - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - } else { - MF.getRegInfo().setPhysRegUsed(MO.getReg()); - } - } - } - - DOUT << '\t' << MI; - LoadedRegs.clear(); - } - } - return true; - } -}; /// This class is intended for use with the new spilling framework only. It /// rewrites vreg def/uses to use the assigned preg, but does not insert any @@ -2231,8 +2153,6 @@ llvm::VirtRegRewriter* llvm::createVirtRegRewriter() { default: assert(0 && "Unreachable!"); case local: return new LocalRewriter(); - case simple: - return new SimpleRewriter(); case trivial: return new TrivialRewriter(); } |