diff options
Diffstat (limited to 'contrib/llvm/include/llvm/Bitcode/BitstreamReader.h')
-rw-r--r-- | contrib/llvm/include/llvm/Bitcode/BitstreamReader.h | 333 |
1 files changed, 147 insertions, 186 deletions
diff --git a/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h b/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h index 6f478b7..865a3e6 100644 --- a/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h +++ b/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h @@ -17,39 +17,37 @@ #include "llvm/Bitcode/BitCodes.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/StreamableMemoryObject.h" +#include "llvm/Support/StreamingMemoryObject.h" #include <climits> #include <string> #include <vector> namespace llvm { - class Deserializer; +class Deserializer; -/// BitstreamReader - This class is used to read from an LLVM bitcode stream, -/// maintaining information that is global to decoding the entire file. While -/// a file is being read, multiple cursors can be independently advanced or -/// skipped around within the file. These are represented by the -/// BitstreamCursor class. +/// This class is used to read from an LLVM bitcode stream, maintaining +/// information that is global to decoding the entire file. While a file is +/// being read, multiple cursors can be independently advanced or skipped around +/// within the file. These are represented by the BitstreamCursor class. class BitstreamReader { public: - /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. - /// These describe abbreviations that all blocks of the specified ID inherit. + /// This contains information emitted to BLOCKINFO_BLOCK blocks. These + /// describe abbreviations that all blocks of the specified ID inherit. struct BlockInfo { unsigned BlockID; - std::vector<BitCodeAbbrev*> Abbrevs; + std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs; std::string Name; std::vector<std::pair<unsigned, std::string> > RecordNames; }; private: - std::unique_ptr<StreamableMemoryObject> BitcodeBytes; + std::unique_ptr<MemoryObject> BitcodeBytes; std::vector<BlockInfo> BlockInfoRecords; - /// IgnoreBlockInfoNames - This is set to true if we don't care about the - /// block/record name information in the BlockInfo block. Only llvm-bcanalyzer - /// uses this. + /// This is set to true if we don't care about the block/record name + /// information in the BlockInfo block. Only llvm-bcanalyzer uses this. bool IgnoreBlockInfoNames; BitstreamReader(const BitstreamReader&) LLVM_DELETED_FUNCTION; @@ -58,13 +56,24 @@ public: BitstreamReader() : IgnoreBlockInfoNames(true) { } - BitstreamReader(const unsigned char *Start, const unsigned char *End) { - IgnoreBlockInfoNames = true; + BitstreamReader(const unsigned char *Start, const unsigned char *End) + : IgnoreBlockInfoNames(true) { init(Start, End); } - BitstreamReader(StreamableMemoryObject *bytes) { - BitcodeBytes.reset(bytes); + BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes) + : BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {} + + BitstreamReader(BitstreamReader &&Other) { + *this = std::move(Other); + } + + BitstreamReader &operator=(BitstreamReader &&Other) { + BitcodeBytes = std::move(Other.BitcodeBytes); + // Explicitly swap block info, so that nothing gets destroyed twice. + std::swap(BlockInfoRecords, Other.BlockInfoRecords); + IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames; + return *this; } void init(const unsigned char *Start, const unsigned char *End) { @@ -72,22 +81,9 @@ public: BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); } - StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; } + MemoryObject &getBitcodeBytes() { return *BitcodeBytes; } - ~BitstreamReader() { - // Free the BlockInfoRecords. - while (!BlockInfoRecords.empty()) { - BlockInfo &Info = BlockInfoRecords.back(); - // Free blockinfo abbrev info. - for (unsigned i = 0, e = static_cast<unsigned>(Info.Abbrevs.size()); - i != e; ++i) - Info.Abbrevs[i]->dropRef(); - BlockInfoRecords.pop_back(); - } - } - - /// CollectBlockInfoNames - This is called by clients that want block/record - /// name information. + /// This is called by clients that want block/record name information. void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; } bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; } @@ -95,13 +91,13 @@ public: // Block Manipulation //===--------------------------------------------------------------------===// - /// hasBlockInfoRecords - Return true if we've already read and processed the - /// block info block for this Bitstream. We only process it for the first - /// cursor that walks over it. + /// Return true if we've already read and processed the block info block for + /// this Bitstream. We only process it for the first cursor that walks over + /// it. bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } - /// getBlockInfo - If there is block info for the specified ID, return it, - /// otherwise return null. + /// If there is block info for the specified ID, return it, otherwise return + /// null. const BlockInfo *getBlockInfo(unsigned BlockID) const { // Common case, the most recent entry matches BlockID. if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) @@ -123,23 +119,26 @@ public: BlockInfoRecords.back().BlockID = BlockID; return BlockInfoRecords.back(); } -}; + /// Takes block info from the other bitstream reader. + /// + /// This is a "take" operation because BlockInfo records are non-trivial, and + /// indeed rather expensive. + void takeBlockInfo(BitstreamReader &&Other) { + assert(!hasBlockInfoRecords()); + BlockInfoRecords = std::move(Other.BlockInfoRecords); + } +}; -/// BitstreamEntry - When advancing through a bitstream cursor, each advance can -/// discover a few different kinds of entries: -/// Error - Malformed bitcode was found. -/// EndBlock - We've reached the end of the current block, (or the end of the -/// file, which is treated like a series of EndBlock records. -/// SubBlock - This is the start of a new subblock of a specific ID. -/// Record - This is a record with a specific AbbrevID. -/// +/// When advancing through a bitstream cursor, each advance can discover a few +/// different kinds of entries: struct BitstreamEntry { enum { - Error, - EndBlock, - SubBlock, - Record + Error, // Malformed bitcode was found. + EndBlock, // We've reached the end of the current block, (or the end of the + // file, which is treated like a series of EndBlock records. + SubBlock, // This is the start of a new subblock of a specific ID. + Record // This is a record with a specific AbbrevID. } Kind; unsigned ID; @@ -158,9 +157,9 @@ struct BitstreamEntry { } }; -/// BitstreamCursor - This represents a position within a bitcode file. There -/// may be multiple independent cursors reading within one bitstream, each -/// maintaining their own local state. +/// This represents a position within a bitcode file. There may be multiple +/// independent cursors reading within one bitstream, each maintaining their own +/// local state. /// /// Unlike iterators, BitstreamCursors are heavy-weight objects that should not /// be passed by value. @@ -169,92 +168,74 @@ class BitstreamCursor { BitstreamReader *BitStream; size_t NextChar; + // The size of the bicode. 0 if we don't know it yet. + size_t Size; - /// CurWord/word_t - This is the current data we have pulled from the stream - /// but have not returned to the client. This is specifically and - /// intentionally defined to follow the word size of the host machine for - /// efficiency. We use word_t in places that are aware of this to make it - /// perfectly explicit what is going on. - typedef uint32_t word_t; + /// This is the current data we have pulled from the stream but have not + /// returned to the client. This is specifically and intentionally defined to + /// follow the word size of the host machine for efficiency. We use word_t in + /// places that are aware of this to make it perfectly explicit what is going + /// on. + typedef size_t word_t; word_t CurWord; - /// BitsInCurWord - This is the number of bits in CurWord that are valid. This - /// is always from [0...31/63] inclusive (depending on word size). + /// This is the number of bits in CurWord that are valid. This is always from + /// [0...bits_of(size_t)-1] inclusive. unsigned BitsInCurWord; - // CurCodeSize - This is the declared size of code values used for the current - // block, in bits. + // This is the declared size of code values used for the current block, in + // bits. unsigned CurCodeSize; - /// CurAbbrevs - Abbrevs installed at in this block. - std::vector<BitCodeAbbrev*> CurAbbrevs; + /// Abbrevs installed at in this block. + std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> CurAbbrevs; struct Block { unsigned PrevCodeSize; - std::vector<BitCodeAbbrev*> PrevAbbrevs; + std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs; explicit Block(unsigned PCS) : PrevCodeSize(PCS) {} }; - /// BlockScope - This tracks the codesize of parent blocks. + /// This tracks the codesize of parent blocks. SmallVector<Block, 8> BlockScope; public: - BitstreamCursor() : BitStream(nullptr), NextChar(0) {} - BitstreamCursor(const BitstreamCursor &RHS) - : BitStream(nullptr), NextChar(0) { - operator=(RHS); - } + BitstreamCursor() { init(nullptr); } - explicit BitstreamCursor(BitstreamReader &R) : BitStream(&R) { - NextChar = 0; - CurWord = 0; - BitsInCurWord = 0; - CurCodeSize = 2; - } + explicit BitstreamCursor(BitstreamReader &R) { init(&R); } - void init(BitstreamReader &R) { + void init(BitstreamReader *R) { freeState(); - BitStream = &R; + BitStream = R; NextChar = 0; - CurWord = 0; + Size = 0; BitsInCurWord = 0; CurCodeSize = 2; } - ~BitstreamCursor() { - freeState(); - } - - void operator=(const BitstreamCursor &RHS); - void freeState(); - bool isEndPos(size_t pos) { - return BitStream->getBitcodeBytes().isObjectEnd(static_cast<uint64_t>(pos)); - } - bool canSkipToPos(size_t pos) const { // pos can be skipped to if it is a valid address or one byte past the end. return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( static_cast<uint64_t>(pos - 1)); } - uint32_t getWord(size_t pos) { - uint8_t buf[4] = { 0xFF, 0xFF, 0xFF, 0xFF }; - BitStream->getBitcodeBytes().readBytes(pos, sizeof(buf), buf); - return *reinterpret_cast<support::ulittle32_t *>(buf); - } - bool AtEndOfStream() { - return BitsInCurWord == 0 && isEndPos(NextChar); + if (BitsInCurWord != 0) + return false; + if (Size != 0) + return Size == NextChar; + fillCurWord(); + return BitsInCurWord == 0; } - /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #. + /// Return the number of bits used to encode an abbrev #. unsigned getAbbrevIDWidth() const { return CurCodeSize; } - /// GetCurrentBitNo - Return the bit # of the bit we are reading. + /// Return the bit # of the bit we are reading. uint64_t GetCurrentBitNo() const { return NextChar*CHAR_BIT - BitsInCurWord; } @@ -268,19 +249,17 @@ public: /// Flags that modify the behavior of advance(). enum { - /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does - /// not automatically pop the block scope when the end of a block is - /// reached. + /// If this flag is used, the advance() method does not automatically pop + /// the block scope when the end of a block is reached. AF_DontPopBlockAtEnd = 1, - /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are - /// returned just like normal records. + /// If this flag is used, abbrev entries are returned just like normal + /// records. AF_DontAutoprocessAbbrevs = 2 }; - /// advance - Advance the current bitstream, returning the next entry in the - /// stream. - BitstreamEntry advance(unsigned Flags = 0) { + /// Advance the current bitstream, returning the next entry in the stream. + BitstreamEntry advance(unsigned Flags = 0) { while (1) { unsigned Code = ReadCode(); if (Code == bitc::END_BLOCK) { @@ -305,8 +284,8 @@ public: } } - /// advanceSkippingSubblocks - This is a convenience function for clients that - /// don't expect any subblocks. This just skips over them automatically. + /// This is a convenience function for clients that don't expect any + /// subblocks. This just skips over them automatically. BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { while (1) { // If we found a normal entry, return it. @@ -320,7 +299,7 @@ public: } } - /// JumpToBit - Reset the stream to the specified bit number. + /// Reset the stream to the specified bit number. void JumpToBit(uint64_t BitNo) { uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1); unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); @@ -329,77 +308,74 @@ public: // Move the cursor to the right word. NextChar = ByteNo; BitsInCurWord = 0; - CurWord = 0; // Skip over any bits that are already consumed. - if (WordBitNo) { - if (sizeof(word_t) > 4) - Read64(WordBitNo); - else - Read(WordBitNo); + if (WordBitNo) + Read(WordBitNo); + } + + void fillCurWord() { + assert(Size == 0 || NextChar < (unsigned)Size); + + // Read the next word from the stream. + uint8_t Array[sizeof(word_t)] = {0}; + + uint64_t BytesRead = + BitStream->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar); + + // If we run out of data, stop at the end of the stream. + if (BytesRead == 0) { + Size = NextChar; + return; } + + CurWord = + support::endian::read<word_t, support::little, support::unaligned>( + Array); + NextChar += BytesRead; + BitsInCurWord = BytesRead * 8; } + word_t Read(unsigned NumBits) { + static const unsigned BitsInWord = sizeof(word_t) * 8; - uint32_t Read(unsigned NumBits) { - assert(NumBits && NumBits <= 32 && - "Cannot return zero or more than 32 bits!"); + assert(NumBits && NumBits <= BitsInWord && + "Cannot return zero or more than BitsInWord bits!"); + + static const unsigned Mask = sizeof(word_t) > 4 ? 0x3f : 0x1f; // If the field is fully contained by CurWord, return it quickly. if (BitsInCurWord >= NumBits) { - uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits)); - CurWord >>= NumBits; + word_t R = CurWord & (~word_t(0) >> (BitsInWord - NumBits)); + + // Use a mask to avoid undefined behavior. + CurWord >>= (NumBits & Mask); + BitsInCurWord -= NumBits; return R; } - // If we run out of data, stop at the end of the stream. - if (isEndPos(NextChar)) { - CurWord = 0; - BitsInCurWord = 0; - return 0; - } - - uint32_t R = uint32_t(CurWord); - - // Read the next word from the stream. - uint8_t Array[sizeof(word_t)] = {0}; + word_t R = BitsInCurWord ? CurWord : 0; + unsigned BitsLeft = NumBits - BitsInCurWord; - BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), Array); + fillCurWord(); - // Handle big-endian byte-swapping if necessary. - support::detail::packed_endian_specific_integral - <word_t, support::little, support::unaligned> EndianValue; - memcpy(&EndianValue, Array, sizeof(Array)); + // If we run out of data, stop at the end of the stream. + if (BitsLeft > BitsInCurWord) + return 0; - CurWord = EndianValue; + word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft)); - NextChar += sizeof(word_t); + // Use a mask to avoid undefined behavior. + CurWord >>= (BitsLeft & Mask); - // Extract NumBits-BitsInCurWord from what we just read. - unsigned BitsLeft = NumBits-BitsInCurWord; + BitsInCurWord -= BitsLeft; - // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive. - R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft))) - << BitsInCurWord); + R |= R2 << (NumBits - BitsLeft); - // BitsLeft bits have just been used up from CurWord. BitsLeft is in the - // range [1..32]/[1..64] so be careful how we shift. - if (BitsLeft != sizeof(word_t)*8) - CurWord >>= BitsLeft; - else - CurWord = 0; - BitsInCurWord = sizeof(word_t)*8-BitsLeft; return R; } - uint64_t Read64(unsigned NumBits) { - if (NumBits <= 32) return Read(NumBits); - - uint64_t V = Read(32); - return V | (uint64_t)Read(NumBits-32) << 32; - } - uint32_t ReadVBR(unsigned NumBits) { uint32_t Piece = Read(NumBits); if ((Piece & (1U << (NumBits-1))) == 0) @@ -418,8 +394,8 @@ public: } } - // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The - // chunk size of the VBR must still be <= 32 bits though. + // Read a VBR that may have a value up to 64-bits in size. The chunk size of + // the VBR must still be <= 32 bits though. uint64_t ReadVBR64(unsigned NumBits) { uint32_t Piece = Read(NumBits); if ((Piece & (1U << (NumBits-1))) == 0) @@ -450,7 +426,6 @@ private: } BitsInCurWord = 0; - CurWord = 0; } public: @@ -462,15 +437,13 @@ public: // Block header: // [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen] - /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for - /// the block. + /// Having read the ENTER_SUBBLOCK code, read the BlockID for the block. unsigned ReadSubBlockID() { return ReadVBR(bitc::BlockIDWidth); } - /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip - /// over the body of this block. If the block record is malformed, return - /// true. + /// Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip over the body + /// of this block. If the block record is malformed, return true. bool SkipBlock() { // Read and ignore the codelen value. Since we are skipping this block, we // don't care what code widths are used inside of it. @@ -488,8 +461,8 @@ public: return false; } - /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter - /// the block, and return true if the block has an error. + /// Having read the ENTER_SUBBLOCK abbrevid, enter the block, and return true + /// if the block has an error. bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = nullptr); bool ReadBlockEnd() { @@ -508,12 +481,7 @@ private: void popBlockScope() { CurCodeSize = BlockScope.back().PrevCodeSize; - // Delete abbrevs from popped scope. - for (unsigned i = 0, e = static_cast<unsigned>(CurAbbrevs.size()); - i != e; ++i) - CurAbbrevs[i]->dropRef(); - - BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + CurAbbrevs = std::move(BlockScope.back().PrevAbbrevs); BlockScope.pop_back(); } @@ -521,23 +489,16 @@ private: // Record Processing //===--------------------------------------------------------------------===// -private: - void readAbbreviatedLiteral(const BitCodeAbbrevOp &Op, - SmallVectorImpl<uint64_t> &Vals); - void readAbbreviatedField(const BitCodeAbbrevOp &Op, - SmallVectorImpl<uint64_t> &Vals); - void skipAbbreviatedField(const BitCodeAbbrevOp &Op); - public: - /// getAbbrev - Return the abbreviation for the specified AbbrevId. + /// Return the abbreviation for the specified AbbrevId. const BitCodeAbbrev *getAbbrev(unsigned AbbrevID) { unsigned AbbrevNo = AbbrevID-bitc::FIRST_APPLICATION_ABBREV; assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); - return CurAbbrevs[AbbrevNo]; + return CurAbbrevs[AbbrevNo].get(); } - /// skipRecord - Read the current record and discard it. + /// Read the current record and discard it. void skipRecord(unsigned AbbrevID); unsigned readRecord(unsigned AbbrevID, SmallVectorImpl<uint64_t> &Vals, |