diff options
Diffstat (limited to 'contrib/llvm/lib/ProfileData')
-rw-r--r-- | contrib/llvm/lib/ProfileData/CoverageMapping.cpp | 522 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp | 555 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp | 183 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/InstrProf.cpp | 578 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/InstrProfReader.cpp | 653 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/InstrProfWriter.cpp | 252 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/SampleProf.cpp | 149 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/SampleProfReader.cpp | 727 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/SampleProfWriter.cpp | 240 |
9 files changed, 3859 insertions, 0 deletions
diff --git a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp new file mode 100644 index 0000000..55c0fb4 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp @@ -0,0 +1,522 @@ +//=-- CoverageMapping.cpp - Code coverage mapping support ---------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for clang's and llvm's instrumentation based +// code coverage. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/CoverageMapping.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ProfileData/CoverageMappingReader.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace coverage; + +#define DEBUG_TYPE "coverage-mapping" + +Counter CounterExpressionBuilder::get(const CounterExpression &E) { + auto It = ExpressionIndices.find(E); + if (It != ExpressionIndices.end()) + return Counter::getExpression(It->second); + unsigned I = Expressions.size(); + Expressions.push_back(E); + ExpressionIndices[E] = I; + return Counter::getExpression(I); +} + +void CounterExpressionBuilder::extractTerms( + Counter C, int Sign, SmallVectorImpl<std::pair<unsigned, int>> &Terms) { + switch (C.getKind()) { + case Counter::Zero: + break; + case Counter::CounterValueReference: + Terms.push_back(std::make_pair(C.getCounterID(), Sign)); + break; + case Counter::Expression: + const auto &E = 
Expressions[C.getExpressionID()]; + extractTerms(E.LHS, Sign, Terms); + extractTerms(E.RHS, E.Kind == CounterExpression::Subtract ? -Sign : Sign, + Terms); + break; + } +} + +Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) { + // Gather constant terms. + llvm::SmallVector<std::pair<unsigned, int>, 32> Terms; + extractTerms(ExpressionTree, +1, Terms); + + // If there are no terms, this is just a zero. The algorithm below assumes at + // least one term. + if (Terms.size() == 0) + return Counter::getZero(); + + // Group the terms by counter ID. + std::sort(Terms.begin(), Terms.end(), + [](const std::pair<unsigned, int> &LHS, + const std::pair<unsigned, int> &RHS) { + return LHS.first < RHS.first; + }); + + // Combine terms by counter ID to eliminate counters that sum to zero. + auto Prev = Terms.begin(); + for (auto I = Prev + 1, E = Terms.end(); I != E; ++I) { + if (I->first == Prev->first) { + Prev->second += I->second; + continue; + } + ++Prev; + *Prev = *I; + } + Terms.erase(++Prev, Terms.end()); + + Counter C; + // Create additions. We do this before subtractions to avoid constructs like + // ((0 - X) + Y), as opposed to (Y - X). + for (auto Term : Terms) { + if (Term.second <= 0) + continue; + for (int I = 0; I < Term.second; ++I) + if (C.isZero()) + C = Counter::getCounter(Term.first); + else + C = get(CounterExpression(CounterExpression::Add, C, + Counter::getCounter(Term.first))); + } + + // Create subtractions. 
+ for (auto Term : Terms) { + if (Term.second >= 0) + continue; + for (int I = 0; I < -Term.second; ++I) + C = get(CounterExpression(CounterExpression::Subtract, C, + Counter::getCounter(Term.first))); + } + return C; +} + +Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS) { + return simplify(get(CounterExpression(CounterExpression::Add, LHS, RHS))); +} + +Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS) { + return simplify( + get(CounterExpression(CounterExpression::Subtract, LHS, RHS))); +} + +void CounterMappingContext::dump(const Counter &C, + llvm::raw_ostream &OS) const { + switch (C.getKind()) { + case Counter::Zero: + OS << '0'; + return; + case Counter::CounterValueReference: + OS << '#' << C.getCounterID(); + break; + case Counter::Expression: { + if (C.getExpressionID() >= Expressions.size()) + return; + const auto &E = Expressions[C.getExpressionID()]; + OS << '('; + dump(E.LHS, OS); + OS << (E.Kind == CounterExpression::Subtract ? " - " : " + "); + dump(E.RHS, OS); + OS << ')'; + break; + } + } + if (CounterValues.empty()) + return; + ErrorOr<int64_t> Value = evaluate(C); + if (!Value) + return; + OS << '[' << *Value << ']'; +} + +ErrorOr<int64_t> CounterMappingContext::evaluate(const Counter &C) const { + switch (C.getKind()) { + case Counter::Zero: + return 0; + case Counter::CounterValueReference: + if (C.getCounterID() >= CounterValues.size()) + return make_error_code(errc::argument_out_of_domain); + return CounterValues[C.getCounterID()]; + case Counter::Expression: { + if (C.getExpressionID() >= Expressions.size()) + return make_error_code(errc::argument_out_of_domain); + const auto &E = Expressions[C.getExpressionID()]; + ErrorOr<int64_t> LHS = evaluate(E.LHS); + if (!LHS) + return LHS; + ErrorOr<int64_t> RHS = evaluate(E.RHS); + if (!RHS) + return RHS; + return E.Kind == CounterExpression::Subtract ? 
*LHS - *RHS : *LHS + *RHS; + } + } + llvm_unreachable("Unhandled CounterKind"); +} + +void FunctionRecordIterator::skipOtherFiles() { + while (Current != Records.end() && !Filename.empty() && + Filename != Current->Filenames[0]) + ++Current; + if (Current == Records.end()) + *this = FunctionRecordIterator(); +} + +ErrorOr<std::unique_ptr<CoverageMapping>> +CoverageMapping::load(CoverageMappingReader &CoverageReader, + IndexedInstrProfReader &ProfileReader) { + auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping()); + + std::vector<uint64_t> Counts; + for (const auto &Record : CoverageReader) { + CounterMappingContext Ctx(Record.Expressions); + + Counts.clear(); + if (std::error_code EC = ProfileReader.getFunctionCounts( + Record.FunctionName, Record.FunctionHash, Counts)) { + if (EC == instrprof_error::hash_mismatch) { + Coverage->MismatchedFunctionCount++; + continue; + } else if (EC != instrprof_error::unknown_function) + return EC; + Counts.assign(Record.MappingRegions.size(), 0); + } + Ctx.setCounts(Counts); + + assert(!Record.MappingRegions.empty() && "Function has no regions"); + + StringRef OrigFuncName = Record.FunctionName; + if (!Record.Filenames.empty()) + OrigFuncName = + getFuncNameWithoutPrefix(OrigFuncName, Record.Filenames[0]); + FunctionRecord Function(OrigFuncName, Record.Filenames); + for (const auto &Region : Record.MappingRegions) { + ErrorOr<int64_t> ExecutionCount = Ctx.evaluate(Region.Count); + if (!ExecutionCount) + break; + Function.pushRegion(Region, *ExecutionCount); + } + if (Function.CountedRegions.size() != Record.MappingRegions.size()) { + Coverage->MismatchedFunctionCount++; + continue; + } + + Coverage->Functions.push_back(std::move(Function)); + } + + return std::move(Coverage); +} + +ErrorOr<std::unique_ptr<CoverageMapping>> +CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename, + StringRef Arch) { + auto CounterMappingBuff = MemoryBuffer::getFileOrSTDIN(ObjectFilename); + if (std::error_code 
EC = CounterMappingBuff.getError()) + return EC; + auto CoverageReaderOrErr = + BinaryCoverageReader::create(CounterMappingBuff.get(), Arch); + if (std::error_code EC = CoverageReaderOrErr.getError()) + return EC; + auto CoverageReader = std::move(CoverageReaderOrErr.get()); + auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename); + if (auto EC = ProfileReaderOrErr.getError()) + return EC; + auto ProfileReader = std::move(ProfileReaderOrErr.get()); + return load(*CoverageReader, *ProfileReader); +} + +namespace { +/// \brief Distributes functions into instantiation sets. +/// +/// An instantiation set is a collection of functions that have the same source +/// code, ie, template functions specializations. +class FunctionInstantiationSetCollector { + typedef DenseMap<std::pair<unsigned, unsigned>, + std::vector<const FunctionRecord *>> MapT; + MapT InstantiatedFunctions; + +public: + void insert(const FunctionRecord &Function, unsigned FileID) { + auto I = Function.CountedRegions.begin(), E = Function.CountedRegions.end(); + while (I != E && I->FileID != FileID) + ++I; + assert(I != E && "function does not cover the given file"); + auto &Functions = InstantiatedFunctions[I->startLoc()]; + Functions.push_back(&Function); + } + + MapT::iterator begin() { return InstantiatedFunctions.begin(); } + + MapT::iterator end() { return InstantiatedFunctions.end(); } +}; + +class SegmentBuilder { + std::vector<CoverageSegment> Segments; + SmallVector<const CountedRegion *, 8> ActiveRegions; + + /// Start a segment with no count specified. + void startSegment(unsigned Line, unsigned Col) { + DEBUG(dbgs() << "Top level segment at " << Line << ":" << Col << "\n"); + Segments.emplace_back(Line, Col, /*IsRegionEntry=*/false); + } + + /// Start a segment with the given Region's count. 
+ void startSegment(unsigned Line, unsigned Col, bool IsRegionEntry, + const CountedRegion &Region) { + if (Segments.empty()) + Segments.emplace_back(Line, Col, IsRegionEntry); + CoverageSegment S = Segments.back(); + // Avoid creating empty regions. + if (S.Line != Line || S.Col != Col) { + Segments.emplace_back(Line, Col, IsRegionEntry); + S = Segments.back(); + } + DEBUG(dbgs() << "Segment at " << Line << ":" << Col); + // Set this region's count. + if (Region.Kind != coverage::CounterMappingRegion::SkippedRegion) { + DEBUG(dbgs() << " with count " << Region.ExecutionCount); + Segments.back().setCount(Region.ExecutionCount); + } + DEBUG(dbgs() << "\n"); + } + + /// Start a segment for the given region. + void startSegment(const CountedRegion &Region) { + startSegment(Region.LineStart, Region.ColumnStart, true, Region); + } + + /// Pop the top region off of the active stack, starting a new segment with + /// the containing Region's count. + void popRegion() { + const CountedRegion *Active = ActiveRegions.back(); + unsigned Line = Active->LineEnd, Col = Active->ColumnEnd; + ActiveRegions.pop_back(); + if (ActiveRegions.empty()) + startSegment(Line, Col); + else + startSegment(Line, Col, false, *ActiveRegions.back()); + } + +public: + /// Build a list of CoverageSegments from a sorted list of Regions. + std::vector<CoverageSegment> buildSegments(ArrayRef<CountedRegion> Regions) { + const CountedRegion *PrevRegion = nullptr; + for (const auto &Region : Regions) { + // Pop any regions that end before this one starts. + while (!ActiveRegions.empty() && + ActiveRegions.back()->endLoc() <= Region.startLoc()) + popRegion(); + if (PrevRegion && PrevRegion->startLoc() == Region.startLoc() && + PrevRegion->endLoc() == Region.endLoc()) { + if (Region.Kind == coverage::CounterMappingRegion::CodeRegion) + Segments.back().addCount(Region.ExecutionCount); + } else { + // Add this region to the stack. 
+ ActiveRegions.push_back(&Region); + startSegment(Region); + } + PrevRegion = &Region; + } + // Pop any regions that are left in the stack. + while (!ActiveRegions.empty()) + popRegion(); + return Segments; + } +}; +} + +std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const { + std::vector<StringRef> Filenames; + for (const auto &Function : getCoveredFunctions()) + Filenames.insert(Filenames.end(), Function.Filenames.begin(), + Function.Filenames.end()); + std::sort(Filenames.begin(), Filenames.end()); + auto Last = std::unique(Filenames.begin(), Filenames.end()); + Filenames.erase(Last, Filenames.end()); + return Filenames; +} + +static SmallBitVector gatherFileIDs(StringRef SourceFile, + const FunctionRecord &Function) { + SmallBitVector FilenameEquivalence(Function.Filenames.size(), false); + for (unsigned I = 0, E = Function.Filenames.size(); I < E; ++I) + if (SourceFile == Function.Filenames[I]) + FilenameEquivalence[I] = true; + return FilenameEquivalence; +} + +static Optional<unsigned> findMainViewFileID(StringRef SourceFile, + const FunctionRecord &Function) { + SmallBitVector IsNotExpandedFile(Function.Filenames.size(), true); + SmallBitVector FilenameEquivalence = gatherFileIDs(SourceFile, Function); + for (const auto &CR : Function.CountedRegions) + if (CR.Kind == CounterMappingRegion::ExpansionRegion && + FilenameEquivalence[CR.FileID]) + IsNotExpandedFile[CR.ExpandedFileID] = false; + IsNotExpandedFile &= FilenameEquivalence; + int I = IsNotExpandedFile.find_first(); + if (I == -1) + return None; + return I; +} + +static Optional<unsigned> findMainViewFileID(const FunctionRecord &Function) { + SmallBitVector IsNotExpandedFile(Function.Filenames.size(), true); + for (const auto &CR : Function.CountedRegions) + if (CR.Kind == CounterMappingRegion::ExpansionRegion) + IsNotExpandedFile[CR.ExpandedFileID] = false; + int I = IsNotExpandedFile.find_first(); + if (I == -1) + return None; + return I; +} + +/// Sort a nested sequence of regions 
from a single file. +template <class It> static void sortNestedRegions(It First, It Last) { + std::sort(First, Last, + [](const CountedRegion &LHS, const CountedRegion &RHS) { + if (LHS.startLoc() == RHS.startLoc()) + // When LHS completely contains RHS, we sort LHS first. + return RHS.endLoc() < LHS.endLoc(); + return LHS.startLoc() < RHS.startLoc(); + }); +} + +static bool isExpansion(const CountedRegion &R, unsigned FileID) { + return R.Kind == CounterMappingRegion::ExpansionRegion && R.FileID == FileID; +} + +CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) { + CoverageData FileCoverage(Filename); + std::vector<coverage::CountedRegion> Regions; + + for (const auto &Function : Functions) { + auto MainFileID = findMainViewFileID(Filename, Function); + if (!MainFileID) + continue; + auto FileIDs = gatherFileIDs(Filename, Function); + for (const auto &CR : Function.CountedRegions) + if (FileIDs.test(CR.FileID)) { + Regions.push_back(CR); + if (isExpansion(CR, *MainFileID)) + FileCoverage.Expansions.emplace_back(CR, Function); + } + } + + sortNestedRegions(Regions.begin(), Regions.end()); + DEBUG(dbgs() << "Emitting segments for file: " << Filename << "\n"); + FileCoverage.Segments = SegmentBuilder().buildSegments(Regions); + + return FileCoverage; +} + +std::vector<const FunctionRecord *> +CoverageMapping::getInstantiations(StringRef Filename) { + FunctionInstantiationSetCollector InstantiationSetCollector; + for (const auto &Function : Functions) { + auto MainFileID = findMainViewFileID(Filename, Function); + if (!MainFileID) + continue; + InstantiationSetCollector.insert(Function, *MainFileID); + } + + std::vector<const FunctionRecord *> Result; + for (const auto &InstantiationSet : InstantiationSetCollector) { + if (InstantiationSet.second.size() < 2) + continue; + Result.insert(Result.end(), InstantiationSet.second.begin(), + InstantiationSet.second.end()); + } + return Result; +} + +CoverageData 
+CoverageMapping::getCoverageForFunction(const FunctionRecord &Function) { + auto MainFileID = findMainViewFileID(Function); + if (!MainFileID) + return CoverageData(); + + CoverageData FunctionCoverage(Function.Filenames[*MainFileID]); + std::vector<coverage::CountedRegion> Regions; + for (const auto &CR : Function.CountedRegions) + if (CR.FileID == *MainFileID) { + Regions.push_back(CR); + if (isExpansion(CR, *MainFileID)) + FunctionCoverage.Expansions.emplace_back(CR, Function); + } + + sortNestedRegions(Regions.begin(), Regions.end()); + DEBUG(dbgs() << "Emitting segments for function: " << Function.Name << "\n"); + FunctionCoverage.Segments = SegmentBuilder().buildSegments(Regions); + + return FunctionCoverage; +} + +CoverageData +CoverageMapping::getCoverageForExpansion(const ExpansionRecord &Expansion) { + CoverageData ExpansionCoverage( + Expansion.Function.Filenames[Expansion.FileID]); + std::vector<coverage::CountedRegion> Regions; + for (const auto &CR : Expansion.Function.CountedRegions) + if (CR.FileID == Expansion.FileID) { + Regions.push_back(CR); + if (isExpansion(CR, Expansion.FileID)) + ExpansionCoverage.Expansions.emplace_back(CR, Expansion.Function); + } + + sortNestedRegions(Regions.begin(), Regions.end()); + DEBUG(dbgs() << "Emitting segments for expansion of file " << Expansion.FileID + << "\n"); + ExpansionCoverage.Segments = SegmentBuilder().buildSegments(Regions); + + return ExpansionCoverage; +} + +namespace { +class CoverageMappingErrorCategoryType : public std::error_category { + const char *name() const LLVM_NOEXCEPT override { return "llvm.coveragemap"; } + std::string message(int IE) const override { + auto E = static_cast<coveragemap_error>(IE); + switch (E) { + case coveragemap_error::success: + return "Success"; + case coveragemap_error::eof: + return "End of File"; + case coveragemap_error::no_data_found: + return "No coverage data found"; + case coveragemap_error::unsupported_version: + return "Unsupported coverage format 
version"; + case coveragemap_error::truncated: + return "Truncated coverage data"; + case coveragemap_error::malformed: + return "Malformed coverage data"; + } + llvm_unreachable("A value of coveragemap_error has no message."); + } +}; +} + +static ManagedStatic<CoverageMappingErrorCategoryType> ErrorCategory; + +const std::error_category &llvm::coveragemap_category() { + return *ErrorCategory; +} diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp new file mode 100644 index 0000000..32c692d --- /dev/null +++ b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp @@ -0,0 +1,555 @@ +//=-- CoverageMappingReader.cpp - Code coverage mapping reader ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading coverage mapping data for +// instrumentation based coverage. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/CoverageMappingReader.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace coverage; +using namespace object; + +#define DEBUG_TYPE "coverage-mapping" + +void CoverageMappingIterator::increment() { + // Check if all the records were read or if an error occurred while reading + // the next record. 
+ if (Reader->readNextRecord(Record)) + *this = CoverageMappingIterator(); +} + +std::error_code RawCoverageReader::readULEB128(uint64_t &Result) { + if (Data.size() < 1) + return coveragemap_error::truncated; + unsigned N = 0; + Result = decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N); + if (N > Data.size()) + return coveragemap_error::malformed; + Data = Data.substr(N); + return std::error_code(); +} + +std::error_code RawCoverageReader::readIntMax(uint64_t &Result, + uint64_t MaxPlus1) { + if (auto Err = readULEB128(Result)) + return Err; + if (Result >= MaxPlus1) + return coveragemap_error::malformed; + return std::error_code(); +} + +std::error_code RawCoverageReader::readSize(uint64_t &Result) { + if (auto Err = readULEB128(Result)) + return Err; + // Sanity check the number. + if (Result > Data.size()) + return coveragemap_error::malformed; + return std::error_code(); +} + +std::error_code RawCoverageReader::readString(StringRef &Result) { + uint64_t Length; + if (auto Err = readSize(Length)) + return Err; + Result = Data.substr(0, Length); + Data = Data.substr(Length); + return std::error_code(); +} + +std::error_code RawCoverageFilenamesReader::read() { + uint64_t NumFilenames; + if (auto Err = readSize(NumFilenames)) + return Err; + for (size_t I = 0; I < NumFilenames; ++I) { + StringRef Filename; + if (auto Err = readString(Filename)) + return Err; + Filenames.push_back(Filename); + } + return std::error_code(); +} + +std::error_code RawCoverageMappingReader::decodeCounter(unsigned Value, + Counter &C) { + auto Tag = Value & Counter::EncodingTagMask; + switch (Tag) { + case Counter::Zero: + C = Counter::getZero(); + return std::error_code(); + case Counter::CounterValueReference: + C = Counter::getCounter(Value >> Counter::EncodingTagBits); + return std::error_code(); + default: + break; + } + Tag -= Counter::Expression; + switch (Tag) { + case CounterExpression::Subtract: + case CounterExpression::Add: { + auto ID = Value >> 
Counter::EncodingTagBits; + if (ID >= Expressions.size()) + return coveragemap_error::malformed; + Expressions[ID].Kind = CounterExpression::ExprKind(Tag); + C = Counter::getExpression(ID); + break; + } + default: + return coveragemap_error::malformed; + } + return std::error_code(); +} + +std::error_code RawCoverageMappingReader::readCounter(Counter &C) { + uint64_t EncodedCounter; + if (auto Err = + readIntMax(EncodedCounter, std::numeric_limits<unsigned>::max())) + return Err; + if (auto Err = decodeCounter(EncodedCounter, C)) + return Err; + return std::error_code(); +} + +static const unsigned EncodingExpansionRegionBit = 1 + << Counter::EncodingTagBits; + +/// \brief Read the sub-array of regions for the given inferred file id. +/// \param NumFileIDs the number of file ids that are defined for this +/// function. +std::error_code RawCoverageMappingReader::readMappingRegionsSubArray( + std::vector<CounterMappingRegion> &MappingRegions, unsigned InferredFileID, + size_t NumFileIDs) { + uint64_t NumRegions; + if (auto Err = readSize(NumRegions)) + return Err; + unsigned LineStart = 0; + for (size_t I = 0; I < NumRegions; ++I) { + Counter C; + CounterMappingRegion::RegionKind Kind = CounterMappingRegion::CodeRegion; + + // Read the combined counter + region kind. + uint64_t EncodedCounterAndRegion; + if (auto Err = readIntMax(EncodedCounterAndRegion, + std::numeric_limits<unsigned>::max())) + return Err; + unsigned Tag = EncodedCounterAndRegion & Counter::EncodingTagMask; + uint64_t ExpandedFileID = 0; + if (Tag != Counter::Zero) { + if (auto Err = decodeCounter(EncodedCounterAndRegion, C)) + return Err; + } else { + // Is it an expansion region? 
+ if (EncodedCounterAndRegion & EncodingExpansionRegionBit) { + Kind = CounterMappingRegion::ExpansionRegion; + ExpandedFileID = EncodedCounterAndRegion >> + Counter::EncodingCounterTagAndExpansionRegionTagBits; + if (ExpandedFileID >= NumFileIDs) + return coveragemap_error::malformed; + } else { + switch (EncodedCounterAndRegion >> + Counter::EncodingCounterTagAndExpansionRegionTagBits) { + case CounterMappingRegion::CodeRegion: + // Don't do anything when we have a code region with a zero counter. + break; + case CounterMappingRegion::SkippedRegion: + Kind = CounterMappingRegion::SkippedRegion; + break; + default: + return coveragemap_error::malformed; + } + } + } + + // Read the source range. + uint64_t LineStartDelta, ColumnStart, NumLines, ColumnEnd; + if (auto Err = + readIntMax(LineStartDelta, std::numeric_limits<unsigned>::max())) + return Err; + if (auto Err = readULEB128(ColumnStart)) + return Err; + if (ColumnStart > std::numeric_limits<unsigned>::max()) + return coveragemap_error::malformed; + if (auto Err = readIntMax(NumLines, std::numeric_limits<unsigned>::max())) + return Err; + if (auto Err = readIntMax(ColumnEnd, std::numeric_limits<unsigned>::max())) + return Err; + LineStart += LineStartDelta; + // Adjust the column locations for the empty regions that are supposed to + // cover whole lines. Those regions should be encoded with the + // column range (1 -> std::numeric_limits<unsigned>::max()), but because + // the encoded std::numeric_limits<unsigned>::max() is several bytes long, + // we set the column range to (0 -> 0) to ensure that the column start and + // column end take up one byte each. + // The std::numeric_limits<unsigned>::max() is used to represent a column + // position at the end of the line without knowing the length of that line. 
+ if (ColumnStart == 0 && ColumnEnd == 0) { + ColumnStart = 1; + ColumnEnd = std::numeric_limits<unsigned>::max(); + } + + DEBUG({ + dbgs() << "Counter in file " << InferredFileID << " " << LineStart << ":" + << ColumnStart << " -> " << (LineStart + NumLines) << ":" + << ColumnEnd << ", "; + if (Kind == CounterMappingRegion::ExpansionRegion) + dbgs() << "Expands to file " << ExpandedFileID; + else + CounterMappingContext(Expressions).dump(C, dbgs()); + dbgs() << "\n"; + }); + + MappingRegions.push_back(CounterMappingRegion( + C, InferredFileID, ExpandedFileID, LineStart, ColumnStart, + LineStart + NumLines, ColumnEnd, Kind)); + } + return std::error_code(); +} + +std::error_code RawCoverageMappingReader::read() { + + // Read the virtual file mapping. + llvm::SmallVector<unsigned, 8> VirtualFileMapping; + uint64_t NumFileMappings; + if (auto Err = readSize(NumFileMappings)) + return Err; + for (size_t I = 0; I < NumFileMappings; ++I) { + uint64_t FilenameIndex; + if (auto Err = readIntMax(FilenameIndex, TranslationUnitFilenames.size())) + return Err; + VirtualFileMapping.push_back(FilenameIndex); + } + + // Construct the files using unique filenames and virtual file mapping. + for (auto I : VirtualFileMapping) { + Filenames.push_back(TranslationUnitFilenames[I]); + } + + // Read the expressions. + uint64_t NumExpressions; + if (auto Err = readSize(NumExpressions)) + return Err; + // Create an array of dummy expressions that get the proper counters + // when the expressions are read, and the proper kinds when the counters + // are decoded. + Expressions.resize( + NumExpressions, + CounterExpression(CounterExpression::Subtract, Counter(), Counter())); + for (size_t I = 0; I < NumExpressions; ++I) { + if (auto Err = readCounter(Expressions[I].LHS)) + return Err; + if (auto Err = readCounter(Expressions[I].RHS)) + return Err; + } + + // Read the mapping regions sub-arrays. 
+ for (unsigned InferredFileID = 0, S = VirtualFileMapping.size(); + InferredFileID < S; ++InferredFileID) { + if (auto Err = readMappingRegionsSubArray(MappingRegions, InferredFileID, + VirtualFileMapping.size())) + return Err; + } + + // Set the counters for the expansion regions. + // i.e. Counter of expansion region = counter of the first region + // from the expanded file. + // Perform multiple passes to correctly propagate the counters through + // all the nested expansion regions. + SmallVector<CounterMappingRegion *, 8> FileIDExpansionRegionMapping; + FileIDExpansionRegionMapping.resize(VirtualFileMapping.size(), nullptr); + for (unsigned Pass = 1, S = VirtualFileMapping.size(); Pass < S; ++Pass) { + for (auto &R : MappingRegions) { + if (R.Kind != CounterMappingRegion::ExpansionRegion) + continue; + assert(!FileIDExpansionRegionMapping[R.ExpandedFileID]); + FileIDExpansionRegionMapping[R.ExpandedFileID] = &R; + } + for (auto &R : MappingRegions) { + if (FileIDExpansionRegionMapping[R.FileID]) { + FileIDExpansionRegionMapping[R.FileID]->Count = R.Count; + FileIDExpansionRegionMapping[R.FileID] = nullptr; + } + } + } + + return std::error_code(); +} + +std::error_code InstrProfSymtab::create(SectionRef &Section) { + if (auto Err = Section.getContents(Data)) + return Err; + Address = Section.getAddress(); + return std::error_code(); +} + +StringRef InstrProfSymtab::getFuncName(uint64_t Pointer, size_t Size) { + if (Pointer < Address) + return StringRef(); + auto Offset = Pointer - Address; + if (Offset + Size > Data.size()) + return StringRef(); + return Data.substr(Pointer - Address, Size); +} + +template <typename T, support::endianness Endian> +static std::error_code readCoverageMappingData( + InstrProfSymtab &ProfileNames, StringRef Data, + std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records, + std::vector<StringRef> &Filenames) { + using namespace support; + llvm::DenseSet<T> UniqueFunctionMappingData; + + // Read the records in the coverage 
data section. + for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) { + if (Buf + sizeof(CovMapHeader) > End) + return coveragemap_error::malformed; + auto CovHeader = reinterpret_cast<const coverage::CovMapHeader *>(Buf); + uint32_t NRecords = + endian::byte_swap<uint32_t, Endian>(CovHeader->NRecords); + uint32_t FilenamesSize = + endian::byte_swap<uint32_t, Endian>(CovHeader->FilenamesSize); + uint32_t CoverageSize = + endian::byte_swap<uint32_t, Endian>(CovHeader->CoverageSize); + uint32_t Version = endian::byte_swap<uint32_t, Endian>(CovHeader->Version); + Buf = reinterpret_cast<const char *>(++CovHeader); + + switch (Version) { + case CoverageMappingVersion1: + break; + default: + return coveragemap_error::unsupported_version; + } + + // Skip past the function records, saving the start and end for later. + const char *FunBuf = Buf; + Buf += NRecords * sizeof(coverage::CovMapFunctionRecord<T>); + const char *FunEnd = Buf; + + // Get the filenames. + if (Buf + FilenamesSize > End) + return coveragemap_error::malformed; + size_t FilenamesBegin = Filenames.size(); + RawCoverageFilenamesReader Reader(StringRef(Buf, FilenamesSize), Filenames); + if (auto Err = Reader.read()) + return Err; + Buf += FilenamesSize; + + // We'll read the coverage mapping records in the loop below. + const char *CovBuf = Buf; + Buf += CoverageSize; + const char *CovEnd = Buf; + + if (Buf > End) + return coveragemap_error::malformed; + // Each coverage map has an alignment of 8, so we need to adjust alignment + // before reading the next map. 
+ Buf += alignmentAdjustment(Buf, 8); + + auto CFR = + reinterpret_cast<const coverage::CovMapFunctionRecord<T> *>(FunBuf); + while ((const char *)CFR < FunEnd) { + // Read the function information + T NamePtr = endian::byte_swap<T, Endian>(CFR->NamePtr); + uint32_t NameSize = endian::byte_swap<uint32_t, Endian>(CFR->NameSize); + uint32_t DataSize = endian::byte_swap<uint32_t, Endian>(CFR->DataSize); + uint64_t FuncHash = endian::byte_swap<uint64_t, Endian>(CFR->FuncHash); + CFR++; + + // Now use that to read the coverage data. + if (CovBuf + DataSize > CovEnd) + return coveragemap_error::malformed; + auto Mapping = StringRef(CovBuf, DataSize); + CovBuf += DataSize; + + // Ignore this record if we already have a record that points to the same + // function name. This is useful to ignore the redundant records for the + // functions with ODR linkage. + if (!UniqueFunctionMappingData.insert(NamePtr).second) + continue; + + // Finally, grab the name and create a record. + StringRef FuncName = ProfileNames.getFuncName(NamePtr, NameSize); + if (NameSize && FuncName.empty()) + return coveragemap_error::malformed; + Records.push_back(BinaryCoverageReader::ProfileMappingRecord( + CoverageMappingVersion(Version), FuncName, FuncHash, Mapping, + FilenamesBegin, Filenames.size() - FilenamesBegin)); + } + } + + return std::error_code(); +} + +static const char *TestingFormatMagic = "llvmcovmtestdata"; + +static std::error_code loadTestingFormat(StringRef Data, + InstrProfSymtab &ProfileNames, + StringRef &CoverageMapping, + uint8_t &BytesInAddress, + support::endianness &Endian) { + BytesInAddress = 8; + Endian = support::endianness::little; + + Data = Data.substr(StringRef(TestingFormatMagic).size()); + if (Data.size() < 1) + return coveragemap_error::truncated; + unsigned N = 0; + auto ProfileNamesSize = + decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N); + if (N > Data.size()) + return coveragemap_error::malformed; + Data = Data.substr(N); + if (Data.size() < 
1) + return coveragemap_error::truncated; + N = 0; + uint64_t Address = + decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N); + if (N > Data.size()) + return coveragemap_error::malformed; + Data = Data.substr(N); + if (Data.size() < ProfileNamesSize) + return coveragemap_error::malformed; + ProfileNames.create(Data.substr(0, ProfileNamesSize), Address); + CoverageMapping = Data.substr(ProfileNamesSize); + return std::error_code(); +} + +static ErrorOr<SectionRef> lookupSection(ObjectFile &OF, StringRef Name) { + StringRef FoundName; + for (const auto &Section : OF.sections()) { + if (auto EC = Section.getName(FoundName)) + return EC; + if (FoundName == Name) + return Section; + } + return coveragemap_error::no_data_found; +} + +static std::error_code +loadBinaryFormat(MemoryBufferRef ObjectBuffer, InstrProfSymtab &ProfileNames, + StringRef &CoverageMapping, uint8_t &BytesInAddress, + support::endianness &Endian, StringRef Arch) { + auto BinOrErr = object::createBinary(ObjectBuffer); + if (std::error_code EC = BinOrErr.getError()) + return EC; + auto Bin = std::move(BinOrErr.get()); + std::unique_ptr<ObjectFile> OF; + if (auto *Universal = dyn_cast<object::MachOUniversalBinary>(Bin.get())) { + // If we have a universal binary, try to look up the object for the + // appropriate architecture. + auto ObjectFileOrErr = Universal->getObjectForArch(Arch); + if (std::error_code EC = ObjectFileOrErr.getError()) + return EC; + OF = std::move(ObjectFileOrErr.get()); + } else if (isa<object::ObjectFile>(Bin.get())) { + // For any other object file, upcast and take ownership. + OF.reset(cast<object::ObjectFile>(Bin.release())); + // If we've asked for a particular arch, make sure they match. + if (!Arch.empty() && OF->getArch() != Triple(Arch).getArch()) + return object_error::arch_not_found; + } else + // We can only handle object files. + return coveragemap_error::malformed; + + // The coverage uses native pointer sizes for the object it's written in. 
+ BytesInAddress = OF->getBytesInAddress(); + Endian = OF->isLittleEndian() ? support::endianness::little + : support::endianness::big; + + // Look for the sections that we are interested in. + auto NamesSection = lookupSection(*OF, getInstrProfNameSectionName(false)); + if (auto EC = NamesSection.getError()) + return EC; + auto CoverageSection = + lookupSection(*OF, getInstrProfCoverageSectionName(false)); + if (auto EC = CoverageSection.getError()) + return EC; + + // Get the contents of the given sections. + if (std::error_code EC = CoverageSection->getContents(CoverageMapping)) + return EC; + if (std::error_code EC = ProfileNames.create(*NamesSection)) + return EC; + + return std::error_code(); +} + +ErrorOr<std::unique_ptr<BinaryCoverageReader>> +BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer, + StringRef Arch) { + std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader()); + + InstrProfSymtab ProfileNames; + StringRef Coverage; + uint8_t BytesInAddress; + support::endianness Endian; + std::error_code EC; + if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic)) + // This is a special format used for testing. 
+ EC = loadTestingFormat(ObjectBuffer->getBuffer(), ProfileNames, Coverage, + BytesInAddress, Endian); + else + EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), ProfileNames, + Coverage, BytesInAddress, Endian, Arch); + if (EC) + return EC; + + if (BytesInAddress == 4 && Endian == support::endianness::little) + EC = readCoverageMappingData<uint32_t, support::endianness::little>( + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); + else if (BytesInAddress == 4 && Endian == support::endianness::big) + EC = readCoverageMappingData<uint32_t, support::endianness::big>( + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); + else if (BytesInAddress == 8 && Endian == support::endianness::little) + EC = readCoverageMappingData<uint64_t, support::endianness::little>( + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); + else if (BytesInAddress == 8 && Endian == support::endianness::big) + EC = readCoverageMappingData<uint64_t, support::endianness::big>( + ProfileNames, Coverage, Reader->MappingRecords, Reader->Filenames); + else + return coveragemap_error::malformed; + if (EC) + return EC; + return std::move(Reader); +} + +std::error_code +BinaryCoverageReader::readNextRecord(CoverageMappingRecord &Record) { + if (CurrentRecord >= MappingRecords.size()) + return coveragemap_error::eof; + + FunctionsFilenames.clear(); + Expressions.clear(); + MappingRegions.clear(); + auto &R = MappingRecords[CurrentRecord]; + RawCoverageMappingReader Reader( + R.CoverageMapping, + makeArrayRef(Filenames).slice(R.FilenamesBegin, R.FilenamesSize), + FunctionsFilenames, Expressions, MappingRegions); + if (auto Err = Reader.read()) + return Err; + + Record.FunctionName = R.FunctionName; + Record.FunctionHash = R.FunctionHash; + Record.Filenames = FunctionsFilenames; + Record.Expressions = Expressions; + Record.MappingRegions = MappingRegions; + + ++CurrentRecord; + return std::error_code(); +} diff --git 
a/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp new file mode 100644 index 0000000..d90d2f5 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp @@ -0,0 +1,183 @@ +//=-- CoverageMappingWriter.cpp - Code coverage mapping writer -------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing coverage mapping data for +// instrumentation based coverage. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/CoverageMappingWriter.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; +using namespace coverage; + +void CoverageFilenamesSectionWriter::write(raw_ostream &OS) { + encodeULEB128(Filenames.size(), OS); + for (const auto &Filename : Filenames) { + encodeULEB128(Filename.size(), OS); + OS << Filename; + } +} + +namespace { +/// \brief Gather only the expressions that are used by the mapping +/// regions in this function. 
+class CounterExpressionsMinimizer { + ArrayRef<CounterExpression> Expressions; + llvm::SmallVector<CounterExpression, 16> UsedExpressions; + std::vector<unsigned> AdjustedExpressionIDs; + +public: + void mark(Counter C) { + if (!C.isExpression()) + return; + unsigned ID = C.getExpressionID(); + AdjustedExpressionIDs[ID] = 1; + mark(Expressions[ID].LHS); + mark(Expressions[ID].RHS); + } + + void gatherUsed(Counter C) { + if (!C.isExpression() || !AdjustedExpressionIDs[C.getExpressionID()]) + return; + AdjustedExpressionIDs[C.getExpressionID()] = UsedExpressions.size(); + const auto &E = Expressions[C.getExpressionID()]; + UsedExpressions.push_back(E); + gatherUsed(E.LHS); + gatherUsed(E.RHS); + } + + CounterExpressionsMinimizer(ArrayRef<CounterExpression> Expressions, + ArrayRef<CounterMappingRegion> MappingRegions) + : Expressions(Expressions) { + AdjustedExpressionIDs.resize(Expressions.size(), 0); + for (const auto &I : MappingRegions) + mark(I.Count); + for (const auto &I : MappingRegions) + gatherUsed(I.Count); + } + + ArrayRef<CounterExpression> getExpressions() const { return UsedExpressions; } + + /// \brief Adjust the given counter to correctly transition from the old + /// expression ids to the new expression ids. + Counter adjust(Counter C) const { + if (C.isExpression()) + C = Counter::getExpression(AdjustedExpressionIDs[C.getExpressionID()]); + return C; + } +}; +} + +/// \brief Encode the counter. 
+/// +/// The encoding uses the following format: +/// Low 2 bits - Tag: +/// Counter::Zero(0) - A Counter with kind Counter::Zero +/// Counter::CounterValueReference(1) - A counter with kind +/// Counter::CounterValueReference +/// Counter::Expression(2) + CounterExpression::Subtract(0) - +/// A counter with kind Counter::Expression and an expression +/// with kind CounterExpression::Subtract +/// Counter::Expression(2) + CounterExpression::Add(1) - +/// A counter with kind Counter::Expression and an expression +/// with kind CounterExpression::Add +/// Remaining bits - Counter/Expression ID. +static unsigned encodeCounter(ArrayRef<CounterExpression> Expressions, + Counter C) { + unsigned Tag = unsigned(C.getKind()); + if (C.isExpression()) + Tag += Expressions[C.getExpressionID()].Kind; + unsigned ID = C.getCounterID(); + assert(ID <= + (std::numeric_limits<unsigned>::max() >> Counter::EncodingTagBits)); + return Tag | (ID << Counter::EncodingTagBits); +} + +static void writeCounter(ArrayRef<CounterExpression> Expressions, Counter C, + raw_ostream &OS) { + encodeULEB128(encodeCounter(Expressions, C), OS); +} + +void CoverageMappingWriter::write(raw_ostream &OS) { + // Sort the regions in an ascending order by the file id and the starting + // location. + std::stable_sort(MappingRegions.begin(), MappingRegions.end()); + + // Write out the fileid -> filename mapping. + encodeULEB128(VirtualFileMapping.size(), OS); + for (const auto &FileID : VirtualFileMapping) + encodeULEB128(FileID, OS); + + // Write out the expressions. + CounterExpressionsMinimizer Minimizer(Expressions, MappingRegions); + auto MinExpressions = Minimizer.getExpressions(); + encodeULEB128(MinExpressions.size(), OS); + for (const auto &E : MinExpressions) { + writeCounter(MinExpressions, Minimizer.adjust(E.LHS), OS); + writeCounter(MinExpressions, Minimizer.adjust(E.RHS), OS); + } + + // Write out the mapping regions. 
+ // Split the regions into subarrays where each region in a + // subarray has a fileID which is the index of that subarray. + unsigned PrevLineStart = 0; + unsigned CurrentFileID = ~0U; + for (auto I = MappingRegions.begin(), E = MappingRegions.end(); I != E; ++I) { + if (I->FileID != CurrentFileID) { + // Ensure that all file ids have at least one mapping region. + assert(I->FileID == (CurrentFileID + 1)); + // Find the number of regions with this file id. + unsigned RegionCount = 1; + for (auto J = I + 1; J != E && I->FileID == J->FileID; ++J) + ++RegionCount; + // Start a new region sub-array. + encodeULEB128(RegionCount, OS); + + CurrentFileID = I->FileID; + PrevLineStart = 0; + } + Counter Count = Minimizer.adjust(I->Count); + switch (I->Kind) { + case CounterMappingRegion::CodeRegion: + writeCounter(MinExpressions, Count, OS); + break; + case CounterMappingRegion::ExpansionRegion: { + assert(Count.isZero()); + assert(I->ExpandedFileID <= + (std::numeric_limits<unsigned>::max() >> + Counter::EncodingCounterTagAndExpansionRegionTagBits)); + // Mark an expansion region with a set bit that follows the counter tag, + // and pack the expanded file id into the remaining bits. + unsigned EncodedTagExpandedFileID = + (1 << Counter::EncodingTagBits) | + (I->ExpandedFileID + << Counter::EncodingCounterTagAndExpansionRegionTagBits); + encodeULEB128(EncodedTagExpandedFileID, OS); + break; + } + case CounterMappingRegion::SkippedRegion: + assert(Count.isZero()); + encodeULEB128(unsigned(I->Kind) + << Counter::EncodingCounterTagAndExpansionRegionTagBits, + OS); + break; + } + assert(I->LineStart >= PrevLineStart); + encodeULEB128(I->LineStart - PrevLineStart, OS); + encodeULEB128(I->ColumnStart, OS); + assert(I->LineEnd >= I->LineStart); + encodeULEB128(I->LineEnd - I->LineStart, OS); + encodeULEB128(I->ColumnEnd, OS); + PrevLineStart = I->LineStart; + } + // Ensure that all file ids have at least one mapping region. 
+ assert(CurrentFileID == (VirtualFileMapping.size() - 1)); +} diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp new file mode 100644 index 0000000..027f0f7 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp @@ -0,0 +1,578 @@ +//=-- InstrProf.cpp - Instrumented profiling format support -----------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for clang's instrumentation based PGO and +// coverage. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Compression.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; + +namespace { +class InstrProfErrorCategoryType : public std::error_category { + const char *name() const LLVM_NOEXCEPT override { return "llvm.instrprof"; } + std::string message(int IE) const override { + instrprof_error E = static_cast<instrprof_error>(IE); + switch (E) { + case instrprof_error::success: + return "Success"; + case instrprof_error::eof: + return "End of File"; + case instrprof_error::unrecognized_format: + return "Unrecognized instrumentation profile encoding format"; + case instrprof_error::bad_magic: + return "Invalid instrumentation profile data (bad magic)"; + case instrprof_error::bad_header: + return "Invalid instrumentation profile data (file header is corrupt)"; + case instrprof_error::unsupported_version: + return "Unsupported instrumentation profile format version"; + case 
instrprof_error::unsupported_hash_type: + return "Unsupported instrumentation profile hash type"; + case instrprof_error::too_large: + return "Too much profile data"; + case instrprof_error::truncated: + return "Truncated profile data"; + case instrprof_error::malformed: + return "Malformed instrumentation profile data"; + case instrprof_error::unknown_function: + return "No profile data available for function"; + case instrprof_error::hash_mismatch: + return "Function control flow change detected (hash mismatch)"; + case instrprof_error::count_mismatch: + return "Function basic block count change detected (counter mismatch)"; + case instrprof_error::counter_overflow: + return "Counter overflow"; + case instrprof_error::value_site_count_mismatch: + return "Function value site count change detected (counter mismatch)"; + } + llvm_unreachable("A value of instrprof_error has no message."); + } +}; +} + +static ManagedStatic<InstrProfErrorCategoryType> ErrorCategory; + +const std::error_category &llvm::instrprof_category() { + return *ErrorCategory; +} + +namespace llvm { + +std::string getPGOFuncName(StringRef RawFuncName, + GlobalValue::LinkageTypes Linkage, + StringRef FileName, + uint64_t Version LLVM_ATTRIBUTE_UNUSED) { + + // Function names may be prefixed with a binary '1' to indicate + // that the backend should not modify the symbols due to any platform + // naming convention. Do not include that '1' in the PGO profile name. + if (RawFuncName[0] == '\1') + RawFuncName = RawFuncName.substr(1); + + std::string FuncName = RawFuncName; + if (llvm::GlobalValue::isLocalLinkage(Linkage)) { + // For local symbols, prepend the main file name to distinguish them. + // Do not include the full path in the file name since there's no guarantee + // that it will stay the same, e.g., if the files are checked out from + // version control in different locations. 
+ if (FileName.empty()) + FuncName = FuncName.insert(0, "<unknown>:"); + else + FuncName = FuncName.insert(0, FileName.str() + ":"); + } + return FuncName; +} + +std::string getPGOFuncName(const Function &F, uint64_t Version) { + return getPGOFuncName(F.getName(), F.getLinkage(), F.getParent()->getName(), + Version); +} + +StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) { + if (FileName.empty()) + return PGOFuncName; + // Drop the file name including ':'. See also getPGOFuncName. + if (PGOFuncName.startswith(FileName)) + PGOFuncName = PGOFuncName.drop_front(FileName.size() + 1); + return PGOFuncName; +} + +// \p FuncName is the string used as profile lookup key for the function. A +// symbol is created to hold the name. Return the legalized symbol name. +static std::string getPGOFuncNameVarName(StringRef FuncName, + GlobalValue::LinkageTypes Linkage) { + std::string VarName = getInstrProfNameVarPrefix(); + VarName += FuncName; + + if (!GlobalValue::isLocalLinkage(Linkage)) + return VarName; + + // Now fix up illegal chars in local VarName that may upset the assembler. + const char *InvalidChars = "-:<>\"'"; + size_t found = VarName.find_first_of(InvalidChars); + while (found != std::string::npos) { + VarName[found] = '_'; + found = VarName.find_first_of(InvalidChars, found + 1); + } + return VarName; +} + +GlobalVariable *createPGOFuncNameVar(Module &M, + GlobalValue::LinkageTypes Linkage, + StringRef FuncName) { + + // We generally want to match the function's linkage, but available_externally + // and extern_weak both have the wrong semantics, and anything that doesn't + // need to link across compilation units doesn't need to be visible at all. 
+ if (Linkage == GlobalValue::ExternalWeakLinkage) + Linkage = GlobalValue::LinkOnceAnyLinkage; + else if (Linkage == GlobalValue::AvailableExternallyLinkage) + Linkage = GlobalValue::LinkOnceODRLinkage; + else if (Linkage == GlobalValue::InternalLinkage || + Linkage == GlobalValue::ExternalLinkage) + Linkage = GlobalValue::PrivateLinkage; + + auto *Value = ConstantDataArray::getString(M.getContext(), FuncName, false); + auto FuncNameVar = + new GlobalVariable(M, Value->getType(), true, Linkage, Value, + getPGOFuncNameVarName(FuncName, Linkage)); + + // Hide the symbol so that we correctly get a copy for each executable. + if (!GlobalValue::isLocalLinkage(FuncNameVar->getLinkage())) + FuncNameVar->setVisibility(GlobalValue::HiddenVisibility); + + return FuncNameVar; +} + +GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName) { + return createPGOFuncNameVar(*F.getParent(), F.getLinkage(), FuncName); +} + +int collectPGOFuncNameStrings(const std::vector<std::string> &NameStrs, + bool doCompression, std::string &Result) { + uint8_t Header[16], *P = Header; + std::string UncompressedNameStrings = + join(NameStrs.begin(), NameStrs.end(), StringRef(" ")); + + unsigned EncLen = encodeULEB128(UncompressedNameStrings.length(), P); + P += EncLen; + + auto WriteStringToResult = [&](size_t CompressedLen, + const std::string &InputStr) { + EncLen = encodeULEB128(CompressedLen, P); + P += EncLen; + char *HeaderStr = reinterpret_cast<char *>(&Header[0]); + unsigned HeaderLen = P - &Header[0]; + Result.append(HeaderStr, HeaderLen); + Result += InputStr; + return 0; + }; + + if (!doCompression) + return WriteStringToResult(0, UncompressedNameStrings); + + SmallVector<char, 128> CompressedNameStrings; + zlib::Status Success = + zlib::compress(StringRef(UncompressedNameStrings), CompressedNameStrings, + zlib::BestSizeCompression); + + if (Success != zlib::StatusOK) + return 1; + + return WriteStringToResult( + CompressedNameStrings.size(), + 
std::string(CompressedNameStrings.data(), CompressedNameStrings.size())); +} + +StringRef getPGOFuncNameInitializer(GlobalVariable *NameVar) { + auto *Arr = cast<ConstantDataArray>(NameVar->getInitializer()); + StringRef NameStr = + Arr->isCString() ? Arr->getAsCString() : Arr->getAsString(); + return NameStr; +} + +int collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars, + std::string &Result) { + std::vector<std::string> NameStrs; + for (auto *NameVar : NameVars) { + NameStrs.push_back(getPGOFuncNameInitializer(NameVar)); + } + return collectPGOFuncNameStrings(NameStrs, zlib::isAvailable(), Result); +} + +int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) { + const uint8_t *P = reinterpret_cast<const uint8_t *>(NameStrings.data()); + const uint8_t *EndP = reinterpret_cast<const uint8_t *>(NameStrings.data() + + NameStrings.size()); + while (P < EndP) { + uint32_t N; + uint64_t UncompressedSize = decodeULEB128(P, &N); + P += N; + uint64_t CompressedSize = decodeULEB128(P, &N); + P += N; + bool isCompressed = (CompressedSize != 0); + SmallString<128> UncompressedNameStrings; + StringRef NameStrings; + if (isCompressed) { + StringRef CompressedNameStrings(reinterpret_cast<const char *>(P), + CompressedSize); + if (zlib::uncompress(CompressedNameStrings, UncompressedNameStrings, + UncompressedSize) != zlib::StatusOK) + return 1; + P += CompressedSize; + NameStrings = StringRef(UncompressedNameStrings.data(), + UncompressedNameStrings.size()); + } else { + NameStrings = + StringRef(reinterpret_cast<const char *>(P), UncompressedSize); + P += UncompressedSize; + } + // Now parse the name strings. 
+ SmallVector<StringRef, 0> Names; + NameStrings.split(Names, ' '); + for (StringRef &Name : Names) + Symtab.addFuncName(Name); + + while (P < EndP && *P == 0) + P++; + } + Symtab.finalizeSymtab(); + return 0; +} + +instrprof_error +InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input, + uint64_t Weight) { + this->sortByTargetValues(); + Input.sortByTargetValues(); + auto I = ValueData.begin(); + auto IE = ValueData.end(); + instrprof_error Result = instrprof_error::success; + for (auto J = Input.ValueData.begin(), JE = Input.ValueData.end(); J != JE; + ++J) { + while (I != IE && I->Value < J->Value) + ++I; + if (I != IE && I->Value == J->Value) { + uint64_t JCount = J->Count; + bool Overflowed; + if (Weight > 1) { + JCount = SaturatingMultiply(JCount, Weight, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + I->Count = SaturatingAdd(I->Count, JCount, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + ++I; + continue; + } + ValueData.insert(I, *J); + } + return Result; +} + +// Merge Value Profile data from Src record to this record for ValueKind. +// Scale merged value counts by \p Weight. 
+instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind, + InstrProfRecord &Src, + uint64_t Weight) { + uint32_t ThisNumValueSites = getNumValueSites(ValueKind); + uint32_t OtherNumValueSites = Src.getNumValueSites(ValueKind); + if (ThisNumValueSites != OtherNumValueSites) + return instrprof_error::value_site_count_mismatch; + std::vector<InstrProfValueSiteRecord> &ThisSiteRecords = + getValueSitesForKind(ValueKind); + std::vector<InstrProfValueSiteRecord> &OtherSiteRecords = + Src.getValueSitesForKind(ValueKind); + instrprof_error Result = instrprof_error::success; + for (uint32_t I = 0; I < ThisNumValueSites; I++) + MergeResult(Result, + ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I], Weight)); + return Result; +} + +instrprof_error InstrProfRecord::merge(InstrProfRecord &Other, + uint64_t Weight) { + // If the number of counters doesn't match we either have bad data + // or a hash collision. + if (Counts.size() != Other.Counts.size()) + return instrprof_error::count_mismatch; + + instrprof_error Result = instrprof_error::success; + + for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { + bool Overflowed; + uint64_t OtherCount = Other.Counts[I]; + if (Weight > 1) { + OtherCount = SaturatingMultiply(OtherCount, Weight, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + Counts[I] = SaturatingAdd(Counts[I], OtherCount, &Overflowed); + if (Overflowed) + Result = instrprof_error::counter_overflow; + } + + for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) + MergeResult(Result, mergeValueProfData(Kind, Other, Weight)); + + return Result; +} + +// Map indirect call target name hash to name string. 
+uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind, + ValueMapType *ValueMap) { + if (!ValueMap) + return Value; + switch (ValueKind) { + case IPVK_IndirectCallTarget: { + auto Result = + std::lower_bound(ValueMap->begin(), ValueMap->end(), Value, + [](const std::pair<uint64_t, uint64_t> &LHS, + uint64_t RHS) { return LHS.first < RHS; }); + if (Result != ValueMap->end()) + Value = (uint64_t)Result->second; + break; + } + } + return Value; +} + +void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site, + InstrProfValueData *VData, uint32_t N, + ValueMapType *ValueMap) { + for (uint32_t I = 0; I < N; I++) { + VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap); + } + std::vector<InstrProfValueSiteRecord> &ValueSites = + getValueSitesForKind(ValueKind); + if (N == 0) + ValueSites.push_back(InstrProfValueSiteRecord()); + else + ValueSites.emplace_back(VData, VData + N); +} + +#define INSTR_PROF_COMMON_API_IMPL +#include "llvm/ProfileData/InstrProfData.inc" + +/*! + * \brief ValueProfRecordClosure Interface implementation for InstrProfRecord + * class. These C wrappers are used as adaptors so that C++ code can be + * invoked as callbacks. 
+ */ +uint32_t getNumValueKindsInstrProf(const void *Record) { + return reinterpret_cast<const InstrProfRecord *>(Record)->getNumValueKinds(); +} + +uint32_t getNumValueSitesInstrProf(const void *Record, uint32_t VKind) { + return reinterpret_cast<const InstrProfRecord *>(Record) + ->getNumValueSites(VKind); +} + +uint32_t getNumValueDataInstrProf(const void *Record, uint32_t VKind) { + return reinterpret_cast<const InstrProfRecord *>(Record) + ->getNumValueData(VKind); +} + +uint32_t getNumValueDataForSiteInstrProf(const void *R, uint32_t VK, + uint32_t S) { + return reinterpret_cast<const InstrProfRecord *>(R) + ->getNumValueDataForSite(VK, S); +} + +void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst, + uint32_t K, uint32_t S, + uint64_t (*Mapper)(uint32_t, uint64_t)) { + return reinterpret_cast<const InstrProfRecord *>(R)->getValueForSite( + Dst, K, S, Mapper); +} + +ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) { + ValueProfData *VD = + (ValueProfData *)(new (::operator new(TotalSizeInBytes)) ValueProfData()); + memset(VD, 0, TotalSizeInBytes); + return VD; +} + +static ValueProfRecordClosure InstrProfRecordClosure = { + 0, + getNumValueKindsInstrProf, + getNumValueSitesInstrProf, + getNumValueDataInstrProf, + getNumValueDataForSiteInstrProf, + 0, + getValueForSiteInstrProf, + allocValueProfDataInstrProf}; + +// Wrapper implementation using the closure mechanism. +uint32_t ValueProfData::getSize(const InstrProfRecord &Record) { + InstrProfRecordClosure.Record = &Record; + return getValueProfDataSize(&InstrProfRecordClosure); +} + +// Wrapper implementation using the closure mechanism. 
+std::unique_ptr<ValueProfData> +ValueProfData::serializeFrom(const InstrProfRecord &Record) { + InstrProfRecordClosure.Record = &Record; + + std::unique_ptr<ValueProfData> VPD( + serializeValueProfDataFrom(&InstrProfRecordClosure, nullptr)); + return VPD; +} + +void ValueProfRecord::deserializeTo(InstrProfRecord &Record, + InstrProfRecord::ValueMapType *VMap) { + Record.reserveSites(Kind, NumValueSites); + + InstrProfValueData *ValueData = getValueProfRecordValueData(this); + for (uint64_t VSite = 0; VSite < NumValueSites; ++VSite) { + uint8_t ValueDataCount = this->SiteCountArray[VSite]; + Record.addValueData(Kind, VSite, ValueData, ValueDataCount, VMap); + ValueData += ValueDataCount; + } +} + +// For writing/serializing, Old is the host endianness, and New is +// byte order intended on disk. For Reading/deserialization, Old +// is the on-disk source endianness, and New is the host endianness. +void ValueProfRecord::swapBytes(support::endianness Old, + support::endianness New) { + using namespace support; + if (Old == New) + return; + + if (getHostEndianness() != Old) { + sys::swapByteOrder<uint32_t>(NumValueSites); + sys::swapByteOrder<uint32_t>(Kind); + } + uint32_t ND = getValueProfRecordNumValueData(this); + InstrProfValueData *VD = getValueProfRecordValueData(this); + + // No need to swap byte array: SiteCountArrray. 
+ for (uint32_t I = 0; I < ND; I++) { + sys::swapByteOrder<uint64_t>(VD[I].Value); + sys::swapByteOrder<uint64_t>(VD[I].Count); + } + if (getHostEndianness() == Old) { + sys::swapByteOrder<uint32_t>(NumValueSites); + sys::swapByteOrder<uint32_t>(Kind); + } +} + +void ValueProfData::deserializeTo(InstrProfRecord &Record, + InstrProfRecord::ValueMapType *VMap) { + if (NumValueKinds == 0) + return; + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < NumValueKinds; K++) { + VR->deserializeTo(Record, VMap); + VR = getValueProfRecordNext(VR); + } +} + +template <class T> +static T swapToHostOrder(const unsigned char *&D, support::endianness Orig) { + using namespace support; + if (Orig == little) + return endian::readNext<T, little, unaligned>(D); + else + return endian::readNext<T, big, unaligned>(D); +} + +static std::unique_ptr<ValueProfData> allocValueProfData(uint32_t TotalSize) { + return std::unique_ptr<ValueProfData>(new (::operator new(TotalSize)) + ValueProfData()); +} + +instrprof_error ValueProfData::checkIntegrity() { + if (NumValueKinds > IPVK_Last + 1) + return instrprof_error::malformed; + // Total size needs to be mulltiple of quadword size. 
+ if (TotalSize % sizeof(uint64_t)) + return instrprof_error::malformed; + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < this->NumValueKinds; K++) { + if (VR->Kind > IPVK_Last) + return instrprof_error::malformed; + VR = getValueProfRecordNext(VR); + if ((char *)VR - (char *)this > (ptrdiff_t)TotalSize) + return instrprof_error::malformed; + } + return instrprof_error::success; +} + +ErrorOr<std::unique_ptr<ValueProfData>> +ValueProfData::getValueProfData(const unsigned char *D, + const unsigned char *const BufferEnd, + support::endianness Endianness) { + using namespace support; + if (D + sizeof(ValueProfData) > BufferEnd) + return instrprof_error::truncated; + + const unsigned char *Header = D; + uint32_t TotalSize = swapToHostOrder<uint32_t>(Header, Endianness); + if (D + TotalSize > BufferEnd) + return instrprof_error::too_large; + + std::unique_ptr<ValueProfData> VPD = allocValueProfData(TotalSize); + memcpy(VPD.get(), D, TotalSize); + // Byte swap. 
+ VPD->swapBytesToHost(Endianness); + + instrprof_error EC = VPD->checkIntegrity(); + if (EC != instrprof_error::success) + return EC; + + return std::move(VPD); +} + +void ValueProfData::swapBytesToHost(support::endianness Endianness) { + using namespace support; + if (Endianness == getHostEndianness()) + return; + + sys::swapByteOrder<uint32_t>(TotalSize); + sys::swapByteOrder<uint32_t>(NumValueKinds); + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < NumValueKinds; K++) { + VR->swapBytes(Endianness, getHostEndianness()); + VR = getValueProfRecordNext(VR); + } +} + +void ValueProfData::swapBytesFromHost(support::endianness Endianness) { + using namespace support; + if (Endianness == getHostEndianness()) + return; + + ValueProfRecord *VR = getFirstValueProfRecord(this); + for (uint32_t K = 0; K < NumValueKinds; K++) { + ValueProfRecord *NVR = getValueProfRecordNext(VR); + VR->swapBytes(getHostEndianness(), Endianness); + VR = NVR; + } + sys::swapByteOrder<uint32_t>(TotalSize); + sys::swapByteOrder<uint32_t>(NumValueKinds); +} + +} diff --git a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp new file mode 100644 index 0000000..5e83456 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp @@ -0,0 +1,653 @@ +//=-- InstrProfReader.cpp - Instrumented profiling reader -------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading profiling data for clang's +// instrumentation based PGO and coverage. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cassert>
+
+using namespace llvm;
+
+/// Load \p Path through MemoryBuffer::getFileOrSTDIN and hand back the buffer,
+/// or the error reported while loading it.
+static ErrorOr<std::unique_ptr<MemoryBuffer>>
+setupMemoryBuffer(std::string Path) {
+  ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
+      MemoryBuffer::getFileOrSTDIN(Path);
+  if (std::error_code EC = BufferOrErr.getError())
+    return EC;
+  return std::move(BufferOrErr.get());
+}
+
+/// Run the reader's header parsing step and forward its result.
+static std::error_code initializeReader(InstrProfReader &Reader) {
+  return Reader.readHeader();
+}
+
+/// Create a reader for the profile stored at \p Path, picking the concrete
+/// reader from the buffer contents.
+ErrorOr<std::unique_ptr<InstrProfReader>>
+InstrProfReader::create(std::string Path) {
+  // Set up the buffer to read.
+  auto BufferOrError = setupMemoryBuffer(Path);
+  if (std::error_code EC = BufferOrError.getError())
+    return EC;
+  return InstrProfReader::create(std::move(BufferOrError.get()));
+}
+
+/// Create a reader for \p Buffer. The formats are probed in the order
+/// indexed, raw 64-bit, raw 32-bit, text; the first match wins.
+ErrorOr<std::unique_ptr<InstrProfReader>>
+InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
+  // Sanity check the buffer.
+  if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
+    return instrprof_error::too_large;
+
+  std::unique_ptr<InstrProfReader> Result;
+  // Create the reader.
+  if (IndexedInstrProfReader::hasFormat(*Buffer))
+    Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
+  else if (RawInstrProfReader64::hasFormat(*Buffer))
+    Result.reset(new RawInstrProfReader64(std::move(Buffer)));
+  else if (RawInstrProfReader32::hasFormat(*Buffer))
+    Result.reset(new RawInstrProfReader32(std::move(Buffer)));
+  else if (TextInstrProfReader::hasFormat(*Buffer))
+    Result.reset(new TextInstrProfReader(std::move(Buffer)));
+  else
+    return instrprof_error::unrecognized_format;
+
+  // Initialize the reader and return the result.
+  if (std::error_code EC = initializeReader(*Result))
+    return EC;
+
+  return std::move(Result);
+}
+
+/// Create an indexed-profile reader for the file at \p Path.
+ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
+IndexedInstrProfReader::create(std::string Path) {
+  // Set up the buffer to read.
+  auto BufferOrError = setupMemoryBuffer(Path);
+  if (std::error_code EC = BufferOrError.getError())
+    return EC;
+  return IndexedInstrProfReader::create(std::move(BufferOrError.get()));
+}
+
+
+/// Create an indexed-profile reader for \p Buffer; fails with bad_magic when
+/// the buffer does not carry the indexed format.
+ErrorOr<std::unique_ptr<IndexedInstrProfReader>>
+IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
+  // Sanity check the buffer.
+  if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
+    return instrprof_error::too_large;
+
+  // Create the reader.
+  if (!IndexedInstrProfReader::hasFormat(*Buffer))
+    return instrprof_error::bad_magic;
+  auto Result = llvm::make_unique<IndexedInstrProfReader>(std::move(Buffer));
+
+  // Initialize the reader and return the result.
+  if (std::error_code EC = initializeReader(*Result))
+    return EC;
+
+  return std::move(Result);
+}
+
+/// Advance to the next record; any error from the reader (including eof)
+/// turns this iterator into the default-constructed iterator.
+void InstrProfIterator::Increment() {
+  if (Reader->readNextRecord(Record))
+    *this = InstrProfIterator();
+}
+
+/// Heuristic check that \p Buffer holds a text profile: the first few bytes
+/// must all be printable or whitespace. An empty buffer is accepted.
+bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) {
+  // Verify that this really looks like plain ASCII text by checking a
+  // 'reasonable' number of characters (up to profile magic size).
+  size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t));
+  // Walk the raw bytes directly; going through a StringRef built from the
+  // start pointer would only add a needless strlen over the whole buffer.
+  const char *Start = Buffer.getBufferStart();
+  return std::all_of(Start, Start + count, [](char c) {
+    // The <cctype> classifiers require a value representable as unsigned
+    // char; passing a negative plain char is undefined behavior.
+    unsigned char UC = static_cast<unsigned char>(c);
+    return ::isprint(UC) || ::isspace(UC);
+  });
+}
+
+/// The text format has no header; just start with a fresh symbol table.
+std::error_code TextInstrProfReader::readHeader() {
+  Symtab.reset(new InstrProfSymtab());
+  return success();
+}
+
+/// Parse the optional value profile section that may follow the counters of
+/// a text record and add what is found to \p Record. A record without such a
+/// section (end of input, or a next line that is not a number) is not an
+/// error.
+std::error_code
+TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
+
+#define CHECK_LINE_END(Line) \
+  if (Line.is_at_end()) \
+    return error(instrprof_error::truncated);
+#define READ_NUM(Str, Dst) \
+  if ((Str).getAsInteger(10, (Dst))) \
+    return error(instrprof_error::malformed);
+#define VP_READ_ADVANCE(Val) \
+  CHECK_LINE_END(Line); \
+  uint32_t Val; \
+  READ_NUM((*Line), (Val)); \
+  Line++;
+
+  if (Line.is_at_end())
+    return success();
+
+  uint32_t NumValueKinds;
+  if (Line->getAsInteger(10, NumValueKinds)) {
+    // No value profile data
+    return success();
+  }
+  if (NumValueKinds == 0 || NumValueKinds > IPVK_Last + 1)
+    return error(instrprof_error::malformed);
+  Line++;
+
+  for (uint32_t VK = 0; VK < NumValueKinds; VK++) {
+    VP_READ_ADVANCE(ValueKind);
+    if (ValueKind > IPVK_Last)
+      return error(instrprof_error::malformed);
+    VP_READ_ADVANCE(NumValueSites);
+    if (!NumValueSites)
+      continue;
+
+    // VK only counts the sections seen so far. The kind the data belongs to
+    // is the ValueKind read from the file, so that is what must be used to
+    // address the record.
+    Record.reserveSites(ValueKind, NumValueSites);
+    for (uint32_t S = 0; S < NumValueSites; S++) {
+      VP_READ_ADVANCE(NumValueData);
+
+      std::vector<InstrProfValueData> CurrentValues;
+      for (uint32_t V = 0; V < NumValueData; V++) {
+        CHECK_LINE_END(Line);
+        std::pair<StringRef, StringRef> VD = Line->split(':');
+        uint64_t TakenCount, Value;
+        if (ValueKind == IPVK_IndirectCallTarget) {
+          // Indirect call targets are written by name; store the name hash.
+          Symtab->addFuncName(VD.first);
+          Value = IndexedInstrProf::ComputeHash(VD.first);
+        } else {
+          READ_NUM(VD.first, Value);
+        }
+        READ_NUM(VD.second, TakenCount);
+        CurrentValues.push_back({Value, TakenCount});
+        Line++;
+      }
+      Record.addValueData(ValueKind, S, CurrentValues.data(), NumValueData,
+                          nullptr);
+    }
+  }
+  return success();
+
+#undef CHECK_LINE_END
+#undef READ_NUM
+#undef VP_READ_ADVANCE
+}
+
+/// Read the next function record (name, hash, counters and optional value
+/// profile data) from the text input into \p Record.
+std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
+  // Skip empty lines and comments.
+  while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
+    ++Line;
+  // If we hit EOF while looking for a name, we're done.
+  if (Line.is_at_end()) {
+    Symtab->finalizeSymtab();
+    return error(instrprof_error::eof);
+  }
+
+  // Read the function name.
+  Record.Name = *Line++;
+  Symtab->addFuncName(Record.Name);
+
+  // Read the function hash.
+  if (Line.is_at_end())
+    return error(instrprof_error::truncated);
+  if ((Line++)->getAsInteger(0, Record.Hash))
+    return error(instrprof_error::malformed);
+
+  // Read the number of counters.
+  uint64_t NumCounters;
+  if (Line.is_at_end())
+    return error(instrprof_error::truncated);
+  if ((Line++)->getAsInteger(10, NumCounters))
+    return error(instrprof_error::malformed);
+  if (NumCounters == 0)
+    return error(instrprof_error::malformed);
+
+  // Read each counter and fill our internal storage with the values.
+  Record.Counts.clear();
+  Record.Counts.reserve(NumCounters);
+  for (uint64_t I = 0; I < NumCounters; ++I) {
+    if (Line.is_at_end())
+      return error(instrprof_error::truncated);
+    uint64_t Count;
+    if ((Line++)->getAsInteger(10, Count))
+      return error(instrprof_error::malformed);
+    Record.Counts.push_back(Count);
+  }
+
+  // Check if value profile data exists and read it if so.
+  if (std::error_code EC = readValueProfileData(Record))
+    return EC;
+
+  // This is needed to avoid two pass parsing because llvm-profdata
+  // does dumping while reading.
+  Symtab->finalizeSymtab();
+  return success();
+}
+
+/// Check whether \p DataBuffer starts with the raw profile magic for this
+/// pointer width, in either byte order.
+template <class IntPtrT>
+bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
+  if (DataBuffer.getBufferSize() < sizeof(uint64_t))
+    return false;
+  uint64_t Magic =
+      *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
+  return RawInstrProf::getMagic<IntPtrT>() == Magic ||
+         sys::getSwappedBytes(RawInstrProf::getMagic<IntPtrT>()) == Magic;
+}
+
+/// Validate the magic and size of the first header in the buffer, record
+/// whether the data needs byte swapping, then parse the header.
+template <class IntPtrT>
+std::error_code RawInstrProfReader<IntPtrT>::readHeader() {
+  if (!hasFormat(*DataBuffer))
+    return error(instrprof_error::bad_magic);
+  if (DataBuffer->getBufferSize() < sizeof(RawInstrProf::Header))
+    return error(instrprof_error::bad_header);
+  auto *Header = reinterpret_cast<const RawInstrProf::Header *>(
+      DataBuffer->getBufferStart());
+  ShouldSwapBytes = Header->Magic != RawInstrProf::getMagic<IntPtrT>();
+  return readHeader(*Header);
+}
+
+/// Locate and parse the header of the next profile in the buffer, starting
+/// the search at \p CurrentPos.
+template <class IntPtrT>
+std::error_code
+RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
+  const char *End = DataBuffer->getBufferEnd();
+  // Skip zero padding between profiles.
+  while (CurrentPos != End && *CurrentPos == 0)
+    ++CurrentPos;
+  // If there's nothing left, we're done.
+  if (CurrentPos == End)
+    return instrprof_error::eof;
+  // If there isn't enough space for another header, this is probably just
+  // garbage at the end of the file. Compare sizes rather than pointers so no
+  // pointer past the end of the buffer is ever formed.
+  if (static_cast<size_t>(End - CurrentPos) < sizeof(RawInstrProf::Header))
+    return instrprof_error::malformed;
+  // The writer ensures each profile is padded to start at an aligned address.
+  if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>())
+    return instrprof_error::malformed;
+  // The magic should have the same byte order as in the previous header.
+  uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
+  if (Magic != swap(RawInstrProf::getMagic<IntPtrT>()))
+    return instrprof_error::bad_magic;
+
+  // There's another profile to read, so we need to process the header.
+  auto *Header = reinterpret_cast<const RawInstrProf::Header *>(CurrentPos);
+  return readHeader(*Header);
+}
+
+/// Fill \p Symtab with the name of every function in the current profile
+/// and, for entries with a non-null function pointer, the mapping from that
+/// address to the name hash.
+template <class IntPtrT>
+void RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
+  for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
+    StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize));
+    Symtab.addFuncName(FunctionName);
+    const IntPtrT FPtr = swap(I->FunctionPointer);
+    if (!FPtr)
+      continue;
+    Symtab.mapAddress(FPtr, IndexedInstrProf::ComputeHash(FunctionName));
+  }
+  Symtab.finalizeSymtab();
+}
+
+/// Parse \p Header, compute where each section of the profile lives, check
+/// that the profile fits in the buffer and build the symbol table.
+template <class IntPtrT>
+std::error_code
+RawInstrProfReader<IntPtrT>::readHeader(const RawInstrProf::Header &Header) {
+  if (swap(Header.Version) != RawInstrProf::Version)
+    return error(instrprof_error::unsupported_version);
+
+  CountersDelta = swap(Header.CountersDelta);
+  NamesDelta = swap(Header.NamesDelta);
+  auto DataSize = swap(Header.DataSize);
+  auto CountersSize = swap(Header.CountersSize);
+  auto NamesSize = swap(Header.NamesSize);
+  auto ValueDataSize = swap(Header.ValueDataSize);
+  ValueKindLast = swap(Header.ValueKindLast);
+
+  auto DataSizeInBytes = DataSize * sizeof(RawInstrProf::ProfileData<IntPtrT>);
+  auto PaddingSize = getNumPaddingBytes(NamesSize);
+
+  ptrdiff_t DataOffset = sizeof(RawInstrProf::Header);
+  ptrdiff_t CountersOffset = DataOffset + DataSizeInBytes;
+  ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;
+  ptrdiff_t ValueDataOffset = NamesOffset + NamesSize + PaddingSize;
+  size_t ProfileSize = ValueDataOffset + ValueDataSize;
+
+  auto *Start = reinterpret_cast<const char *>(&Header);
+  // Compare against the number of bytes left instead of computing
+  // Start + ProfileSize, which may point outside the buffer.
+  if (ProfileSize > static_cast<size_t>(DataBuffer->getBufferEnd() - Start))
+    return error(instrprof_error::bad_header);
+
+  Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
+      Start + DataOffset);
+  DataEnd = Data + DataSize;
+  CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
+  NamesStart = Start + NamesOffset;
+  ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
+  ProfileEnd = Start + ProfileSize;
+
+  std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
+  createSymtab(*NewSymtab.get());
+  Symtab = std::move(NewSymtab);
+  return success();
+}
+
+/// Set \p Record.Name from the current data entry and verify that the name
+/// lies inside the names section.
+template <class IntPtrT>
+std::error_code RawInstrProfReader<IntPtrT>::readName(InstrProfRecord &Record) {
+  Record.Name = StringRef(getName(Data->NamePtr), swap(Data->NameSize));
+  if (Record.Name.data() < NamesStart ||
+      Record.Name.data() + Record.Name.size() >
+          reinterpret_cast<const char *>(ValueDataStart))
+    return error(instrprof_error::malformed);
+  return success();
+}
+
+/// Set \p Record.Hash from the current data entry.
+template <class IntPtrT>
+std::error_code RawInstrProfReader<IntPtrT>::readFuncHash(
+    InstrProfRecord &Record) {
+  Record.Hash = swap(Data->FuncHash);
+  return success();
+}
+
+/// Copy the counters of the current data entry into \p Record.Counts,
+/// byte swapping them when needed. Fails when there are no counters or when
+/// they fall outside the counters section.
+template <class IntPtrT>
+std::error_code RawInstrProfReader<IntPtrT>::readRawCounts(
+    InstrProfRecord &Record) {
+  uint32_t NumCounters = swap(Data->NumCounters);
+  IntPtrT CounterPtr = Data->CounterPtr;
+  if (NumCounters == 0)
+    return error(instrprof_error::malformed);
+
+  auto RawCounts = makeArrayRef(getCounter(CounterPtr), NumCounters);
+  auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
+
+  // Check bounds.
+  if (RawCounts.data() < CountersStart ||
+      RawCounts.data() + RawCounts.size() > NamesStartAsCounter)
+    return error(instrprof_error::malformed);
+
+  if (ShouldSwapBytes) {
+    Record.Counts.clear();
+    Record.Counts.reserve(RawCounts.size());
+    for (uint64_t Count : RawCounts)
+      Record.Counts.push_back(swap(Count));
+  } else
+    Record.Counts = RawCounts;
+
+  return success();
+}
+
+/// Replace the value profile data of \p Record with the data stored for the
+/// current entry, if it has any value sites, and remember how many bytes of
+/// value data were consumed.
+template <class IntPtrT>
+std::error_code
+RawInstrProfReader<IntPtrT>::readValueProfilingData(InstrProfRecord &Record) {
+
+  Record.clearValueData();
+  CurValueDataSize = 0;
+  // Need to match the logic in value profile dumper code in compiler-rt:
+  uint32_t NumValueKinds = 0;
+  for (uint32_t I = 0; I < IPVK_Last + 1; I++)
+    NumValueKinds += (Data->NumValueSites[I] != 0);
+
+  if (!NumValueKinds)
+    return success();
+
+  ErrorOr<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
+      ValueProfData::getValueProfData(ValueDataStart,
+                                      (const unsigned char *)ProfileEnd,
+                                      getDataEndianness());
+
+  if (VDataPtrOrErr.getError())
+    return VDataPtrOrErr.getError();
+
+  VDataPtrOrErr.get()->deserializeTo(Record, &Symtab->getAddrHashMap());
+  CurValueDataSize = VDataPtrOrErr.get()->getSize();
+  return success();
+}
+
+/// Read the next record into \p Record, moving on to the next profile in the
+/// buffer when the current one is exhausted.
+template <class IntPtrT>
+std::error_code
+RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
+  if (atEnd())
+    if (std::error_code EC = readNextHeader(ProfileEnd))
+      return EC;
+
+  // Read name and set it in Record.
+  if (std::error_code EC = readName(Record))
+    return EC;
+
+  // Read FuncHash and set it in Record.
+  if (std::error_code EC = readFuncHash(Record))
+    return EC;
+
+  // Read raw counts and set Record.
+  if (std::error_code EC = readRawCounts(Record))
+    return EC;
+
+  // Read value data and set Record.
+  if (std::error_code EC = readValueProfilingData(Record))
+    return EC;
+
+  // Iterate.
+  advanceData();
+  return success();
+}
+
+namespace llvm {
+template class RawInstrProfReader<uint32_t>;
+template class RawInstrProfReader<uint64_t>;
+}
+
+/// Hash \p K with the hash type this trait was configured with.
+InstrProfLookupTrait::hash_value_type
+InstrProfLookupTrait::ComputeHash(StringRef K) {
+  return IndexedInstrProf::ComputeHash(HashType, K);
+}
+
+typedef InstrProfLookupTrait::data_type data_type;
+typedef InstrProfLookupTrait::offset_type offset_type;
+
+/// Decode the value profile data found at \p D into the most recently added
+/// record and advance \p D past it. Returns false when the data is invalid.
+bool InstrProfLookupTrait::readValueProfilingData(
+    const unsigned char *&D, const unsigned char *const End) {
+  ErrorOr<std::unique_ptr<ValueProfData>> VDataPtrOrErr =
+      ValueProfData::getValueProfData(D, End, ValueProfDataEndianness);
+
+  if (VDataPtrOrErr.getError())
+    return false;
+
+  VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
+  D += VDataPtrOrErr.get()->TotalSize;
+
+  return true;
+}
+
+/// Decode the \p N bytes at \p D into the records stored for key \p K.
+/// Returns an empty result when the data is corrupt.
+data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D,
+                                         offset_type N) {
+  // Check if the data is corrupt. If so, don't try to read it.
+  if (N % sizeof(uint64_t))
+    return data_type();
+
+  DataBuffer.clear();
+  std::vector<uint64_t> CounterBuffer;
+
+  using namespace support;
+  const unsigned char *End = D + N;
+  while (D < End) {
+    // Read hash.
+    if (D + sizeof(uint64_t) >= End)
+      return data_type();
+    uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D);
+
+    // Initialize number of counters for FormatVersion == 1.
+    uint64_t CountsSize = N / sizeof(uint64_t) - 1;
+    // If format version is different then read the number of counters.
+    if (FormatVersion != 1) {
+      if (D + sizeof(uint64_t) > End)
+        return data_type();
+      CountsSize = endian::readNext<uint64_t, little, unaligned>(D);
+    }
+    // Read counter values. CountsSize comes straight from the file, so
+    // compare it against the number of words left; multiplying it by the
+    // word size could wrap around and let a corrupt count slip through.
+    if (CountsSize > static_cast<uint64_t>(End - D) / sizeof(uint64_t))
+      return data_type();
+
+    CounterBuffer.clear();
+    CounterBuffer.reserve(CountsSize);
+    for (uint64_t J = 0; J < CountsSize; ++J)
+      CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D));
+
+    DataBuffer.emplace_back(K, Hash, std::move(CounterBuffer));
+
+    // Read value profiling data.
+    if (FormatVersion > 2 && !readValueProfilingData(D, End)) {
+      DataBuffer.clear();
+      return data_type();
+    }
+  }
+  return DataBuffer;
+}
+
+/// Look up the records stored for \p FuncName and return them in \p Data.
+template <typename HashTableImpl>
+std::error_code InstrProfReaderIndex<HashTableImpl>::getRecords(
+    StringRef FuncName, ArrayRef<InstrProfRecord> &Data) {
+  auto Iter = HashTable->find(FuncName);
+  if (Iter == HashTable->end())
+    return instrprof_error::unknown_function;
+
+  Data = (*Iter);
+  if (Data.empty())
+    return instrprof_error::malformed;
+
+  return instrprof_error::success;
+}
+
+/// Return in \p Data the records at the current iteration position, or eof
+/// when the iteration is finished.
+template <typename HashTableImpl>
+std::error_code InstrProfReaderIndex<HashTableImpl>::getRecords(
+    ArrayRef<InstrProfRecord> &Data) {
+  if (atEnd())
+    return instrprof_error::eof;
+
+  Data = *RecordIterator;
+
+  if (Data.empty())
+    return instrprof_error::malformed;
+
+  return instrprof_error::success;
+}
+
+/// Build the on-disk hash table view over the given memory and position the
+/// record iterator at its first entry.
+template <typename HashTableImpl>
+InstrProfReaderIndex<HashTableImpl>::InstrProfReaderIndex(
+    const unsigned char *Buckets, const unsigned char *const Payload,
+    const unsigned char *const Base, IndexedInstrProf::HashT HashType,
+    uint64_t Version) {
+  FormatVersion = Version;
+  HashTable.reset(HashTableImpl::Create(
+      Buckets, Payload, Base,
+      typename HashTableImpl::InfoType(HashType, Version)));
+  RecordIterator = HashTable->data_begin();
+}
+
+/// Check whether \p DataBuffer starts with the indexed profile magic.
+bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
+  if (DataBuffer.getBufferSize() < 8)
+    return false;
+  using namespace support;
+  uint64_t Magic =
+      endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
+  // Verify that it's magical.
+  return Magic == IndexedInstrProf::Magic;
+}
+
+/// Parse and validate the indexed profile header and set up the index over
+/// the on-disk hash table that follows it.
+std::error_code IndexedInstrProfReader::readHeader() {
+  const unsigned char *Start =
+      (const unsigned char *)DataBuffer->getBufferStart();
+  const unsigned char *Cur = Start;
+  // Every field of the header is read below, so the buffer has to hold the
+  // complete structure, not just its first three words.
+  if (DataBuffer->getBufferSize() < sizeof(IndexedInstrProf::Header))
+    return error(instrprof_error::truncated);
+
+  using namespace support;
+
+  auto *Header = reinterpret_cast<const IndexedInstrProf::Header *>(Cur);
+  Cur += sizeof(IndexedInstrProf::Header);
+
+  // Check the magic number.
+  uint64_t Magic = endian::byte_swap<uint64_t, little>(Header->Magic);
+  if (Magic != IndexedInstrProf::Magic)
+    return error(instrprof_error::bad_magic);
+
+  // Read the version.
+  uint64_t FormatVersion = endian::byte_swap<uint64_t, little>(Header->Version);
+  if (FormatVersion > IndexedInstrProf::Version)
+    return error(instrprof_error::unsupported_version);
+
+  // Read the maximal function count.
+  MaxFunctionCount =
+      endian::byte_swap<uint64_t, little>(Header->MaxFunctionCount);
+
+  // Read the hash type and start offset.
+  IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
+      endian::byte_swap<uint64_t, little>(Header->HashType));
+  if (HashType > IndexedInstrProf::HashT::Last)
+    return error(instrprof_error::unsupported_hash_type);
+
+  uint64_t HashOffset = endian::byte_swap<uint64_t, little>(Header->HashOffset);
+  // The hash table is addressed through this offset, so reject values that
+  // point into the header or outside the buffer.
+  if (HashOffset < sizeof(IndexedInstrProf::Header) ||
+      HashOffset >= DataBuffer->getBufferSize())
+    return error(instrprof_error::malformed);
+
+  // The rest of the file is an on disk hash table.
+  InstrProfReaderIndexBase *IndexPtr = nullptr;
+  IndexPtr = new InstrProfReaderIndex<OnDiskHashTableImplV3>(
+      Start + HashOffset, Cur, Start, HashType, FormatVersion);
+  Index.reset(IndexPtr);
+  return success();
+}
+
+/// Return the symbol table, building it from the index on first use.
+InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
+  if (Symtab.get())
+    return *Symtab.get();
+
+  std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
+  Index->populateSymtab(*NewSymtab.get());
+
+  Symtab = std::move(NewSymtab);
+  return *Symtab.get();
+}
+
+/// Return the record stored for \p FuncName whose hash equals \p FuncHash,
+/// or hash_mismatch when the function is known but no record has that hash.
+ErrorOr<InstrProfRecord>
+IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
+                                           uint64_t FuncHash) {
+  ArrayRef<InstrProfRecord> Data;
+  std::error_code EC = Index->getRecords(FuncName, Data);
+  if (EC != instrprof_error::success)
+    return EC;
+  // Found it. Look for counters with the right hash.
+  for (unsigned I = 0, E = Data.size(); I < E; ++I) {
+    // Check for a match and return a copy of the record if there is one.
+    // The elements of an ArrayRef are const, so they can only be copied.
+    if (Data[I].Hash == FuncHash) {
+      return Data[I];
+    }
+  }
+  return error(instrprof_error::hash_mismatch);
+}
+
+/// Copy into \p Counts the counters of the record matching \p FuncName and
+/// \p FuncHash.
+std::error_code
+IndexedInstrProfReader::getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
+                                          std::vector<uint64_t> &Counts) {
+  ErrorOr<InstrProfRecord> Record = getInstrProfRecord(FuncName, FuncHash);
+  if (std::error_code EC = Record.getError())
+    return EC;
+
+  Counts = Record.get().Counts;
+  return success();
+}
+
+/// Read the next record of the index into \p Record, stepping through all
+/// records of one key before advancing to the next key.
+std::error_code IndexedInstrProfReader::readNextRecord(
+    InstrProfRecord &Record) {
+  // NOTE(review): this cursor is a function-local static, so it is shared by
+  // every IndexedInstrProfReader in the process. Reading two profiles in an
+  // interleaved fashion, or abandoning a read in the middle of a key, leaves
+  // it pointing at the wrong record. It should become a member of the
+  // reader; that requires a change to the class declaration.
+  static unsigned RecordIndex = 0;
+
+  ArrayRef<InstrProfRecord> Data;
+
+  std::error_code EC = Index->getRecords(Data);
+  if (EC != instrprof_error::success)
+    return error(EC);
+
+  Record = Data[RecordIndex++];
+  if (RecordIndex >= Data.size()) {
+    Index->advanceToNextKey();
+    RecordIndex = 0;
+  }
+  return success();
+}
diff --git a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
new file mode 100644
index 0000000..9bb03e1
--- /dev/null
+++
b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -0,0 +1,252 @@
+//=-- InstrProfWriter.cpp - Instrumented profiling writer -------------------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing profiling data for clang's
+// instrumentation based PGO and coverage.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/InstrProfWriter.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/OnDiskHashTable.h"
+#include <tuple>
+
+using namespace llvm;
+
+namespace {
+// Byte order used for the value profile data emitted by EmitData below; it is
+// changed only through InstrProfWriter::setValueProfDataEndianness.
+static support::endianness ValueProfDataEndianness = support::little;
+
+/// Trait handed to OnDiskChainedHashTableGenerator: it describes how a
+/// function name (the key) and the records stored under it (the data) are
+/// hashed, measured and written.
+class InstrProfRecordTrait {
+public:
+  typedef StringRef key_type;
+  typedef StringRef key_type_ref;
+
+  typedef const InstrProfWriter::ProfilingData *const data_type;
+  typedef const InstrProfWriter::ProfilingData *const data_type_ref;
+
+  typedef uint64_t hash_value_type;
+  typedef uint64_t offset_type;
+
+  static hash_value_type ComputeHash(key_type_ref K) {
+    return IndexedInstrProf::ComputeHash(K);
+  }
+
+  /// Write the key length and the data length to \p Out and return both.
+  /// The data length must match exactly what EmitData writes.
+  static std::pair<offset_type, offset_type>
+  EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) {
+    using namespace llvm::support;
+    endian::Writer<little> LE(Out);
+
+    offset_type N = K.size();
+    LE.write<offset_type>(N);
+
+    offset_type M = 0;
+    for (const auto &ProfileData : *V) {
+      const InstrProfRecord &ProfRecord = ProfileData.second;
+      M += sizeof(uint64_t); // The function hash
+      M += sizeof(uint64_t); // The size of the Counts vector
+      M += ProfRecord.Counts.size() * sizeof(uint64_t);
+
+      // Value data
+      M += ValueProfData::getSize(ProfileData.second);
+    }
+    LE.write<offset_type>(M);
+
+    return std::make_pair(N, M);
+  }
+
+  /// Write the \p N bytes of the key \p K to \p Out.
+  static void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N){
+    Out.write(K.data(), N);
+  }
+
+  /// Write every record of \p V to \p Out: hash, counter count, counters and
+  /// the serialized value profile data.
+  static void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V,
+                       offset_type) {
+    using namespace llvm::support;
+    endian::Writer<little> LE(Out);
+    for (const auto &ProfileData : *V) {
+      const InstrProfRecord &ProfRecord = ProfileData.second;
+
+      LE.write<uint64_t>(ProfileData.first); // Function hash
+      LE.write<uint64_t>(ProfRecord.Counts.size());
+      for (uint64_t I : ProfRecord.Counts)
+        LE.write<uint64_t>(I);
+
+      // Write value data. The size is taken before the bytes are swapped,
+      // while the size field is still in host byte order.
+      std::unique_ptr<ValueProfData> VDataPtr =
+          ValueProfData::serializeFrom(ProfileData.second);
+      uint32_t S = VDataPtr->getSize();
+      VDataPtr->swapBytesFromHost(ValueProfDataEndianness);
+      Out.write((const char *)VDataPtr.get(), S);
+    }
+  }
+};
+}
+
+// Internal interface for testing purpose only.
+void InstrProfWriter::setValueProfDataEndianness(
+    support::endianness Endianness) {
+  ValueProfDataEndianness = Endianness;
+}
+
+/// Add \p I to the writer, scaling its counters by \p Weight. A record whose
+/// name and hash were seen before is merged into the stored one. Returns
+/// counter_overflow when scaling saturated a counter of a new record, or the
+/// result of the merge otherwise.
+std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I,
+                                           uint64_t Weight) {
+  auto &ProfileDataMap = FunctionData[I.Name];
+
+  bool NewFunc;
+  ProfilingData::iterator Where;
+  std::tie(Where, NewFunc) =
+      ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord()));
+  InstrProfRecord &Dest = Where->second;
+
+  instrprof_error Result;
+  if (NewFunc) {
+    // We've never seen a function with this name and hash, add it.
+    Dest = std::move(I);
+    // Fix up the name to avoid dangling reference.
+    Dest.Name = FunctionData.find(Dest.Name)->getKey();
+    Result = instrprof_error::success;
+    if (Weight > 1) {
+      for (auto &Count : Dest.Counts) {
+        bool Overflowed;
+        Count = SaturatingMultiply(Count, Weight, &Overflowed);
+        if (Overflowed && Result == instrprof_error::success) {
+          Result = instrprof_error::counter_overflow;
+        }
+      }
+    }
+  } else {
+    // We're updating a function we've seen before.
+    Result = Dest.merge(I, Weight);
+  }
+
+  // We keep track of the max function count as we go for simplicity.
+  // Update this statistic no matter the result of the merge. Nothing above
+  // guarantees that the record has counters, so do not index an empty vector.
+  if (!Dest.Counts.empty() && Dest.Counts[0] > MaxFunctionCount)
+    MaxFunctionCount = Dest.Counts[0];
+
+  return Result;
+}
+
+/// Write the header and the hash table to \p OS. Returns the stream position
+/// of the reserved HashOffset field and the hash table start that has to be
+/// patched into it.
+std::pair<uint64_t, uint64_t> InstrProfWriter::writeImpl(raw_ostream &OS) {
+  OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator;
+
+  // Populate the hash table generator.
+  for (const auto &I : FunctionData)
+    Generator.insert(I.getKey(), &I.getValue());
+
+  using namespace llvm::support;
+  endian::Writer<little> LE(OS);
+
+  // Write the header.
+  IndexedInstrProf::Header Header;
+  Header.Magic = IndexedInstrProf::Magic;
+  Header.Version = IndexedInstrProf::Version;
+  Header.MaxFunctionCount = MaxFunctionCount;
+  Header.HashType = static_cast<uint64_t>(IndexedInstrProf::HashType);
+  Header.HashOffset = 0;
+  int N = sizeof(IndexedInstrProf::Header) / sizeof(uint64_t);
+
+  // Only write out all the fields except 'HashOffset'. We need
+  // to remember the offset of that field to allow back patching
+  // later.
+  for (int I = 0; I < N - 1; I++)
+    LE.write<uint64_t>(reinterpret_cast<uint64_t *>(&Header)[I]);
+
+  // Save a space to write the hash table start location.
+  uint64_t HashTableStartLoc = OS.tell();
+  // Reserve the space for HashOffset field.
+  LE.write<uint64_t>(0);
+  // Write the hash table.
+  uint64_t HashTableStart = Generator.Emit(OS);
+
+  return std::make_pair(HashTableStartLoc, HashTableStart);
+}
+
+/// Write the indexed profile to \p OS, then seek back and patch the
+/// HashOffset field of the header.
+void InstrProfWriter::write(raw_fd_ostream &OS) {
+  // Write the hash table.
+  auto TableStart = writeImpl(OS);
+
+  // Go back and fill in the hash table start.
+  using namespace support;
+  OS.seek(TableStart.first);
+  // Now patch the HashOffset field previously reserved.
+  endian::Writer<little>(OS).write<uint64_t>(TableStart.second);
+}
+
+// Names of the value profile kinds, indexed by kind.
+static const char *ValueProfKindStr[] = {
+#define VALUE_PROF_KIND(Enumerator, Value) #Enumerator,
+#include "llvm/ProfileData/InstrProfData.inc"
+};
+
+/// Write \p Func to \p OS in the text profile format. \p Symtab is used to
+/// turn indirect call target values back into function names.
+void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func,
+                                        InstrProfSymtab &Symtab,
+                                        raw_fd_ostream &OS) {
+  OS << Func.Name << "\n";
+  OS << "# Func Hash:\n" << Func.Hash << "\n";
+  OS << "# Num Counters:\n" << Func.Counts.size() << "\n";
+  OS << "# Counter Values:\n";
+  for (uint64_t Count : Func.Counts)
+    OS << Count << "\n";
+
+  uint32_t NumValueKinds = Func.getNumValueKinds();
+  if (!NumValueKinds) {
+    OS << "\n";
+    return;
+  }
+
+  OS << "# Num Value Kinds:\n" << NumValueKinds << "\n";
+  for (uint32_t VK = 0; VK < IPVK_Last + 1; VK++) {
+    uint32_t NS = Func.getNumValueSites(VK);
+    if (!NS)
+      continue;
+    OS << "# ValueKind = " << ValueProfKindStr[VK] << ":\n" << VK << "\n";
+    OS << "# NumValueSites:\n" << NS << "\n";
+    for (uint32_t S = 0; S < NS; S++) {
+      uint32_t ND = Func.getNumValueDataForSite(VK, S);
+      OS << ND << "\n";
+      std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S);
+      for (uint32_t I = 0; I < ND; I++) {
+        if (VK == IPVK_IndirectCallTarget)
+          OS << Symtab.getFuncName(VD[I].Value) << ":" << VD[I].Count << "\n";
+        else
+          OS << VD[I].Value << ":" << VD[I].Count << "\n";
+      }
+    }
+  }
+
+  OS << "\n";
+}
+
+/// Write every stored record to \p OS in the text profile format.
+void InstrProfWriter::writeText(raw_fd_ostream &OS) {
+  InstrProfSymtab Symtab;
+  for (const auto &I : FunctionData)
+    Symtab.addFuncName(I.getKey());
+  Symtab.finalizeSymtab();
+
+  for (const auto &I : FunctionData)
+    for (const auto &Func : I.getValue())
+      writeRecordInText(Func.second, Symtab, OS);
+}
+
+/// Write the indexed profile into a string, patch the HashOffset field in
+/// place and return a copy of the result as a memory buffer.
+std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {
+  std::string Data;
+  llvm::raw_string_ostream OS(Data);
+  // Write the hash table.
+  auto TableStart = writeImpl(OS);
+  OS.flush();
+
+  // Go back and fill in the hash table start.
+  using namespace support;
+  uint64_t Bytes = endian::byte_swap<uint64_t, little>(TableStart.second);
+  Data.replace(TableStart.first, sizeof(uint64_t), (const char *)&Bytes,
+               sizeof(uint64_t));
+
+  // Return this in an aligned memory buffer.
+  return MemoryBuffer::getMemBufferCopy(Data);
+}
diff --git a/contrib/llvm/lib/ProfileData/SampleProf.cpp b/contrib/llvm/lib/ProfileData/SampleProf.cpp
new file mode 100644
index 0000000..9ded757
--- /dev/null
+++ b/contrib/llvm/lib/ProfileData/SampleProf.cpp
@@ -0,0 +1,149 @@
+//=-- SampleProf.cpp - Sample profiling format support --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains common definitions used in the reading and writing of
+// sample profile data.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm::sampleprof;
+using namespace llvm;
+
+namespace {
+/// Error category that maps sampleprof_error values to their messages.
+class SampleProfErrorCategoryType : public std::error_category {
+  const char *name() const LLVM_NOEXCEPT override { return "llvm.sampleprof"; }
+  std::string message(int IE) const override {
+    sampleprof_error E = static_cast<sampleprof_error>(IE);
+    switch (E) {
+    case sampleprof_error::success:
+      return "Success";
+    case sampleprof_error::bad_magic:
+      return "Invalid sample profile data (bad magic)";
+    case sampleprof_error::unsupported_version:
+      return "Unsupported sample profile format version";
+    case sampleprof_error::too_large:
+      return "Too much profile data";
+    case sampleprof_error::truncated:
+      return "Truncated profile data";
+    case sampleprof_error::malformed:
+      return "Malformed sample profile data";
+    case sampleprof_error::unrecognized_format:
+      return "Unrecognized sample profile encoding format";
+    case sampleprof_error::unsupported_writing_format:
+      return "Profile encoding format unsupported for writing operations";
+    case sampleprof_error::truncated_name_table:
+      return "Truncated function name table";
+    case sampleprof_error::not_implemented:
+      return "Unimplemented feature";
+    case sampleprof_error::counter_overflow:
+      return "Counter overflow";
+    }
+    llvm_unreachable("A value of sampleprof_error has no message.");
+  }
+};
+}
+
+static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory;
+
+const std::error_category &llvm::sampleprof_category() {
+  return *ErrorCategory;
+}
+
+/// Print the line offset, followed by ".<discriminator>" when the
+/// discriminator is non-zero.
+void LineLocation::print(raw_ostream &OS) const {
+  OS << LineOffset;
+  if (Discriminator > 0)
+    OS << "." << Discriminator;
+}
+
+raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
+                                          const LineLocation &Loc) {
+  Loc.print(OS);
+  return OS;
+}
+
+void LineLocation::dump() const { print(dbgs()); }
+
+/// Print the line location followed by the name of the inlined callee.
+void CallsiteLocation::print(raw_ostream &OS) const {
+  LineLocation::print(OS);
+  OS << ": inlined callee: " << CalleeName;
+}
+
+void CallsiteLocation::dump() const { print(dbgs()); }
+
+// This definition is deliberately not marked 'inline': an inline function
+// defined only in this file need not be emitted for other translation units,
+// so using the operator from elsewhere could fail to link.
+raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
+                                          const CallsiteLocation &Loc) {
+  Loc.print(OS);
+  return OS;
+}
+
+/// \brief Print the sample record to the stream \p OS indented by \p Indent.
+void SampleRecord::print(raw_ostream &OS, unsigned Indent) const {
+  OS << NumSamples;
+  if (hasCalls()) {
+    OS << ", calls:";
+    for (const auto &I : getCallTargets())
+      OS << " " << I.first() << ":" << I.second;
+  }
+  OS << "\n";
+}
+
+void SampleRecord::dump() const { print(dbgs(), 0); }
+
+raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
+                                          const SampleRecord &Sample) {
+  Sample.print(OS, 0);
+  return OS;
+}
+
+/// \brief Print the samples collected for a function on stream \p OS.
+void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {
+  // Summary line: total samples, head samples and number of sampled lines.
+  OS << TotalSamples << ", " << TotalHeadSamples << ", ";
+  OS << BodySamples.size() << " sampled lines\n";
+
+  // Samples attributed to lines of the function body, in sorted order.
+  if (BodySamples.size() == 0) {
+    OS.indent(Indent) << "No samples collected in the function's body\n";
+  } else {
+    OS.indent(Indent) << "Samples collected in the function's body {\n";
+    SampleSorter<LineLocation, SampleRecord> SortedBody(BodySamples);
+    for (const auto &Entry : SortedBody.get())
+      OS.indent(Indent + 2) << Entry->first << ": " << Entry->second;
+    OS.indent(Indent) << "}\n";
+  }
+
+  // Samples attributed to inlined callsites, in sorted order; each callee
+  // profile is printed recursively with a deeper indentation.
+  if (CallsiteSamples.size() == 0) {
+    OS.indent(Indent) << "No inlined callsites in this function\n";
+  } else {
+    OS.indent(Indent) << "Samples collected in inlined callsites {\n";
+    SampleSorter<CallsiteLocation, FunctionSamples> SortedCallsites(
+        CallsiteSamples);
+    for (const auto &Entry : SortedCallsites.get()) {
+      OS.indent(Indent + 2) << Entry->first << ": ";
+      Entry->second.print(OS, Indent + 4);
+    }
+    OS << "}\n";
+  }
+}
+
+/// Stream \p FS to \p OS through FunctionSamples::print.
+raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,
+                                          const FunctionSamples &FS) {
+  FS.print(OS);
+  return OS;
+}
+
+/// Print this profile to the debug stream.
+void FunctionSamples::dump(void) const { print(dbgs(), 0); }
diff --git a/contrib/llvm/lib/ProfileData/SampleProfReader.cpp b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp
new file mode 100644
index 0000000..93cd87b
--- /dev/null
+++ b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -0,0 +1,727 @@
+//===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the class that reads LLVM sample profiles. It
+// supports three file formats: text, binary and gcov.
+// +// The textual representation is useful for debugging and testing purposes. The +// binary representation is more compact, resulting in smaller file sizes. +// +// The gcov encoding is the one generated by GCC's AutoFDO profile creation +// tool (https://github.com/google/autofdo) +// +// All three encodings can be used interchangeably as an input sample profile. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm::sampleprof; +using namespace llvm; + +/// \brief Dump the function profile for \p FName. +/// +/// \param FName Name of the function to print. +/// \param OS Stream to emit the output to. +void SampleProfileReader::dumpFunctionProfile(StringRef FName, + raw_ostream &OS) { + OS << "Function: " << FName << ": " << Profiles[FName]; +} + +/// \brief Dump all the function profiles found on stream \p OS. +void SampleProfileReader::dump(raw_ostream &OS) { + for (const auto &I : Profiles) + dumpFunctionProfile(I.getKey(), OS); +} + +/// \brief Parse \p Input as function head. +/// +/// Parse one line of \p Input, and update function name in \p FName, +/// function's total sample count in \p NumSamples, function's entry +/// count in \p NumHeadSamples. +/// +/// \returns true if parsing is successful. 
+static bool ParseHead(const StringRef &Input, StringRef &FName, + uint64_t &NumSamples, uint64_t &NumHeadSamples) { + if (Input[0] == ' ') + return false; + size_t n2 = Input.rfind(':'); + size_t n1 = Input.rfind(':', n2 - 1); + FName = Input.substr(0, n1); + if (Input.substr(n1 + 1, n2 - n1 - 1).getAsInteger(10, NumSamples)) + return false; + if (Input.substr(n2 + 1).getAsInteger(10, NumHeadSamples)) + return false; + return true; +} + + +/// \brief Returns true if line offset \p L is legal (only has 16 bits). +static bool isOffsetLegal(unsigned L) { + return (L & 0xffff) == L; +} + +/// \brief Parse \p Input as line sample. +/// +/// \param Input input line. +/// \param IsCallsite true if the line represents an inlined callsite. +/// \param Depth the depth of the inline stack. +/// \param NumSamples total samples of the line/inlined callsite. +/// \param LineOffset line offset to the start of the function. +/// \param Discriminator discriminator of the line. +/// \param TargetCountMap map from indirect call target to count. +/// +/// returns true if parsing is successful. 
+static bool ParseLine(const StringRef &Input, bool &IsCallsite, uint32_t &Depth, + uint64_t &NumSamples, uint32_t &LineOffset, + uint32_t &Discriminator, StringRef &CalleeName, + DenseMap<StringRef, uint64_t> &TargetCountMap) { + for (Depth = 0; Input[Depth] == ' '; Depth++) + ; + if (Depth == 0) + return false; + + size_t n1 = Input.find(':'); + StringRef Loc = Input.substr(Depth, n1 - Depth); + size_t n2 = Loc.find('.'); + if (n2 == StringRef::npos) { + if (Loc.getAsInteger(10, LineOffset) || !isOffsetLegal(LineOffset)) + return false; + Discriminator = 0; + } else { + if (Loc.substr(0, n2).getAsInteger(10, LineOffset)) + return false; + if (Loc.substr(n2 + 1).getAsInteger(10, Discriminator)) + return false; + } + + StringRef Rest = Input.substr(n1 + 2); + if (Rest[0] >= '0' && Rest[0] <= '9') { + IsCallsite = false; + size_t n3 = Rest.find(' '); + if (n3 == StringRef::npos) { + if (Rest.getAsInteger(10, NumSamples)) + return false; + } else { + if (Rest.substr(0, n3).getAsInteger(10, NumSamples)) + return false; + } + while (n3 != StringRef::npos) { + n3 += Rest.substr(n3).find_first_not_of(' '); + Rest = Rest.substr(n3); + n3 = Rest.find(' '); + StringRef pair = Rest; + if (n3 != StringRef::npos) { + pair = Rest.substr(0, n3); + } + size_t n4 = pair.find(':'); + uint64_t count; + if (pair.substr(n4 + 1).getAsInteger(10, count)) + return false; + TargetCountMap[pair.substr(0, n4)] = count; + } + } else { + IsCallsite = true; + size_t n3 = Rest.find_last_of(':'); + CalleeName = Rest.substr(0, n3); + if (Rest.substr(n3 + 1).getAsInteger(10, NumSamples)) + return false; + } + return true; +} + +/// \brief Load samples from a text file. +/// +/// See the documentation at the top of the file for an explanation of +/// the expected format. +/// +/// \returns true if the file was loaded successfully, false otherwise. 
+std::error_code SampleProfileReaderText::read() { + line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); + sampleprof_error Result = sampleprof_error::success; + + InlineCallStack InlineStack; + + for (; !LineIt.is_at_eof(); ++LineIt) { + if ((*LineIt)[(*LineIt).find_first_not_of(' ')] == '#') + continue; + // Read the header of each function. + // + // Note that for function identifiers we are actually expecting + // mangled names, but we may not always get them. This happens when + // the compiler decides not to emit the function (e.g., it was inlined + // and removed). In this case, the binary will not have the linkage + // name for the function, so the profiler will emit the function's + // unmangled name, which may contain characters like ':' and '>' in its + // name (member functions, templates, etc). + // + // The only requirement we place on the identifier, then, is that it + // should not begin with a number. + if ((*LineIt)[0] != ' ') { + uint64_t NumSamples, NumHeadSamples; + StringRef FName; + if (!ParseHead(*LineIt, FName, NumSamples, NumHeadSamples)) { + reportError(LineIt.line_number(), + "Expected 'mangled_name:NUM:NUM', found " + *LineIt); + return sampleprof_error::malformed; + } + Profiles[FName] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[FName]; + MergeResult(Result, FProfile.addTotalSamples(NumSamples)); + MergeResult(Result, FProfile.addHeadSamples(NumHeadSamples)); + InlineStack.clear(); + InlineStack.push_back(&FProfile); + } else { + uint64_t NumSamples; + StringRef FName; + DenseMap<StringRef, uint64_t> TargetCountMap; + bool IsCallsite; + uint32_t Depth, LineOffset, Discriminator; + if (!ParseLine(*LineIt, IsCallsite, Depth, NumSamples, LineOffset, + Discriminator, FName, TargetCountMap)) { + reportError(LineIt.line_number(), + "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + + *LineIt); + return sampleprof_error::malformed; + } + if (IsCallsite) { + while (InlineStack.size() > Depth) { + 
InlineStack.pop_back(); + } + FunctionSamples &FSamples = InlineStack.back()->functionSamplesAt( + CallsiteLocation(LineOffset, Discriminator, FName)); + MergeResult(Result, FSamples.addTotalSamples(NumSamples)); + InlineStack.push_back(&FSamples); + } else { + while (InlineStack.size() > Depth) { + InlineStack.pop_back(); + } + FunctionSamples &FProfile = *InlineStack.back(); + for (const auto &name_count : TargetCountMap) { + MergeResult(Result, FProfile.addCalledTargetSamples( + LineOffset, Discriminator, name_count.first, + name_count.second)); + } + MergeResult(Result, FProfile.addBodySamples(LineOffset, Discriminator, + NumSamples)); + } + } + } + + return Result; +} + +bool SampleProfileReaderText::hasFormat(const MemoryBuffer &Buffer) { + bool result = false; + + // Check that the first non-comment line is a valid function header. + line_iterator LineIt(Buffer, /*SkipBlanks=*/true, '#'); + if (!LineIt.is_at_eof()) { + if ((*LineIt)[0] != ' ') { + uint64_t NumSamples, NumHeadSamples; + StringRef FName; + result = ParseHead(*LineIt, FName, NumSamples, NumHeadSamples); + } + } + + return result; +} + +template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { + unsigned NumBytesRead = 0; + std::error_code EC; + uint64_t Val = decodeULEB128(Data, &NumBytesRead); + + if (Val > std::numeric_limits<T>::max()) + EC = sampleprof_error::malformed; + else if (Data + NumBytesRead > End) + EC = sampleprof_error::truncated; + else + EC = sampleprof_error::success; + + if (EC) { + reportError(0, EC.message()); + return EC; + } + + Data += NumBytesRead; + return static_cast<T>(Val); +} + +ErrorOr<StringRef> SampleProfileReaderBinary::readString() { + std::error_code EC; + StringRef Str(reinterpret_cast<const char *>(Data)); + if (Data + Str.size() + 1 > End) { + EC = sampleprof_error::truncated; + reportError(0, EC.message()); + return EC; + } + + Data += Str.size() + 1; + return Str; +} + +ErrorOr<StringRef> SampleProfileReaderBinary::readStringFromTable() 
{ + std::error_code EC; + auto Idx = readNumber<uint32_t>(); + if (std::error_code EC = Idx.getError()) + return EC; + if (*Idx >= NameTable.size()) + return sampleprof_error::truncated_name_table; + return NameTable[*Idx]; +} + +std::error_code +SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) { + auto NumSamples = readNumber<uint64_t>(); + if (std::error_code EC = NumSamples.getError()) + return EC; + FProfile.addTotalSamples(*NumSamples); + + // Read the samples in the body. + auto NumRecords = readNumber<uint32_t>(); + if (std::error_code EC = NumRecords.getError()) + return EC; + + for (uint32_t I = 0; I < *NumRecords; ++I) { + auto LineOffset = readNumber<uint64_t>(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + if (!isOffsetLegal(*LineOffset)) { + return std::error_code(); + } + + auto Discriminator = readNumber<uint64_t>(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + auto NumSamples = readNumber<uint64_t>(); + if (std::error_code EC = NumSamples.getError()) + return EC; + + auto NumCalls = readNumber<uint32_t>(); + if (std::error_code EC = NumCalls.getError()) + return EC; + + for (uint32_t J = 0; J < *NumCalls; ++J) { + auto CalledFunction(readStringFromTable()); + if (std::error_code EC = CalledFunction.getError()) + return EC; + + auto CalledFunctionSamples = readNumber<uint64_t>(); + if (std::error_code EC = CalledFunctionSamples.getError()) + return EC; + + FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, + *CalledFunction, *CalledFunctionSamples); + } + + FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); + } + + // Read all the samples for inlined function calls. 
+ auto NumCallsites = readNumber<uint32_t>(); + if (std::error_code EC = NumCallsites.getError()) + return EC; + + for (uint32_t J = 0; J < *NumCallsites; ++J) { + auto LineOffset = readNumber<uint64_t>(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + auto Discriminator = readNumber<uint64_t>(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + + FunctionSamples &CalleeProfile = FProfile.functionSamplesAt( + CallsiteLocation(*LineOffset, *Discriminator, *FName)); + if (std::error_code EC = readProfile(CalleeProfile)) + return EC; + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderBinary::read() { + while (!at_eof()) { + auto NumHeadSamples = readNumber<uint64_t>(); + if (std::error_code EC = NumHeadSamples.getError()) + return EC; + + auto FName(readStringFromTable()); + if (std::error_code EC = FName.getError()) + return EC; + + Profiles[*FName] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[*FName]; + + FProfile.addHeadSamples(*NumHeadSamples); + + if (std::error_code EC = readProfile(FProfile)) + return EC; + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderBinary::readHeader() { + Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); + End = Data + Buffer->getBufferSize(); + + // Read and check the magic identifier. + auto Magic = readNumber<uint64_t>(); + if (std::error_code EC = Magic.getError()) + return EC; + else if (*Magic != SPMagic()) + return sampleprof_error::bad_magic; + + // Read the version number. + auto Version = readNumber<uint64_t>(); + if (std::error_code EC = Version.getError()) + return EC; + else if (*Version != SPVersion()) + return sampleprof_error::unsupported_version; + + // Read the name table. 
+ auto Size = readNumber<uint32_t>(); + if (std::error_code EC = Size.getError()) + return EC; + NameTable.reserve(*Size); + for (uint32_t I = 0; I < *Size; ++I) { + auto Name(readString()); + if (std::error_code EC = Name.getError()) + return EC; + NameTable.push_back(*Name); + } + + return sampleprof_error::success; +} + +bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) { + const uint8_t *Data = + reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); + uint64_t Magic = decodeULEB128(Data); + return Magic == SPMagic(); +} + +std::error_code SampleProfileReaderGCC::skipNextWord() { + uint32_t dummy; + if (!GcovBuffer.readInt(dummy)) + return sampleprof_error::truncated; + return sampleprof_error::success; +} + +template <typename T> ErrorOr<T> SampleProfileReaderGCC::readNumber() { + if (sizeof(T) <= sizeof(uint32_t)) { + uint32_t Val; + if (GcovBuffer.readInt(Val) && Val <= std::numeric_limits<T>::max()) + return static_cast<T>(Val); + } else if (sizeof(T) <= sizeof(uint64_t)) { + uint64_t Val; + if (GcovBuffer.readInt64(Val) && Val <= std::numeric_limits<T>::max()) + return static_cast<T>(Val); + } + + std::error_code EC = sampleprof_error::malformed; + reportError(0, EC.message()); + return EC; +} + +ErrorOr<StringRef> SampleProfileReaderGCC::readString() { + StringRef Str; + if (!GcovBuffer.readString(Str)) + return sampleprof_error::truncated; + return Str; +} + +std::error_code SampleProfileReaderGCC::readHeader() { + // Read the magic identifier. + if (!GcovBuffer.readGCDAFormat()) + return sampleprof_error::unrecognized_format; + + // Read the version number. Note - the GCC reader does not validate this + // version, but the profile creator generates v704. + GCOV::GCOVVersion version; + if (!GcovBuffer.readGCOVVersion(version)) + return sampleprof_error::unrecognized_format; + + if (version != GCOV::V704) + return sampleprof_error::unsupported_version; + + // Skip the empty integer. 
+ if (std::error_code EC = skipNextWord()) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readSectionTag(uint32_t Expected) { + uint32_t Tag; + if (!GcovBuffer.readInt(Tag)) + return sampleprof_error::truncated; + + if (Tag != Expected) + return sampleprof_error::malformed; + + if (std::error_code EC = skipNextWord()) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readNameTable() { + if (std::error_code EC = readSectionTag(GCOVTagAFDOFileNames)) + return EC; + + uint32_t Size; + if (!GcovBuffer.readInt(Size)) + return sampleprof_error::truncated; + + for (uint32_t I = 0; I < Size; ++I) { + StringRef Str; + if (!GcovBuffer.readString(Str)) + return sampleprof_error::truncated; + Names.push_back(Str); + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readFunctionProfiles() { + if (std::error_code EC = readSectionTag(GCOVTagAFDOFunction)) + return EC; + + uint32_t NumFunctions; + if (!GcovBuffer.readInt(NumFunctions)) + return sampleprof_error::truncated; + + InlineCallStack Stack; + for (uint32_t I = 0; I < NumFunctions; ++I) + if (std::error_code EC = readOneFunctionProfile(Stack, true, 0)) + return EC; + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderGCC::readOneFunctionProfile( + const InlineCallStack &InlineStack, bool Update, uint32_t Offset) { + uint64_t HeadCount = 0; + if (InlineStack.size() == 0) + if (!GcovBuffer.readInt64(HeadCount)) + return sampleprof_error::truncated; + + uint32_t NameIdx; + if (!GcovBuffer.readInt(NameIdx)) + return sampleprof_error::truncated; + + StringRef Name(Names[NameIdx]); + + uint32_t NumPosCounts; + if (!GcovBuffer.readInt(NumPosCounts)) + return sampleprof_error::truncated; + + uint32_t NumCallsites; + if (!GcovBuffer.readInt(NumCallsites)) + return sampleprof_error::truncated; + + FunctionSamples *FProfile = nullptr; + if (InlineStack.size() == 0) { + // If 
this is a top function that we have already processed, do not + // update its profile again. This happens in the presence of + // function aliases. Since these aliases share the same function + // body, there will be identical replicated profiles for the + // original function. In this case, we simply not bother updating + // the profile of the original function. + FProfile = &Profiles[Name]; + FProfile->addHeadSamples(HeadCount); + if (FProfile->getTotalSamples() > 0) + Update = false; + } else { + // Otherwise, we are reading an inlined instance. The top of the + // inline stack contains the profile of the caller. Insert this + // callee in the caller's CallsiteMap. + FunctionSamples *CallerProfile = InlineStack.front(); + uint32_t LineOffset = Offset >> 16; + uint32_t Discriminator = Offset & 0xffff; + FProfile = &CallerProfile->functionSamplesAt( + CallsiteLocation(LineOffset, Discriminator, Name)); + } + + for (uint32_t I = 0; I < NumPosCounts; ++I) { + uint32_t Offset; + if (!GcovBuffer.readInt(Offset)) + return sampleprof_error::truncated; + + uint32_t NumTargets; + if (!GcovBuffer.readInt(NumTargets)) + return sampleprof_error::truncated; + + uint64_t Count; + if (!GcovBuffer.readInt64(Count)) + return sampleprof_error::truncated; + + // The line location is encoded in the offset as: + // high 16 bits: line offset to the start of the function. + // low 16 bits: discriminator. + uint32_t LineOffset = Offset >> 16; + uint32_t Discriminator = Offset & 0xffff; + + InlineCallStack NewStack; + NewStack.push_back(FProfile); + NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end()); + if (Update) { + // Walk up the inline stack, adding the samples on this line to + // the total sample count of the callers in the chain. + for (auto CallerProfile : NewStack) + CallerProfile->addTotalSamples(Count); + + // Update the body samples for the current profile. 
+ FProfile->addBodySamples(LineOffset, Discriminator, Count); + } + + // Process the list of functions called at an indirect call site. + // These are all the targets that a function pointer (or virtual + // function) resolved at runtime. + for (uint32_t J = 0; J < NumTargets; J++) { + uint32_t HistVal; + if (!GcovBuffer.readInt(HistVal)) + return sampleprof_error::truncated; + + if (HistVal != HIST_TYPE_INDIR_CALL_TOPN) + return sampleprof_error::malformed; + + uint64_t TargetIdx; + if (!GcovBuffer.readInt64(TargetIdx)) + return sampleprof_error::truncated; + StringRef TargetName(Names[TargetIdx]); + + uint64_t TargetCount; + if (!GcovBuffer.readInt64(TargetCount)) + return sampleprof_error::truncated; + + if (Update) { + FunctionSamples &TargetProfile = Profiles[TargetName]; + TargetProfile.addCalledTargetSamples(LineOffset, Discriminator, + TargetName, TargetCount); + } + } + } + + // Process all the inlined callers into the current function. These + // are all the callsites that were inlined into this function. + for (uint32_t I = 0; I < NumCallsites; I++) { + // The offset is encoded as: + // high 16 bits: line offset to the start of the function. + // low 16 bits: discriminator. + uint32_t Offset; + if (!GcovBuffer.readInt(Offset)) + return sampleprof_error::truncated; + InlineCallStack NewStack; + NewStack.push_back(FProfile); + NewStack.insert(NewStack.end(), InlineStack.begin(), InlineStack.end()); + if (std::error_code EC = readOneFunctionProfile(NewStack, Update, Offset)) + return EC; + } + + return sampleprof_error::success; +} + +/// \brief Read a GCC AutoFDO profile. +/// +/// This format is generated by the Linux Perf conversion tool at +/// https://github.com/google/autofdo. +std::error_code SampleProfileReaderGCC::read() { + // Read the string table. + if (std::error_code EC = readNameTable()) + return EC; + + // Read the source profile. 
+ if (std::error_code EC = readFunctionProfiles()) + return EC; + + return sampleprof_error::success; +} + +bool SampleProfileReaderGCC::hasFormat(const MemoryBuffer &Buffer) { + StringRef Magic(reinterpret_cast<const char *>(Buffer.getBufferStart())); + return Magic == "adcg*704"; +} + +/// \brief Prepare a memory buffer for the contents of \p Filename. +/// +/// \returns an error code indicating the status of the buffer. +static ErrorOr<std::unique_ptr<MemoryBuffer>> +setupMemoryBuffer(std::string Filename) { + auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = BufferOrErr.getError()) + return EC; + auto Buffer = std::move(BufferOrErr.get()); + + // Sanity check the file. + if (Buffer->getBufferSize() > std::numeric_limits<uint32_t>::max()) + return sampleprof_error::too_large; + + return std::move(Buffer); +} + +/// \brief Create a sample profile reader based on the format of the input file. +/// +/// \param Filename The file to open. +/// +/// \param Reader The reader to instantiate according to \p Filename's format. +/// +/// \param C The LLVM context to use to emit diagnostics. +/// +/// \returns an error code indicating the status of the created reader. +ErrorOr<std::unique_ptr<SampleProfileReader>> +SampleProfileReader::create(StringRef Filename, LLVMContext &C) { + auto BufferOrError = setupMemoryBuffer(Filename); + if (std::error_code EC = BufferOrError.getError()) + return EC; + return create(BufferOrError.get(), C); +} + +/// \brief Create a sample profile reader based on the format of the input data. +/// +/// \param B The memory buffer to create the reader from (assumes ownership). +/// +/// \param Reader The reader to instantiate according to \p Filename's format. +/// +/// \param C The LLVM context to use to emit diagnostics. +/// +/// \returns an error code indicating the status of the created reader. 
+ErrorOr<std::unique_ptr<SampleProfileReader>> +SampleProfileReader::create(std::unique_ptr<MemoryBuffer> &B, LLVMContext &C) { + std::unique_ptr<SampleProfileReader> Reader; + if (SampleProfileReaderBinary::hasFormat(*B)) + Reader.reset(new SampleProfileReaderBinary(std::move(B), C)); + else if (SampleProfileReaderGCC::hasFormat(*B)) + Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); + else if (SampleProfileReaderText::hasFormat(*B)) + Reader.reset(new SampleProfileReaderText(std::move(B), C)); + else + return sampleprof_error::unrecognized_format; + + if (std::error_code EC = Reader->readHeader()) + return EC; + + return std::move(Reader); +} diff --git a/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp new file mode 100644 index 0000000..51feee5 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -0,0 +1,240 @@ +//===- SampleProfWriter.cpp - Write LLVM sample profile data --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the class that writes LLVM sample profiles. It +// supports two file formats: text and binary. The textual representation +// is useful for debugging and testing purposes. The binary representation +// is more compact, resulting in smaller file sizes. However, they can +// both be used interchangeably. +// +// See lib/ProfileData/SampleProfReader.cpp for documentation on each of the +// supported formats. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/SampleProfWriter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" + +using namespace llvm::sampleprof; +using namespace llvm; + +/// \brief Write samples to a text file. +/// +/// Note: it may be tempting to implement this in terms of +/// FunctionSamples::print(). Please don't. The dump functionality is intended +/// for debugging and has no specified form. +/// +/// The format used here is more structured and deliberate because +/// it needs to be parsed by the SampleProfileReaderText class. +std::error_code SampleProfileWriterText::write(StringRef FName, + const FunctionSamples &S) { + auto &OS = *OutputStream; + + OS << FName << ":" << S.getTotalSamples(); + if (Indent == 0) + OS << ":" << S.getHeadSamples(); + OS << "\n"; + + SampleSorter<LineLocation, SampleRecord> SortedSamples(S.getBodySamples()); + for (const auto &I : SortedSamples.get()) { + LineLocation Loc = I->first; + const SampleRecord &Sample = I->second; + OS.indent(Indent + 1); + if (Loc.Discriminator == 0) + OS << Loc.LineOffset << ": "; + else + OS << Loc.LineOffset << "." << Loc.Discriminator << ": "; + + OS << Sample.getSamples(); + + for (const auto &J : Sample.getCallTargets()) + OS << " " << J.first() << ":" << J.second; + OS << "\n"; + } + + SampleSorter<CallsiteLocation, FunctionSamples> SortedCallsiteSamples( + S.getCallsiteSamples()); + Indent += 1; + for (const auto &I : SortedCallsiteSamples.get()) { + CallsiteLocation Loc = I->first; + const FunctionSamples &CalleeSamples = I->second; + OS.indent(Indent); + if (Loc.Discriminator == 0) + OS << Loc.LineOffset << ": "; + else + OS << Loc.LineOffset << "." 
<< Loc.Discriminator << ": "; + if (std::error_code EC = write(Loc.CalleeName, CalleeSamples)) + return EC; + } + Indent -= 1; + + return sampleprof_error::success; +} + +std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) { + const auto &ret = NameTable.find(FName); + if (ret == NameTable.end()) + return sampleprof_error::truncated_name_table; + encodeULEB128(ret->second, *OutputStream); + return sampleprof_error::success; +} + +void SampleProfileWriterBinary::addName(StringRef FName) { + auto NextIdx = NameTable.size(); + NameTable.insert(std::make_pair(FName, NextIdx)); +} + +void SampleProfileWriterBinary::addNames(const FunctionSamples &S) { + // Add all the names in indirect call targets. + for (const auto &I : S.getBodySamples()) { + const SampleRecord &Sample = I.second; + for (const auto &J : Sample.getCallTargets()) + addName(J.first()); + } + + // Recursively add all the names for inlined callsites. + for (const auto &J : S.getCallsiteSamples()) { + CallsiteLocation Loc = J.first; + const FunctionSamples &CalleeSamples = J.second; + addName(Loc.CalleeName); + addNames(CalleeSamples); + } +} + +std::error_code SampleProfileWriterBinary::writeHeader( + const StringMap<FunctionSamples> &ProfileMap) { + auto &OS = *OutputStream; + + // Write file magic identifier. + encodeULEB128(SPMagic(), OS); + encodeULEB128(SPVersion(), OS); + + // Generate the name table for all the functions referenced in the profile. + for (const auto &I : ProfileMap) { + addName(I.first()); + addNames(I.second); + } + + // Write out the name table. 
+ encodeULEB128(NameTable.size(), OS); + for (auto N : NameTable) { + OS << N.first; + encodeULEB128(0, OS); + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileWriterBinary::writeBody(StringRef FName, + const FunctionSamples &S) { + auto &OS = *OutputStream; + + if (std::error_code EC = writeNameIdx(FName)) + return EC; + + encodeULEB128(S.getTotalSamples(), OS); + + // Emit all the body samples. + encodeULEB128(S.getBodySamples().size(), OS); + for (const auto &I : S.getBodySamples()) { + LineLocation Loc = I.first; + const SampleRecord &Sample = I.second; + encodeULEB128(Loc.LineOffset, OS); + encodeULEB128(Loc.Discriminator, OS); + encodeULEB128(Sample.getSamples(), OS); + encodeULEB128(Sample.getCallTargets().size(), OS); + for (const auto &J : Sample.getCallTargets()) { + StringRef Callee = J.first(); + uint64_t CalleeSamples = J.second; + if (std::error_code EC = writeNameIdx(Callee)) + return EC; + encodeULEB128(CalleeSamples, OS); + } + } + + // Recursively emit all the callsite samples. + encodeULEB128(S.getCallsiteSamples().size(), OS); + for (const auto &J : S.getCallsiteSamples()) { + CallsiteLocation Loc = J.first; + const FunctionSamples &CalleeSamples = J.second; + encodeULEB128(Loc.LineOffset, OS); + encodeULEB128(Loc.Discriminator, OS); + if (std::error_code EC = writeBody(Loc.CalleeName, CalleeSamples)) + return EC; + } + + return sampleprof_error::success; +} + +/// \brief Write samples of a top-level function to a binary file. +/// +/// \returns true if the samples were written successfully, false otherwise. +std::error_code SampleProfileWriterBinary::write(StringRef FName, + const FunctionSamples &S) { + encodeULEB128(S.getHeadSamples(), *OutputStream); + return writeBody(FName, S); +} + +/// \brief Create a sample profile file writer based on the specified format. +/// +/// \param Filename The file to create. +/// +/// \param Writer The writer to instantiate according to the specified format. 
+/// +/// \param Format Encoding format for the profile file. +/// +/// \returns an error code indicating the status of the created writer. +ErrorOr<std::unique_ptr<SampleProfileWriter>> +SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) { + std::error_code EC; + std::unique_ptr<raw_ostream> OS; + if (Format == SPF_Binary) + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_None)); + else + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_Text)); + if (EC) + return EC; + + return create(OS, Format); +} + +/// \brief Create a sample profile stream writer based on the specified format. +/// +/// \param OS The output stream to store the profile data to. +/// +/// \param Writer The writer to instantiate according to the specified format. +/// +/// \param Format Encoding format for the profile file. +/// +/// \returns an error code indicating the status of the created writer. +ErrorOr<std::unique_ptr<SampleProfileWriter>> +SampleProfileWriter::create(std::unique_ptr<raw_ostream> &OS, + SampleProfileFormat Format) { + std::error_code EC; + std::unique_ptr<SampleProfileWriter> Writer; + + if (Format == SPF_Binary) + Writer.reset(new SampleProfileWriterBinary(OS)); + else if (Format == SPF_Text) + Writer.reset(new SampleProfileWriterText(OS)); + else if (Format == SPF_GCC) + EC = sampleprof_error::unsupported_writing_format; + else + EC = sampleprof_error::unrecognized_format; + + if (EC) + return EC; + + return std::move(Writer); +} |