diff options
Diffstat (limited to 'contrib/llvm/lib/ProfileData')
-rw-r--r-- | contrib/llvm/lib/ProfileData/CoverageMapping.cpp | 530 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp | 559 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp | 183 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/InstrProf.cpp | 63 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/InstrProfIndexed.h | 56 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/InstrProfReader.cpp | 431 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/InstrProfWriter.cpp | 159 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/SampleProf.cpp | 51 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/SampleProfReader.cpp | 399 | ||||
-rw-r--r-- | contrib/llvm/lib/ProfileData/SampleProfWriter.cpp | 126 |
10 files changed, 2557 insertions, 0 deletions
diff --git a/contrib/llvm/lib/ProfileData/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp new file mode 100644 index 0000000..cf04fea --- /dev/null +++ b/contrib/llvm/lib/ProfileData/CoverageMapping.cpp @@ -0,0 +1,530 @@ +//=-- CoverageMapping.cpp - Code coverage mapping support ---------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for clang's and llvm's instrumentation based +// code coverage. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/CoverageMapping.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/ProfileData/CoverageMappingReader.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace coverage; + +#define DEBUG_TYPE "coverage-mapping" + +Counter CounterExpressionBuilder::get(const CounterExpression &E) { + auto It = ExpressionIndices.find(E); + if (It != ExpressionIndices.end()) + return Counter::getExpression(It->second); + unsigned I = Expressions.size(); + Expressions.push_back(E); + ExpressionIndices[E] = I; + return Counter::getExpression(I); +} + +void CounterExpressionBuilder::extractTerms( + Counter C, int Sign, SmallVectorImpl<std::pair<unsigned, int>> &Terms) { + switch (C.getKind()) { + case Counter::Zero: + break; + case Counter::CounterValueReference: + Terms.push_back(std::make_pair(C.getCounterID(), Sign)); + break; + case Counter::Expression: + const auto &E = Expressions[C.getExpressionID()]; + extractTerms(E.LHS, Sign, Terms); + extractTerms(E.RHS, E.Kind == CounterExpression::Subtract ? -Sign : Sign, + Terms); + break; + } +} + +Counter CounterExpressionBuilder::simplify(Counter ExpressionTree) { + // Gather constant terms. + llvm::SmallVector<std::pair<unsigned, int>, 32> Terms; + extractTerms(ExpressionTree, +1, Terms); + + // If there are no terms, this is just a zero. The algorithm below assumes at + // least one term. + if (Terms.size() == 0) + return Counter::getZero(); + + // Group the terms by counter ID. + std::sort(Terms.begin(), Terms.end(), + [](const std::pair<unsigned, int> &LHS, + const std::pair<unsigned, int> &RHS) { + return LHS.first < RHS.first; + }); + + // Combine terms by counter ID to eliminate counters that sum to zero. + auto Prev = Terms.begin(); + for (auto I = Prev + 1, E = Terms.end(); I != E; ++I) { + if (I->first == Prev->first) { + Prev->second += I->second; + continue; + } + ++Prev; + *Prev = *I; + } + Terms.erase(++Prev, Terms.end()); + + Counter C; + // Create additions. We do this before subtractions to avoid constructs like + // ((0 - X) + Y), as opposed to (Y - X). + for (auto Term : Terms) { + if (Term.second <= 0) + continue; + for (int I = 0; I < Term.second; ++I) + if (C.isZero()) + C = Counter::getCounter(Term.first); + else + C = get(CounterExpression(CounterExpression::Add, C, + Counter::getCounter(Term.first))); + } + + // Create subtractions. + for (auto Term : Terms) { + if (Term.second >= 0) + continue; + for (int I = 0; I < -Term.second; ++I) + C = get(CounterExpression(CounterExpression::Subtract, C, + Counter::getCounter(Term.first))); + } + return C; +} + +Counter CounterExpressionBuilder::add(Counter LHS, Counter RHS) { + return simplify(get(CounterExpression(CounterExpression::Add, LHS, RHS))); +} + +Counter CounterExpressionBuilder::subtract(Counter LHS, Counter RHS) { + return simplify( + get(CounterExpression(CounterExpression::Subtract, LHS, RHS))); +} + +void CounterMappingContext::dump(const Counter &C, + llvm::raw_ostream &OS) const { + switch (C.getKind()) { + case Counter::Zero: + OS << '0'; + return; + case Counter::CounterValueReference: + OS << '#' << C.getCounterID(); + break; + case Counter::Expression: { + if (C.getExpressionID() >= Expressions.size()) + return; + const auto &E = Expressions[C.getExpressionID()]; + OS << '('; + dump(E.LHS, OS); + OS << (E.Kind == CounterExpression::Subtract ? " - " : " + "); + dump(E.RHS, OS); + OS << ')'; + break; + } + } + if (CounterValues.empty()) + return; + ErrorOr<int64_t> Value = evaluate(C); + if (!Value) + return; + OS << '[' << *Value << ']'; +} + +ErrorOr<int64_t> CounterMappingContext::evaluate(const Counter &C) const { + switch (C.getKind()) { + case Counter::Zero: + return 0; + case Counter::CounterValueReference: + if (C.getCounterID() >= CounterValues.size()) + return make_error_code(errc::argument_out_of_domain); + return CounterValues[C.getCounterID()]; + case Counter::Expression: { + if (C.getExpressionID() >= Expressions.size()) + return make_error_code(errc::argument_out_of_domain); + const auto &E = Expressions[C.getExpressionID()]; + ErrorOr<int64_t> LHS = evaluate(E.LHS); + if (!LHS) + return LHS; + ErrorOr<int64_t> RHS = evaluate(E.RHS); + if (!RHS) + return RHS; + return E.Kind == CounterExpression::Subtract ? *LHS - *RHS : *LHS + *RHS; + } + } + llvm_unreachable("Unhandled CounterKind"); +} + +void FunctionRecordIterator::skipOtherFiles() { + while (Current != Records.end() && !Filename.empty() && + Filename != Current->Filenames[0]) + ++Current; + if (Current == Records.end()) + *this = FunctionRecordIterator(); +} + +/// Get the function name from the record, removing the filename prefix if +/// necessary. +static StringRef getFuncNameWithoutPrefix(const CoverageMappingRecord &Record) { + StringRef FunctionName = Record.FunctionName; + if (Record.Filenames.empty()) + return FunctionName; + StringRef Filename = sys::path::filename(Record.Filenames[0]); + if (FunctionName.startswith(Filename)) + FunctionName = FunctionName.drop_front(Filename.size() + 1); + return FunctionName; +} + +ErrorOr<std::unique_ptr<CoverageMapping>> +CoverageMapping::load(CoverageMappingReader &CoverageReader, + IndexedInstrProfReader &ProfileReader) { + auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping()); + + std::vector<uint64_t> Counts; + for (const auto &Record : CoverageReader) { + CounterMappingContext Ctx(Record.Expressions); + + Counts.clear(); + if (std::error_code EC = ProfileReader.getFunctionCounts( + Record.FunctionName, Record.FunctionHash, Counts)) { + if (EC == instrprof_error::hash_mismatch) { + Coverage->MismatchedFunctionCount++; + continue; + } else if (EC != instrprof_error::unknown_function) + return EC; + Counts.assign(Record.MappingRegions.size(), 0); + } + Ctx.setCounts(Counts); + + assert(!Record.MappingRegions.empty() && "Function has no regions"); + + FunctionRecord Function(getFuncNameWithoutPrefix(Record), Record.Filenames); + for (const auto &Region : Record.MappingRegions) { + ErrorOr<int64_t> ExecutionCount = Ctx.evaluate(Region.Count); + if (!ExecutionCount) + break; + Function.pushRegion(Region, *ExecutionCount); + } + if (Function.CountedRegions.size() != Record.MappingRegions.size()) { + Coverage->MismatchedFunctionCount++; + continue; + } + + Coverage->Functions.push_back(std::move(Function)); + } + + return std::move(Coverage); +} + +ErrorOr<std::unique_ptr<CoverageMapping>> +CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename, + StringRef Arch) { + auto CounterMappingBuff = MemoryBuffer::getFileOrSTDIN(ObjectFilename); + if (std::error_code EC = CounterMappingBuff.getError()) + return EC; + auto CoverageReaderOrErr = + BinaryCoverageReader::create(CounterMappingBuff.get(), Arch); + if (std::error_code EC = CoverageReaderOrErr.getError()) + return EC; + auto CoverageReader = std::move(CoverageReaderOrErr.get()); + auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename); + if (auto EC = ProfileReaderOrErr.getError()) + return EC; + auto ProfileReader = std::move(ProfileReaderOrErr.get()); + return load(*CoverageReader, *ProfileReader); +} + +namespace { +/// \brief Distributes functions into instantiation sets. +/// +/// An instantiation set is a collection of functions that have the same source +/// code, ie, template functions specializations. +class FunctionInstantiationSetCollector { + typedef DenseMap<std::pair<unsigned, unsigned>, + std::vector<const FunctionRecord *>> MapT; + MapT InstantiatedFunctions; + +public: + void insert(const FunctionRecord &Function, unsigned FileID) { + auto I = Function.CountedRegions.begin(), E = Function.CountedRegions.end(); + while (I != E && I->FileID != FileID) + ++I; + assert(I != E && "function does not cover the given file"); + auto &Functions = InstantiatedFunctions[I->startLoc()]; + Functions.push_back(&Function); + } + + MapT::iterator begin() { return InstantiatedFunctions.begin(); } + + MapT::iterator end() { return InstantiatedFunctions.end(); } +}; + +class SegmentBuilder { + std::vector<CoverageSegment> Segments; + SmallVector<const CountedRegion *, 8> ActiveRegions; + + /// Start a segment with no count specified. + void startSegment(unsigned Line, unsigned Col) { + DEBUG(dbgs() << "Top level segment at " << Line << ":" << Col << "\n"); + Segments.emplace_back(Line, Col, /*IsRegionEntry=*/false); + } + + /// Start a segment with the given Region's count. + void startSegment(unsigned Line, unsigned Col, bool IsRegionEntry, + const CountedRegion &Region) { + if (Segments.empty()) + Segments.emplace_back(Line, Col, IsRegionEntry); + CoverageSegment S = Segments.back(); + // Avoid creating empty regions. + if (S.Line != Line || S.Col != Col) { + Segments.emplace_back(Line, Col, IsRegionEntry); + S = Segments.back(); + } + DEBUG(dbgs() << "Segment at " << Line << ":" << Col); + // Set this region's count. + if (Region.Kind != coverage::CounterMappingRegion::SkippedRegion) { + DEBUG(dbgs() << " with count " << Region.ExecutionCount); + Segments.back().setCount(Region.ExecutionCount); + } + DEBUG(dbgs() << "\n"); + } + + /// Start a segment for the given region. + void startSegment(const CountedRegion &Region) { + startSegment(Region.LineStart, Region.ColumnStart, true, Region); + } + + /// Pop the top region off of the active stack, starting a new segment with + /// the containing Region's count. + void popRegion() { + const CountedRegion *Active = ActiveRegions.back(); + unsigned Line = Active->LineEnd, Col = Active->ColumnEnd; + ActiveRegions.pop_back(); + if (ActiveRegions.empty()) + startSegment(Line, Col); + else + startSegment(Line, Col, false, *ActiveRegions.back()); + } + +public: + /// Build a list of CoverageSegments from a sorted list of Regions. + std::vector<CoverageSegment> buildSegments(ArrayRef<CountedRegion> Regions) { + const CountedRegion *PrevRegion = nullptr; + for (const auto &Region : Regions) { + // Pop any regions that end before this one starts. + while (!ActiveRegions.empty() && + ActiveRegions.back()->endLoc() <= Region.startLoc()) + popRegion(); + if (PrevRegion && PrevRegion->startLoc() == Region.startLoc() && + PrevRegion->endLoc() == Region.endLoc()) { + if (Region.Kind == coverage::CounterMappingRegion::CodeRegion) + Segments.back().addCount(Region.ExecutionCount); + } else { + // Add this region to the stack. + ActiveRegions.push_back(&Region); + startSegment(Region); + } + PrevRegion = &Region; + } + // Pop any regions that are left in the stack. + while (!ActiveRegions.empty()) + popRegion(); + return Segments; + } +}; +} + +std::vector<StringRef> CoverageMapping::getUniqueSourceFiles() const { + std::vector<StringRef> Filenames; + for (const auto &Function : getCoveredFunctions()) + Filenames.insert(Filenames.end(), Function.Filenames.begin(), + Function.Filenames.end()); + std::sort(Filenames.begin(), Filenames.end()); + auto Last = std::unique(Filenames.begin(), Filenames.end()); + Filenames.erase(Last, Filenames.end()); + return Filenames; +} + +static SmallBitVector gatherFileIDs(StringRef SourceFile, + const FunctionRecord &Function) { + SmallBitVector FilenameEquivalence(Function.Filenames.size(), false); + for (unsigned I = 0, E = Function.Filenames.size(); I < E; ++I) + if (SourceFile == Function.Filenames[I]) + FilenameEquivalence[I] = true; + return FilenameEquivalence; +} + +static Optional<unsigned> findMainViewFileID(StringRef SourceFile, + const FunctionRecord &Function) { + SmallBitVector IsNotExpandedFile(Function.Filenames.size(), true); + SmallBitVector FilenameEquivalence = gatherFileIDs(SourceFile, Function); + for (const auto &CR : Function.CountedRegions) + if (CR.Kind == CounterMappingRegion::ExpansionRegion && + FilenameEquivalence[CR.FileID]) + IsNotExpandedFile[CR.ExpandedFileID] = false; + IsNotExpandedFile &= FilenameEquivalence; + int I = IsNotExpandedFile.find_first(); + if (I == -1) + return None; + return I; +} + +static Optional<unsigned> findMainViewFileID(const FunctionRecord &Function) { + SmallBitVector IsNotExpandedFile(Function.Filenames.size(), true); + for (const auto &CR : Function.CountedRegions) + if (CR.Kind == CounterMappingRegion::ExpansionRegion) + IsNotExpandedFile[CR.ExpandedFileID] = false; + int I = IsNotExpandedFile.find_first(); + if (I == -1) + return None; + return I; +} + +/// Sort a nested sequence of regions from a single file. +template <class It> static void sortNestedRegions(It First, It Last) { + std::sort(First, Last, + [](const CountedRegion &LHS, const CountedRegion &RHS) { + if (LHS.startLoc() == RHS.startLoc()) + // When LHS completely contains RHS, we sort LHS first. + return RHS.endLoc() < LHS.endLoc(); + return LHS.startLoc() < RHS.startLoc(); + }); +} + +static bool isExpansion(const CountedRegion &R, unsigned FileID) { + return R.Kind == CounterMappingRegion::ExpansionRegion && R.FileID == FileID; +} + +CoverageData CoverageMapping::getCoverageForFile(StringRef Filename) { + CoverageData FileCoverage(Filename); + std::vector<coverage::CountedRegion> Regions; + + for (const auto &Function : Functions) { + auto MainFileID = findMainViewFileID(Filename, Function); + if (!MainFileID) + continue; + auto FileIDs = gatherFileIDs(Filename, Function); + for (const auto &CR : Function.CountedRegions) + if (FileIDs.test(CR.FileID)) { + Regions.push_back(CR); + if (isExpansion(CR, *MainFileID)) + FileCoverage.Expansions.emplace_back(CR, Function); + } + } + + sortNestedRegions(Regions.begin(), Regions.end()); + DEBUG(dbgs() << "Emitting segments for file: " << Filename << "\n"); + FileCoverage.Segments = SegmentBuilder().buildSegments(Regions); + + return FileCoverage; +} + +std::vector<const FunctionRecord *> +CoverageMapping::getInstantiations(StringRef Filename) { + FunctionInstantiationSetCollector InstantiationSetCollector; + for (const auto &Function : Functions) { + auto MainFileID = findMainViewFileID(Filename, Function); + if (!MainFileID) + continue; + InstantiationSetCollector.insert(Function, *MainFileID); + } + + std::vector<const FunctionRecord *> Result; + for (const auto &InstantiationSet : InstantiationSetCollector) { + if (InstantiationSet.second.size() < 2) + continue; + Result.insert(Result.end(), InstantiationSet.second.begin(), + InstantiationSet.second.end()); + } + return Result; +} + +CoverageData +CoverageMapping::getCoverageForFunction(const FunctionRecord &Function) { + auto MainFileID = findMainViewFileID(Function); + if (!MainFileID) + return CoverageData(); + + CoverageData FunctionCoverage(Function.Filenames[*MainFileID]); + std::vector<coverage::CountedRegion> Regions; + for (const auto &CR : Function.CountedRegions) + if (CR.FileID == *MainFileID) { + Regions.push_back(CR); + if (isExpansion(CR, *MainFileID)) + FunctionCoverage.Expansions.emplace_back(CR, Function); + } + + sortNestedRegions(Regions.begin(), Regions.end()); + DEBUG(dbgs() << "Emitting segments for function: " << Function.Name << "\n"); + FunctionCoverage.Segments = SegmentBuilder().buildSegments(Regions); + + return FunctionCoverage; +} + +CoverageData +CoverageMapping::getCoverageForExpansion(const ExpansionRecord &Expansion) { + CoverageData ExpansionCoverage( + Expansion.Function.Filenames[Expansion.FileID]); + std::vector<coverage::CountedRegion> Regions; + for (const auto &CR : Expansion.Function.CountedRegions) + if (CR.FileID == Expansion.FileID) { + Regions.push_back(CR); + if (isExpansion(CR, Expansion.FileID)) + ExpansionCoverage.Expansions.emplace_back(CR, Expansion.Function); + } + + sortNestedRegions(Regions.begin(), Regions.end()); + DEBUG(dbgs() << "Emitting segments for expansion of file " << Expansion.FileID + << "\n"); + ExpansionCoverage.Segments = SegmentBuilder().buildSegments(Regions); + + return ExpansionCoverage; +} + +namespace { +class CoverageMappingErrorCategoryType : public std::error_category { + const char *name() const LLVM_NOEXCEPT override { return "llvm.coveragemap"; } + std::string message(int IE) const override { + auto E = static_cast<coveragemap_error>(IE); + switch (E) { + case coveragemap_error::success: + return "Success"; + case coveragemap_error::eof: + return "End of File"; + case coveragemap_error::no_data_found: + return "No coverage data found"; + case coveragemap_error::unsupported_version: + return "Unsupported coverage format version"; + case coveragemap_error::truncated: + return "Truncated coverage data"; + case coveragemap_error::malformed: + return "Malformed coverage data"; + } + llvm_unreachable("A value of coveragemap_error has no message."); + } +}; +} + +static ManagedStatic<CoverageMappingErrorCategoryType> ErrorCategory; + +const std::error_category &llvm::coveragemap_category() { + return *ErrorCategory; +} diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp new file mode 100644 index 0000000..334a3f5 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/CoverageMappingReader.cpp @@ -0,0 +1,559 @@ +//=-- CoverageMappingReader.cpp - Code coverage mapping reader ----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading coverage mapping data for +// instrumentation based coverage. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/CoverageMappingReader.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/Object/MachOUniversal.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; +using namespace coverage; +using namespace object; + +#define DEBUG_TYPE "coverage-mapping" + +void CoverageMappingIterator::increment() { + // Check if all the records were read or if an error occurred while reading + // the next record. + if (Reader->readNextRecord(Record)) + *this = CoverageMappingIterator(); +} + +std::error_code RawCoverageReader::readULEB128(uint64_t &Result) { + if (Data.size() < 1) + return coveragemap_error::truncated; + unsigned N = 0; + Result = decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N); + if (N > Data.size()) + return coveragemap_error::malformed; + Data = Data.substr(N); + return std::error_code(); +} + +std::error_code RawCoverageReader::readIntMax(uint64_t &Result, + uint64_t MaxPlus1) { + if (auto Err = readULEB128(Result)) + return Err; + if (Result >= MaxPlus1) + return coveragemap_error::malformed; + return std::error_code(); +} + +std::error_code RawCoverageReader::readSize(uint64_t &Result) { + if (auto Err = readULEB128(Result)) + return Err; + // Sanity check the number. + if (Result > Data.size()) + return coveragemap_error::malformed; + return std::error_code(); +} + +std::error_code RawCoverageReader::readString(StringRef &Result) { + uint64_t Length; + if (auto Err = readSize(Length)) + return Err; + Result = Data.substr(0, Length); + Data = Data.substr(Length); + return std::error_code(); +} + +std::error_code RawCoverageFilenamesReader::read() { + uint64_t NumFilenames; + if (auto Err = readSize(NumFilenames)) + return Err; + for (size_t I = 0; I < NumFilenames; ++I) { + StringRef Filename; + if (auto Err = readString(Filename)) + return Err; + Filenames.push_back(Filename); + } + return std::error_code(); +} + +std::error_code RawCoverageMappingReader::decodeCounter(unsigned Value, + Counter &C) { + auto Tag = Value & Counter::EncodingTagMask; + switch (Tag) { + case Counter::Zero: + C = Counter::getZero(); + return std::error_code(); + case Counter::CounterValueReference: + C = Counter::getCounter(Value >> Counter::EncodingTagBits); + return std::error_code(); + default: + break; + } + Tag -= Counter::Expression; + switch (Tag) { + case CounterExpression::Subtract: + case CounterExpression::Add: { + auto ID = Value >> Counter::EncodingTagBits; + if (ID >= Expressions.size()) + return coveragemap_error::malformed; + Expressions[ID].Kind = CounterExpression::ExprKind(Tag); + C = Counter::getExpression(ID); + break; + } + default: + return coveragemap_error::malformed; + } + return std::error_code(); +} + +std::error_code RawCoverageMappingReader::readCounter(Counter &C) { + uint64_t EncodedCounter; + if (auto Err = + readIntMax(EncodedCounter, std::numeric_limits<unsigned>::max())) + return Err; + if (auto Err = decodeCounter(EncodedCounter, C)) + return Err; + return std::error_code(); +} + +static const unsigned EncodingExpansionRegionBit = 1 + << Counter::EncodingTagBits; + +/// \brief Read the sub-array of regions for the given inferred file id. +/// \param NumFileIDs the number of file ids that are defined for this +/// function. +std::error_code RawCoverageMappingReader::readMappingRegionsSubArray( + std::vector<CounterMappingRegion> &MappingRegions, unsigned InferredFileID, + size_t NumFileIDs) { + uint64_t NumRegions; + if (auto Err = readSize(NumRegions)) + return Err; + unsigned LineStart = 0; + for (size_t I = 0; I < NumRegions; ++I) { + Counter C; + CounterMappingRegion::RegionKind Kind = CounterMappingRegion::CodeRegion; + + // Read the combined counter + region kind. + uint64_t EncodedCounterAndRegion; + if (auto Err = readIntMax(EncodedCounterAndRegion, + std::numeric_limits<unsigned>::max())) + return Err; + unsigned Tag = EncodedCounterAndRegion & Counter::EncodingTagMask; + uint64_t ExpandedFileID = 0; + if (Tag != Counter::Zero) { + if (auto Err = decodeCounter(EncodedCounterAndRegion, C)) + return Err; + } else { + // Is it an expansion region? + if (EncodedCounterAndRegion & EncodingExpansionRegionBit) { + Kind = CounterMappingRegion::ExpansionRegion; + ExpandedFileID = EncodedCounterAndRegion >> + Counter::EncodingCounterTagAndExpansionRegionTagBits; + if (ExpandedFileID >= NumFileIDs) + return coveragemap_error::malformed; + } else { + switch (EncodedCounterAndRegion >> + Counter::EncodingCounterTagAndExpansionRegionTagBits) { + case CounterMappingRegion::CodeRegion: + // Don't do anything when we have a code region with a zero counter. + break; + case CounterMappingRegion::SkippedRegion: + Kind = CounterMappingRegion::SkippedRegion; + break; + default: + return coveragemap_error::malformed; + } + } + } + + // Read the source range. + uint64_t LineStartDelta, ColumnStart, NumLines, ColumnEnd; + if (auto Err = + readIntMax(LineStartDelta, std::numeric_limits<unsigned>::max())) + return Err; + if (auto Err = readULEB128(ColumnStart)) + return Err; + if (ColumnStart > std::numeric_limits<unsigned>::max()) + return coveragemap_error::malformed; + if (auto Err = readIntMax(NumLines, std::numeric_limits<unsigned>::max())) + return Err; + if (auto Err = readIntMax(ColumnEnd, std::numeric_limits<unsigned>::max())) + return Err; + LineStart += LineStartDelta; + // Adjust the column locations for the empty regions that are supposed to + // cover whole lines. Those regions should be encoded with the + // column range (1 -> std::numeric_limits<unsigned>::max()), but because + // the encoded std::numeric_limits<unsigned>::max() is several bytes long, + // we set the column range to (0 -> 0) to ensure that the column start and + // column end take up one byte each. + // The std::numeric_limits<unsigned>::max() is used to represent a column + // position at the end of the line without knowing the length of that line. + if (ColumnStart == 0 && ColumnEnd == 0) { + ColumnStart = 1; + ColumnEnd = std::numeric_limits<unsigned>::max(); + } + + DEBUG({ + dbgs() << "Counter in file " << InferredFileID << " " << LineStart << ":" + << ColumnStart << " -> " << (LineStart + NumLines) << ":" + << ColumnEnd << ", "; + if (Kind == CounterMappingRegion::ExpansionRegion) + dbgs() << "Expands to file " << ExpandedFileID; + else + CounterMappingContext(Expressions).dump(C, dbgs()); + dbgs() << "\n"; + }); + + MappingRegions.push_back(CounterMappingRegion( + C, InferredFileID, ExpandedFileID, LineStart, ColumnStart, + LineStart + NumLines, ColumnEnd, Kind)); + } + return std::error_code(); +} + +std::error_code RawCoverageMappingReader::read() { + + // Read the virtual file mapping. + llvm::SmallVector<unsigned, 8> VirtualFileMapping; + uint64_t NumFileMappings; + if (auto Err = readSize(NumFileMappings)) + return Err; + for (size_t I = 0; I < NumFileMappings; ++I) { + uint64_t FilenameIndex; + if (auto Err = readIntMax(FilenameIndex, TranslationUnitFilenames.size())) + return Err; + VirtualFileMapping.push_back(FilenameIndex); + } + + // Construct the files using unique filenames and virtual file mapping. + for (auto I : VirtualFileMapping) { + Filenames.push_back(TranslationUnitFilenames[I]); + } + + // Read the expressions. + uint64_t NumExpressions; + if (auto Err = readSize(NumExpressions)) + return Err; + // Create an array of dummy expressions that get the proper counters + // when the expressions are read, and the proper kinds when the counters + // are decoded. + Expressions.resize( + NumExpressions, + CounterExpression(CounterExpression::Subtract, Counter(), Counter())); + for (size_t I = 0; I < NumExpressions; ++I) { + if (auto Err = readCounter(Expressions[I].LHS)) + return Err; + if (auto Err = readCounter(Expressions[I].RHS)) + return Err; + } + + // Read the mapping regions sub-arrays. + for (unsigned InferredFileID = 0, S = VirtualFileMapping.size(); + InferredFileID < S; ++InferredFileID) { + if (auto Err = readMappingRegionsSubArray(MappingRegions, InferredFileID, + VirtualFileMapping.size())) + return Err; + } + + // Set the counters for the expansion regions. + // i.e. Counter of expansion region = counter of the first region + // from the expanded file. + // Perform multiple passes to correctly propagate the counters through + // all the nested expansion regions. + SmallVector<CounterMappingRegion *, 8> FileIDExpansionRegionMapping; + FileIDExpansionRegionMapping.resize(VirtualFileMapping.size(), nullptr); + for (unsigned Pass = 1, S = VirtualFileMapping.size(); Pass < S; ++Pass) { + for (auto &R : MappingRegions) { + if (R.Kind != CounterMappingRegion::ExpansionRegion) + continue; + assert(!FileIDExpansionRegionMapping[R.ExpandedFileID]); + FileIDExpansionRegionMapping[R.ExpandedFileID] = &R; + } + for (auto &R : MappingRegions) { + if (FileIDExpansionRegionMapping[R.FileID]) { + FileIDExpansionRegionMapping[R.FileID]->Count = R.Count; + FileIDExpansionRegionMapping[R.FileID] = nullptr; + } + } + } + + return std::error_code(); +} + +namespace { + +/// \brief A helper structure to access the data from a section +/// in an object file. +struct SectionData { + StringRef Data; + uint64_t Address; + + std::error_code load(SectionRef &Section) { + if (auto Err = Section.getContents(Data)) + return Err; + Address = Section.getAddress(); + return std::error_code(); + } + + std::error_code get(uint64_t Pointer, size_t Size, StringRef &Result) { + if (Pointer < Address) + return coveragemap_error::malformed; + auto Offset = Pointer - Address; + if (Offset + Size > Data.size()) + return coveragemap_error::malformed; + Result = Data.substr(Pointer - Address, Size); + return std::error_code(); + } +}; +} + +template <typename T, support::endianness Endian> +std::error_code readCoverageMappingData( + SectionData &ProfileNames, StringRef Data, + std::vector<BinaryCoverageReader::ProfileMappingRecord> &Records, + std::vector<StringRef> &Filenames) { + using namespace support; + llvm::DenseSet<T> UniqueFunctionMappingData; + + // Read the records in the coverage data section. + for (const char *Buf = Data.data(), *End = Buf + Data.size(); Buf < End;) { + if (Buf + 4 * sizeof(uint32_t) > End) + return coveragemap_error::malformed; + uint32_t NRecords = endian::readNext<uint32_t, Endian, unaligned>(Buf); + uint32_t FilenamesSize = endian::readNext<uint32_t, Endian, unaligned>(Buf); + uint32_t CoverageSize = endian::readNext<uint32_t, Endian, unaligned>(Buf); + uint32_t Version = endian::readNext<uint32_t, Endian, unaligned>(Buf); + + switch (Version) { + case CoverageMappingVersion1: + break; + default: + return coveragemap_error::unsupported_version; + } + + // Skip past the function records, saving the start and end for later. + const char *FunBuf = Buf; + Buf += NRecords * (sizeof(T) + 2 * sizeof(uint32_t) + sizeof(uint64_t)); + const char *FunEnd = Buf; + + // Get the filenames. + if (Buf + FilenamesSize > End) + return coveragemap_error::malformed; + size_t FilenamesBegin = Filenames.size(); + RawCoverageFilenamesReader Reader(StringRef(Buf, FilenamesSize), Filenames); + if (auto Err = Reader.read()) + return Err; + Buf += FilenamesSize; + + // We'll read the coverage mapping records in the loop below. + const char *CovBuf = Buf; + Buf += CoverageSize; + const char *CovEnd = Buf; + + if (Buf > End) + return coveragemap_error::malformed; + // Each coverage map has an alignment of 8, so we need to adjust alignment + // before reading the next map. + Buf += alignmentAdjustment(Buf, 8); + + while (FunBuf < FunEnd) { + // Read the function information + T NamePtr = endian::readNext<T, Endian, unaligned>(FunBuf); + uint32_t NameSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf); + uint32_t DataSize = endian::readNext<uint32_t, Endian, unaligned>(FunBuf); + uint64_t FuncHash = endian::readNext<uint64_t, Endian, unaligned>(FunBuf); + + // Now use that to read the coverage data. + if (CovBuf + DataSize > CovEnd) + return coveragemap_error::malformed; + auto Mapping = StringRef(CovBuf, DataSize); + CovBuf += DataSize; + + // Ignore this record if we already have a record that points to the same + // function name. This is useful to ignore the redundant records for the + // functions with ODR linkage. + if (!UniqueFunctionMappingData.insert(NamePtr).second) + continue; + + // Finally, grab the name and create a record. + StringRef FuncName; + if (std::error_code EC = ProfileNames.get(NamePtr, NameSize, FuncName)) + return EC; + Records.push_back(BinaryCoverageReader::ProfileMappingRecord( + CoverageMappingVersion(Version), FuncName, FuncHash, Mapping, + FilenamesBegin, Filenames.size() - FilenamesBegin)); + } + } + + return std::error_code(); +} + +static const char *TestingFormatMagic = "llvmcovmtestdata"; + +static std::error_code loadTestingFormat(StringRef Data, + SectionData &ProfileNames, + StringRef &CoverageMapping, + uint8_t &BytesInAddress, + support::endianness &Endian) { + BytesInAddress = 8; + Endian = support::endianness::little; + + Data = Data.substr(StringRef(TestingFormatMagic).size()); + if (Data.size() < 1) + return coveragemap_error::truncated; + unsigned N = 0; + auto ProfileNamesSize = + decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N); + if (N > Data.size()) + return coveragemap_error::malformed; + Data = Data.substr(N); + if (Data.size() < 1) + return coveragemap_error::truncated; + N = 0; + ProfileNames.Address = + decodeULEB128(reinterpret_cast<const uint8_t *>(Data.data()), &N); + if (N > Data.size()) + return coveragemap_error::malformed; + Data = Data.substr(N); + if (Data.size() < ProfileNamesSize) + return coveragemap_error::malformed; + ProfileNames.Data = Data.substr(0, ProfileNamesSize); + CoverageMapping = Data.substr(ProfileNamesSize); + return std::error_code(); +} + +static ErrorOr<SectionRef> lookupSection(ObjectFile &OF, StringRef Name) { + StringRef FoundName; + for (const auto &Section : OF.sections()) { + if (auto EC = Section.getName(FoundName)) + return EC; + if (FoundName == Name) + return Section; + } + return coveragemap_error::no_data_found; +} + +static std::error_code loadBinaryFormat(MemoryBufferRef ObjectBuffer, + SectionData &ProfileNames, + StringRef &CoverageMapping, + uint8_t &BytesInAddress, + support::endianness &Endian, + StringRef Arch) { + auto BinOrErr = object::createBinary(ObjectBuffer); + if (std::error_code EC = BinOrErr.getError()) + return EC; + auto Bin = std::move(BinOrErr.get()); + std::unique_ptr<ObjectFile> OF; + if (auto *Universal = dyn_cast<object::MachOUniversalBinary>(Bin.get())) { + // If we have a universal binary, try to look up the object for the + // appropriate architecture. + auto ObjectFileOrErr = Universal->getObjectForArch(Arch); + if (std::error_code EC = ObjectFileOrErr.getError()) + return EC; + OF = std::move(ObjectFileOrErr.get()); + } else if (isa<object::ObjectFile>(Bin.get())) { + // For any other object file, upcast and take ownership. + OF.reset(cast<object::ObjectFile>(Bin.release())); + // If we've asked for a particular arch, make sure they match. + if (!Arch.empty() && OF->getArch() != Triple(Arch).getArch()) + return object_error::arch_not_found; + } else + // We can only handle object files. + return coveragemap_error::malformed; + + // The coverage uses native pointer sizes for the object it's written in. + BytesInAddress = OF->getBytesInAddress(); + Endian = OF->isLittleEndian() ? support::endianness::little + : support::endianness::big; + + // Look for the sections that we are interested in. + auto NamesSection = lookupSection(*OF, "__llvm_prf_names"); + if (auto EC = NamesSection.getError()) + return EC; + auto CoverageSection = lookupSection(*OF, "__llvm_covmap"); + if (auto EC = CoverageSection.getError()) + return EC; + + // Get the contents of the given sections. + if (std::error_code EC = CoverageSection->getContents(CoverageMapping)) + return EC; + if (std::error_code EC = ProfileNames.load(*NamesSection)) + return EC; + + return std::error_code(); +} + +ErrorOr<std::unique_ptr<BinaryCoverageReader>> +BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer, + StringRef Arch) { + std::unique_ptr<BinaryCoverageReader> Reader(new BinaryCoverageReader()); + + SectionData Profile; + StringRef Coverage; + uint8_t BytesInAddress; + support::endianness Endian; + std::error_code EC; + if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic)) + // This is a special format used for testing. + EC = loadTestingFormat(ObjectBuffer->getBuffer(), Profile, Coverage, + BytesInAddress, Endian); + else + EC = loadBinaryFormat(ObjectBuffer->getMemBufferRef(), Profile, Coverage, + BytesInAddress, Endian, Arch); + if (EC) + return EC; + + if (BytesInAddress == 4 && Endian == support::endianness::little) + EC = readCoverageMappingData<uint32_t, support::endianness::little>( + Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + else if (BytesInAddress == 4 && Endian == support::endianness::big) + EC = readCoverageMappingData<uint32_t, support::endianness::big>( + Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + else if (BytesInAddress == 8 && Endian == support::endianness::little) + EC = readCoverageMappingData<uint64_t, support::endianness::little>( + Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + else if (BytesInAddress == 8 && Endian == support::endianness::big) + EC = readCoverageMappingData<uint64_t, support::endianness::big>( + Profile, Coverage, Reader->MappingRecords, Reader->Filenames); + else + return coveragemap_error::malformed; + if (EC) + return EC; + return std::move(Reader); +} + +std::error_code +BinaryCoverageReader::readNextRecord(CoverageMappingRecord &Record) { + if (CurrentRecord >= MappingRecords.size()) + return coveragemap_error::eof; + + FunctionsFilenames.clear(); + Expressions.clear(); + MappingRegions.clear(); + auto &R = MappingRecords[CurrentRecord]; + RawCoverageMappingReader Reader( + R.CoverageMapping, + makeArrayRef(Filenames).slice(R.FilenamesBegin, R.FilenamesSize), + FunctionsFilenames, Expressions, MappingRegions); + if (auto Err = Reader.read()) + return Err; + + Record.FunctionName = R.FunctionName; + Record.FunctionHash = R.FunctionHash; + Record.Filenames = FunctionsFilenames; + Record.Expressions = Expressions; + Record.MappingRegions = MappingRegions; + + ++CurrentRecord; + return std::error_code(); +} diff --git a/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp b/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp new file mode 100644 index 0000000..d90d2f5 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/CoverageMappingWriter.cpp @@ -0,0 +1,183 @@ +//=-- CoverageMappingWriter.cpp - Code coverage mapping writer -------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing coverage mapping data for +// instrumentation based coverage. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/CoverageMappingWriter.h" +#include "llvm/Support/LEB128.h" + +using namespace llvm; +using namespace coverage; + +void CoverageFilenamesSectionWriter::write(raw_ostream &OS) { + encodeULEB128(Filenames.size(), OS); + for (const auto &Filename : Filenames) { + encodeULEB128(Filename.size(), OS); + OS << Filename; + } +} + +namespace { +/// \brief Gather only the expressions that are used by the mapping +/// regions in this function. +class CounterExpressionsMinimizer { + ArrayRef<CounterExpression> Expressions; + llvm::SmallVector<CounterExpression, 16> UsedExpressions; + std::vector<unsigned> AdjustedExpressionIDs; + +public: + void mark(Counter C) { + if (!C.isExpression()) + return; + unsigned ID = C.getExpressionID(); + AdjustedExpressionIDs[ID] = 1; + mark(Expressions[ID].LHS); + mark(Expressions[ID].RHS); + } + + void gatherUsed(Counter C) { + if (!C.isExpression() || !AdjustedExpressionIDs[C.getExpressionID()]) + return; + AdjustedExpressionIDs[C.getExpressionID()] = UsedExpressions.size(); + const auto &E = Expressions[C.getExpressionID()]; + UsedExpressions.push_back(E); + gatherUsed(E.LHS); + gatherUsed(E.RHS); + } + + CounterExpressionsMinimizer(ArrayRef<CounterExpression> Expressions, + ArrayRef<CounterMappingRegion> MappingRegions) + : Expressions(Expressions) { + AdjustedExpressionIDs.resize(Expressions.size(), 0); + for (const auto &I : MappingRegions) + mark(I.Count); + for (const auto &I : MappingRegions) + gatherUsed(I.Count); + } + + ArrayRef<CounterExpression> getExpressions() const { return UsedExpressions; } + + /// \brief Adjust the given counter to correctly transition from the old + /// expression ids to the new expression ids. + Counter adjust(Counter C) const { + if (C.isExpression()) + C = Counter::getExpression(AdjustedExpressionIDs[C.getExpressionID()]); + return C; + } +}; +} + +/// \brief Encode the counter. +/// +/// The encoding uses the following format: +/// Low 2 bits - Tag: +/// Counter::Zero(0) - A Counter with kind Counter::Zero +/// Counter::CounterValueReference(1) - A counter with kind +/// Counter::CounterValueReference +/// Counter::Expression(2) + CounterExpression::Subtract(0) - +/// A counter with kind Counter::Expression and an expression +/// with kind CounterExpression::Subtract +/// Counter::Expression(2) + CounterExpression::Add(1) - +/// A counter with kind Counter::Expression and an expression +/// with kind CounterExpression::Add +/// Remaining bits - Counter/Expression ID. +static unsigned encodeCounter(ArrayRef<CounterExpression> Expressions, + Counter C) { + unsigned Tag = unsigned(C.getKind()); + if (C.isExpression()) + Tag += Expressions[C.getExpressionID()].Kind; + unsigned ID = C.getCounterID(); + assert(ID <= + (std::numeric_limits<unsigned>::max() >> Counter::EncodingTagBits)); + return Tag | (ID << Counter::EncodingTagBits); +} + +static void writeCounter(ArrayRef<CounterExpression> Expressions, Counter C, + raw_ostream &OS) { + encodeULEB128(encodeCounter(Expressions, C), OS); +} + +void CoverageMappingWriter::write(raw_ostream &OS) { + // Sort the regions in an ascending order by the file id and the starting + // location. + std::stable_sort(MappingRegions.begin(), MappingRegions.end()); + + // Write out the fileid -> filename mapping. + encodeULEB128(VirtualFileMapping.size(), OS); + for (const auto &FileID : VirtualFileMapping) + encodeULEB128(FileID, OS); + + // Write out the expressions. + CounterExpressionsMinimizer Minimizer(Expressions, MappingRegions); + auto MinExpressions = Minimizer.getExpressions(); + encodeULEB128(MinExpressions.size(), OS); + for (const auto &E : MinExpressions) { + writeCounter(MinExpressions, Minimizer.adjust(E.LHS), OS); + writeCounter(MinExpressions, Minimizer.adjust(E.RHS), OS); + } + + // Write out the mapping regions. + // Split the regions into subarrays where each region in a + // subarray has a fileID which is the index of that subarray. + unsigned PrevLineStart = 0; + unsigned CurrentFileID = ~0U; + for (auto I = MappingRegions.begin(), E = MappingRegions.end(); I != E; ++I) { + if (I->FileID != CurrentFileID) { + // Ensure that all file ids have at least one mapping region. + assert(I->FileID == (CurrentFileID + 1)); + // Find the number of regions with this file id. + unsigned RegionCount = 1; + for (auto J = I + 1; J != E && I->FileID == J->FileID; ++J) + ++RegionCount; + // Start a new region sub-array. + encodeULEB128(RegionCount, OS); + + CurrentFileID = I->FileID; + PrevLineStart = 0; + } + Counter Count = Minimizer.adjust(I->Count); + switch (I->Kind) { + case CounterMappingRegion::CodeRegion: + writeCounter(MinExpressions, Count, OS); + break; + case CounterMappingRegion::ExpansionRegion: { + assert(Count.isZero()); + assert(I->ExpandedFileID <= + (std::numeric_limits<unsigned>::max() >> + Counter::EncodingCounterTagAndExpansionRegionTagBits)); + // Mark an expansion region with a set bit that follows the counter tag, + // and pack the expanded file id into the remaining bits. + unsigned EncodedTagExpandedFileID = + (1 << Counter::EncodingTagBits) | + (I->ExpandedFileID + << Counter::EncodingCounterTagAndExpansionRegionTagBits); + encodeULEB128(EncodedTagExpandedFileID, OS); + break; + } + case CounterMappingRegion::SkippedRegion: + assert(Count.isZero()); + encodeULEB128(unsigned(I->Kind) + << Counter::EncodingCounterTagAndExpansionRegionTagBits, + OS); + break; + } + assert(I->LineStart >= PrevLineStart); + encodeULEB128(I->LineStart - PrevLineStart, OS); + encodeULEB128(I->ColumnStart, OS); + assert(I->LineEnd >= I->LineStart); + encodeULEB128(I->LineEnd - I->LineStart, OS); + encodeULEB128(I->ColumnEnd, OS); + PrevLineStart = I->LineStart; + } + // Ensure that all file ids have at least one mapping region. + assert(CurrentFileID == (VirtualFileMapping.size() - 1)); +} diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp new file mode 100644 index 0000000..92822a7 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp @@ -0,0 +1,63 @@ +//=-- InstrProf.cpp - Instrumented profiling format support -----------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for clang's instrumentation based PGO and +// coverage. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/InstrProf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; + +namespace { +class InstrProfErrorCategoryType : public std::error_category { + const char *name() const LLVM_NOEXCEPT override { return "llvm.instrprof"; } + std::string message(int IE) const override { + instrprof_error E = static_cast<instrprof_error>(IE); + switch (E) { + case instrprof_error::success: + return "Success"; + case instrprof_error::eof: + return "End of File"; + case instrprof_error::bad_magic: + return "Invalid profile data (bad magic)"; + case instrprof_error::bad_header: + return "Invalid profile data (file header is corrupt)"; + case instrprof_error::unsupported_version: + return "Unsupported profiling format version"; + case instrprof_error::unsupported_hash_type: + return "Unsupported profiling hash"; + case instrprof_error::too_large: + return "Too much profile data"; + case instrprof_error::truncated: + return "Truncated profile data"; + case instrprof_error::malformed: + return "Malformed profile data"; + case instrprof_error::unknown_function: + return "No profile data available for function"; + case instrprof_error::hash_mismatch: + return "Function hash mismatch"; + case instrprof_error::count_mismatch: + return "Function count mismatch"; + case instrprof_error::counter_overflow: + return "Counter overflow"; + } + llvm_unreachable("A value of instrprof_error has no message."); + } +}; +} + +static ManagedStatic<InstrProfErrorCategoryType> ErrorCategory; + +const std::error_category &llvm::instrprof_category() { + return *ErrorCategory; +} diff --git a/contrib/llvm/lib/ProfileData/InstrProfIndexed.h b/contrib/llvm/lib/ProfileData/InstrProfIndexed.h new file mode 100644 index 0000000..ebca7b2 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/InstrProfIndexed.h @@ -0,0 +1,56 @@ +//=-- InstrProfIndexed.h - Indexed profiling format support -------*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Shared header for the instrumented profile data reader and writer. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H +#define LLVM_LIB_PROFILEDATA_INSTRPROFINDEXED_H + +#include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MD5.h" + +namespace llvm { + +namespace IndexedInstrProf { +enum class HashT : uint32_t { + MD5, + + Last = MD5 +}; + +static inline uint64_t MD5Hash(StringRef Str) { + MD5 Hash; + Hash.update(Str); + llvm::MD5::MD5Result Result; + Hash.final(Result); + // Return the least significant 8 bytes. Our MD5 implementation returns the + // result in little endian, so we may need to swap bytes. + using namespace llvm::support; + return endian::read<uint64_t, little, unaligned>(Result); +} + +static inline uint64_t ComputeHash(HashT Type, StringRef K) { + switch (Type) { + case HashT::MD5: + return IndexedInstrProf::MD5Hash(K); + } + llvm_unreachable("Unhandled hash type"); +} + +const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" +const uint64_t Version = 2; +const HashT HashType = HashT::MD5; +} + +} // end namespace llvm + +#endif diff --git a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp new file mode 100644 index 0000000..8a529a0 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp @@ -0,0 +1,431 @@ +//=-- InstrProfReader.cpp - Instrumented profiling reader -------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for reading profiling data for clang's +// instrumentation based PGO and coverage. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/InstrProfReader.h" +#include "InstrProfIndexed.h" +#include "llvm/ADT/STLExtras.h" +#include <cassert> + +using namespace llvm; + +static ErrorOr<std::unique_ptr<MemoryBuffer>> +setupMemoryBuffer(std::string Path) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(Path); + if (std::error_code EC = BufferOrErr.getError()) + return EC; + return std::move(BufferOrErr.get()); +} + +static std::error_code initializeReader(InstrProfReader &Reader) { + return Reader.readHeader(); +} + +ErrorOr<std::unique_ptr<InstrProfReader>> +InstrProfReader::create(std::string Path) { + // Set up the buffer to read. + auto BufferOrError = setupMemoryBuffer(Path); + if (std::error_code EC = BufferOrError.getError()) + return EC; + return InstrProfReader::create(std::move(BufferOrError.get())); +} + +ErrorOr<std::unique_ptr<InstrProfReader>> +InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) { + // Sanity check the buffer. + if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max()) + return instrprof_error::too_large; + + std::unique_ptr<InstrProfReader> Result; + // Create the reader. + if (IndexedInstrProfReader::hasFormat(*Buffer)) + Result.reset(new IndexedInstrProfReader(std::move(Buffer))); + else if (RawInstrProfReader64::hasFormat(*Buffer)) + Result.reset(new RawInstrProfReader64(std::move(Buffer))); + else if (RawInstrProfReader32::hasFormat(*Buffer)) + Result.reset(new RawInstrProfReader32(std::move(Buffer))); + else + Result.reset(new TextInstrProfReader(std::move(Buffer))); + + // Initialize the reader and return the result. + if (std::error_code EC = initializeReader(*Result)) + return EC; + + return std::move(Result); +} + +ErrorOr<std::unique_ptr<IndexedInstrProfReader>> +IndexedInstrProfReader::create(std::string Path) { + // Set up the buffer to read. + auto BufferOrError = setupMemoryBuffer(Path); + if (std::error_code EC = BufferOrError.getError()) + return EC; + return IndexedInstrProfReader::create(std::move(BufferOrError.get())); +} + + +ErrorOr<std::unique_ptr<IndexedInstrProfReader>> +IndexedInstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) { + // Sanity check the buffer. + if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max()) + return instrprof_error::too_large; + + // Create the reader. + if (!IndexedInstrProfReader::hasFormat(*Buffer)) + return instrprof_error::bad_magic; + auto Result = llvm::make_unique<IndexedInstrProfReader>(std::move(Buffer)); + + // Initialize the reader and return the result. + if (std::error_code EC = initializeReader(*Result)) + return EC; + + return std::move(Result); +} + +void InstrProfIterator::Increment() { + if (Reader->readNextRecord(Record)) + *this = InstrProfIterator(); +} + +std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { + // Skip empty lines and comments. + while (!Line.is_at_end() && (Line->empty() || Line->startswith("#"))) + ++Line; + // If we hit EOF while looking for a name, we're done. + if (Line.is_at_end()) + return error(instrprof_error::eof); + + // Read the function name. + Record.Name = *Line++; + + // Read the function hash. + if (Line.is_at_end()) + return error(instrprof_error::truncated); + if ((Line++)->getAsInteger(0, Record.Hash)) + return error(instrprof_error::malformed); + + // Read the number of counters. + uint64_t NumCounters; + if (Line.is_at_end()) + return error(instrprof_error::truncated); + if ((Line++)->getAsInteger(10, NumCounters)) + return error(instrprof_error::malformed); + if (NumCounters == 0) + return error(instrprof_error::malformed); + + // Read each counter and fill our internal storage with the values. + Record.Counts.clear(); + Record.Counts.reserve(NumCounters); + for (uint64_t I = 0; I < NumCounters; ++I) { + if (Line.is_at_end()) + return error(instrprof_error::truncated); + uint64_t Count; + if ((Line++)->getAsInteger(10, Count)) + return error(instrprof_error::malformed); + Record.Counts.push_back(Count); + } + + return success(); +} + +template <class IntPtrT> +static uint64_t getRawMagic(); + +template <> +uint64_t getRawMagic<uint64_t>() { + return + uint64_t(255) << 56 | + uint64_t('l') << 48 | + uint64_t('p') << 40 | + uint64_t('r') << 32 | + uint64_t('o') << 24 | + uint64_t('f') << 16 | + uint64_t('r') << 8 | + uint64_t(129); +} + +template <> +uint64_t getRawMagic<uint32_t>() { + return + uint64_t(255) << 56 | + uint64_t('l') << 48 | + uint64_t('p') << 40 | + uint64_t('r') << 32 | + uint64_t('o') << 24 | + uint64_t('f') << 16 | + uint64_t('R') << 8 | + uint64_t(129); +} + +template <class IntPtrT> +bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { + if (DataBuffer.getBufferSize() < sizeof(uint64_t)) + return false; + uint64_t Magic = + *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); + return getRawMagic<IntPtrT>() == Magic || + sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic; +} + +template <class IntPtrT> +std::error_code RawInstrProfReader<IntPtrT>::readHeader() { + if (!hasFormat(*DataBuffer)) + return error(instrprof_error::bad_magic); + if (DataBuffer->getBufferSize() < sizeof(RawHeader)) + return error(instrprof_error::bad_header); + auto *Header = + reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart()); + ShouldSwapBytes = Header->Magic != getRawMagic<IntPtrT>(); + return readHeader(*Header); +} + +template <class IntPtrT> +std::error_code +RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { + const char *End = DataBuffer->getBufferEnd(); + // Skip zero padding between profiles. + while (CurrentPos != End && *CurrentPos == 0) + ++CurrentPos; + // If there's nothing left, we're done. + if (CurrentPos == End) + return instrprof_error::eof; + // If there isn't enough space for another header, this is probably just + // garbage at the end of the file. + if (CurrentPos + sizeof(RawHeader) > End) + return instrprof_error::malformed; + // The writer ensures each profile is padded to start at an aligned address. + if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>()) + return instrprof_error::malformed; + // The magic should have the same byte order as in the previous header. + uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos); + if (Magic != swap(getRawMagic<IntPtrT>())) + return instrprof_error::bad_magic; + + // There's another profile to read, so we need to process the header. + auto *Header = reinterpret_cast<const RawHeader *>(CurrentPos); + return readHeader(*Header); +} + +static uint64_t getRawVersion() { + return 1; +} + +template <class IntPtrT> +std::error_code +RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) { + if (swap(Header.Version) != getRawVersion()) + return error(instrprof_error::unsupported_version); + + CountersDelta = swap(Header.CountersDelta); + NamesDelta = swap(Header.NamesDelta); + auto DataSize = swap(Header.DataSize); + auto CountersSize = swap(Header.CountersSize); + auto NamesSize = swap(Header.NamesSize); + + ptrdiff_t DataOffset = sizeof(RawHeader); + ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize; + ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize; + size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize; + + auto *Start = reinterpret_cast<const char *>(&Header); + if (Start + ProfileSize > DataBuffer->getBufferEnd()) + return error(instrprof_error::bad_header); + + Data = reinterpret_cast<const ProfileData *>(Start + DataOffset); + DataEnd = Data + DataSize; + CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset); + NamesStart = Start + NamesOffset; + ProfileEnd = Start + ProfileSize; + + return success(); +} + +template <class IntPtrT> +std::error_code +RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) { + if (Data == DataEnd) + if (std::error_code EC = readNextHeader(ProfileEnd)) + return EC; + + // Get the raw data. + StringRef RawName(getName(Data->NamePtr), swap(Data->NameSize)); + uint32_t NumCounters = swap(Data->NumCounters); + if (NumCounters == 0) + return error(instrprof_error::malformed); + auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr), NumCounters); + + // Check bounds. + auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart); + if (RawName.data() < NamesStart || + RawName.data() + RawName.size() > DataBuffer->getBufferEnd() || + RawCounts.data() < CountersStart || + RawCounts.data() + RawCounts.size() > NamesStartAsCounter) + return error(instrprof_error::malformed); + + // Store the data in Record, byte-swapping as necessary. + Record.Hash = swap(Data->FuncHash); + Record.Name = RawName; + if (ShouldSwapBytes) { + Record.Counts.clear(); + Record.Counts.reserve(RawCounts.size()); + for (uint64_t Count : RawCounts) + Record.Counts.push_back(swap(Count)); + } else + Record.Counts = RawCounts; + + // Iterate. + ++Data; + return success(); +} + +namespace llvm { +template class RawInstrProfReader<uint32_t>; +template class RawInstrProfReader<uint64_t>; +} + +InstrProfLookupTrait::hash_value_type +InstrProfLookupTrait::ComputeHash(StringRef K) { + return IndexedInstrProf::ComputeHash(HashType, K); +} + +typedef InstrProfLookupTrait::data_type data_type; +typedef InstrProfLookupTrait::offset_type offset_type; + +data_type InstrProfLookupTrait::ReadData(StringRef K, const unsigned char *D, + offset_type N) { + + // Check if the data is corrupt. If so, don't try to read it. + if (N % sizeof(uint64_t)) + return data_type(); + + DataBuffer.clear(); + uint64_t NumCounts; + uint64_t NumEntries = N / sizeof(uint64_t); + std::vector<uint64_t> CounterBuffer; + for (uint64_t I = 0; I < NumEntries; I += NumCounts) { + using namespace support; + // The function hash comes first. + uint64_t Hash = endian::readNext<uint64_t, little, unaligned>(D); + + if (++I >= NumEntries) + return data_type(); + + // In v1, we have at least one count. + // Later, we have the number of counts. + NumCounts = (1 == FormatVersion) + ? NumEntries - I + : endian::readNext<uint64_t, little, unaligned>(D); + if (1 != FormatVersion) + ++I; + + // If we have more counts than data, this is bogus. + if (I + NumCounts > NumEntries) + return data_type(); + + CounterBuffer.clear(); + for (unsigned J = 0; J < NumCounts; ++J) + CounterBuffer.push_back(endian::readNext<uint64_t, little, unaligned>(D)); + + DataBuffer.push_back(InstrProfRecord(K, Hash, std::move(CounterBuffer))); + } + return DataBuffer; +} + +bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { + if (DataBuffer.getBufferSize() < 8) + return false; + using namespace support; + uint64_t Magic = + endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart()); + return Magic == IndexedInstrProf::Magic; +} + +std::error_code IndexedInstrProfReader::readHeader() { + const unsigned char *Start = + (const unsigned char *)DataBuffer->getBufferStart(); + const unsigned char *Cur = Start; + if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24) + return error(instrprof_error::truncated); + + using namespace support; + + // Check the magic number. + uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur); + if (Magic != IndexedInstrProf::Magic) + return error(instrprof_error::bad_magic); + + // Read the version. + FormatVersion = endian::readNext<uint64_t, little, unaligned>(Cur); + if (FormatVersion > IndexedInstrProf::Version) + return error(instrprof_error::unsupported_version); + + // Read the maximal function count. + MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur); + + // Read the hash type and start offset. + IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>( + endian::readNext<uint64_t, little, unaligned>(Cur)); + if (HashType > IndexedInstrProf::HashT::Last) + return error(instrprof_error::unsupported_hash_type); + uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur); + + // The rest of the file is an on disk hash table. + Index.reset(InstrProfReaderIndex::Create( + Start + HashOffset, Cur, Start, + InstrProfLookupTrait(HashType, FormatVersion))); + // Set up our iterator for readNextRecord. + RecordIterator = Index->data_begin(); + + return success(); +} + +std::error_code IndexedInstrProfReader::getFunctionCounts( + StringRef FuncName, uint64_t FuncHash, std::vector<uint64_t> &Counts) { + auto Iter = Index->find(FuncName); + if (Iter == Index->end()) + return error(instrprof_error::unknown_function); + + // Found it. Look for counters with the right hash. + ArrayRef<InstrProfRecord> Data = (*Iter); + if (Data.empty()) + return error(instrprof_error::malformed); + + for (unsigned I = 0, E = Data.size(); I < E; ++I) { + // Check for a match and fill the vector if there is one. + if (Data[I].Hash == FuncHash) { + Counts = Data[I].Counts; + return success(); + } + } + return error(instrprof_error::hash_mismatch); +} + +std::error_code +IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { + // Are we out of records? + if (RecordIterator == Index->data_end()) + return error(instrprof_error::eof); + + if ((*RecordIterator).empty()) + return error(instrprof_error::malformed); + + static unsigned RecordIndex = 0; + ArrayRef<InstrProfRecord> Data = (*RecordIterator); + Record = Data[RecordIndex++]; + if (RecordIndex >= Data.size()) { + ++RecordIterator; + RecordIndex = 0; + } + return success(); +} diff --git a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp new file mode 100644 index 0000000..2188543 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -0,0 +1,159 @@ +//=-- InstrProfWriter.cpp - Instrumented profiling writer -------------------=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing profiling data for clang's +// instrumentation based PGO and coverage. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/InstrProfWriter.h" +#include "InstrProfIndexed.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/EndianStream.h" +#include "llvm/Support/OnDiskHashTable.h" + +using namespace llvm; + +namespace { +class InstrProfRecordTrait { +public: + typedef StringRef key_type; + typedef StringRef key_type_ref; + + typedef const InstrProfWriter::CounterData *const data_type; + typedef const InstrProfWriter::CounterData *const data_type_ref; + + typedef uint64_t hash_value_type; + typedef uint64_t offset_type; + + static hash_value_type ComputeHash(key_type_ref K) { + return IndexedInstrProf::ComputeHash(IndexedInstrProf::HashType, K); + } + + static std::pair<offset_type, offset_type> + EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { + using namespace llvm::support; + endian::Writer<little> LE(Out); + + offset_type N = K.size(); + LE.write<offset_type>(N); + + offset_type M = 0; + for (const auto &Counts : *V) + M += (2 + Counts.second.size()) * sizeof(uint64_t); + LE.write<offset_type>(M); + + return std::make_pair(N, M); + } + + static void EmitKey(raw_ostream &Out, key_type_ref K, offset_type N){ + Out.write(K.data(), N); + } + + static void EmitData(raw_ostream &Out, key_type_ref, data_type_ref V, + offset_type) { + using namespace llvm::support; + endian::Writer<little> LE(Out); + + for (const auto &Counts : *V) { + LE.write<uint64_t>(Counts.first); + LE.write<uint64_t>(Counts.second.size()); + for (uint64_t I : Counts.second) + LE.write<uint64_t>(I); + } + } +}; +} + +std::error_code +InstrProfWriter::addFunctionCounts(StringRef FunctionName, + uint64_t FunctionHash, + ArrayRef<uint64_t> Counters) { + auto &CounterData = FunctionData[FunctionName]; + + auto Where = CounterData.find(FunctionHash); + if (Where == CounterData.end()) { + // We've never seen a function with this name and hash, add it. + CounterData[FunctionHash] = Counters; + // We keep track of the max function count as we go for simplicity. + if (Counters[0] > MaxFunctionCount) + MaxFunctionCount = Counters[0]; + return instrprof_error::success; + } + + // We're updating a function we've seen before. + auto &FoundCounters = Where->second; + // If the number of counters doesn't match we either have bad data or a hash + // collision. + if (FoundCounters.size() != Counters.size()) + return instrprof_error::count_mismatch; + + for (size_t I = 0, E = Counters.size(); I < E; ++I) { + if (FoundCounters[I] + Counters[I] < FoundCounters[I]) + return instrprof_error::counter_overflow; + FoundCounters[I] += Counters[I]; + } + // We keep track of the max function count as we go for simplicity. + if (FoundCounters[0] > MaxFunctionCount) + MaxFunctionCount = FoundCounters[0]; + + return instrprof_error::success; +} + +std::pair<uint64_t, uint64_t> InstrProfWriter::writeImpl(raw_ostream &OS) { + OnDiskChainedHashTableGenerator<InstrProfRecordTrait> Generator; + + // Populate the hash table generator. + for (const auto &I : FunctionData) + Generator.insert(I.getKey(), &I.getValue()); + + using namespace llvm::support; + endian::Writer<little> LE(OS); + + // Write the header. + LE.write<uint64_t>(IndexedInstrProf::Magic); + LE.write<uint64_t>(IndexedInstrProf::Version); + LE.write<uint64_t>(MaxFunctionCount); + LE.write<uint64_t>(static_cast<uint64_t>(IndexedInstrProf::HashType)); + + // Save a space to write the hash table start location. + uint64_t HashTableStartLoc = OS.tell(); + LE.write<uint64_t>(0); + // Write the hash table. + uint64_t HashTableStart = Generator.Emit(OS); + + return std::make_pair(HashTableStartLoc, HashTableStart); +} + +void InstrProfWriter::write(raw_fd_ostream &OS) { + // Write the hash table. + auto TableStart = writeImpl(OS); + + // Go back and fill in the hash table start. + using namespace support; + OS.seek(TableStart.first); + endian::Writer<little>(OS).write<uint64_t>(TableStart.second); +} + +std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() { + std::string Data; + llvm::raw_string_ostream OS(Data); + // Write the hash table. + auto TableStart = writeImpl(OS); + OS.flush(); + + // Go back and fill in the hash table start. + using namespace support; + uint64_t Bytes = endian::byte_swap<uint64_t, little>(TableStart.second); + Data.replace(TableStart.first, sizeof(uint64_t), (const char *)&Bytes, + sizeof(uint64_t)); + + // Return this in an aligned memory buffer. + return MemoryBuffer::getMemBufferCopy(Data); +} diff --git a/contrib/llvm/lib/ProfileData/SampleProf.cpp b/contrib/llvm/lib/ProfileData/SampleProf.cpp new file mode 100644 index 0000000..920c48a --- /dev/null +++ b/contrib/llvm/lib/ProfileData/SampleProf.cpp @@ -0,0 +1,51 @@ +//=-- SampleProf.cpp - Sample profiling format support --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains common definitions used in the reading and writing of +// sample profile data. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/SampleProf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" + +using namespace llvm; + +namespace { +class SampleProfErrorCategoryType : public std::error_category { + const char *name() const LLVM_NOEXCEPT override { return "llvm.sampleprof"; } + std::string message(int IE) const override { + sampleprof_error E = static_cast<sampleprof_error>(IE); + switch (E) { + case sampleprof_error::success: + return "Success"; + case sampleprof_error::bad_magic: + return "Invalid file format (bad magic)"; + case sampleprof_error::unsupported_version: + return "Unsupported format version"; + case sampleprof_error::too_large: + return "Too much profile data"; + case sampleprof_error::truncated: + return "Truncated profile data"; + case sampleprof_error::malformed: + return "Malformed profile data"; + case sampleprof_error::unrecognized_format: + return "Unrecognized profile encoding format"; + } + llvm_unreachable("A value of sampleprof_error has no message."); + } +}; +} + +static ManagedStatic<SampleProfErrorCategoryType> ErrorCategory; + +const std::error_category &llvm::sampleprof_category() { + return *ErrorCategory; +} diff --git a/contrib/llvm/lib/ProfileData/SampleProfReader.cpp b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp new file mode 100644 index 0000000..b39bfd6 --- /dev/null +++ b/contrib/llvm/lib/ProfileData/SampleProfReader.cpp @@ -0,0 +1,399 @@ +//===- SampleProfReader.cpp - Read LLVM sample profile data ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the class that reads LLVM sample profiles. It +// supports two file formats: text and binary. The textual representation +// is useful for debugging and testing purposes. The binary representation +// is more compact, resulting in smaller file sizes. However, they can +// both be used interchangeably. +// +// NOTE: If you are making changes to the file format, please remember +// to document them in the Clang documentation at +// tools/clang/docs/UsersManual.rst. +// +// Text format +// ----------- +// +// Sample profiles are written as ASCII text. The file is divided into +// sections, which correspond to each of the functions executed at runtime. +// Each section has the following format +// +// function1:total_samples:total_head_samples +// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ] +// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ] +// ... +// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ] +// +// The file may contain blank lines between sections and within a +// section. However, the spacing within a single line is fixed. Additional +// spaces will result in an error while reading the file. +// +// Function names must be mangled in order for the profile loader to +// match them in the current translation unit. The two numbers in the +// function header specify how many total samples were accumulated in the +// function (first number), and the total number of samples accumulated +// in the prologue of the function (second number). This head sample +// count provides an indicator of how frequently the function is invoked. +// +// Each sampled line may contain several items. Some are optional (marked +// below): +// +// a. Source line offset. This number represents the line number +// in the function where the sample was collected. The line number is +// always relative to the line where symbol of the function is +// defined. So, if the function has its header at line 280, the offset +// 13 is at line 293 in the file. +// +// Note that this offset should never be a negative number. This could +// happen in cases like macros. The debug machinery will register the +// line number at the point of macro expansion. So, if the macro was +// expanded in a line before the start of the function, the profile +// converter should emit a 0 as the offset (this means that the optimizers +// will not be able to associate a meaningful weight to the instructions +// in the macro). +// +// b. [OPTIONAL] Discriminator. This is used if the sampled program +// was compiled with DWARF discriminator support +// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators). +// DWARF discriminators are unsigned integer values that allow the +// compiler to distinguish between multiple execution paths on the +// same source line location. +// +// For example, consider the line of code ``if (cond) foo(); else bar();``. +// If the predicate ``cond`` is true 80% of the time, then the edge +// into function ``foo`` should be considered to be taken most of the +// time. But both calls to ``foo`` and ``bar`` are at the same source +// line, so a sample count at that line is not sufficient. The +// compiler needs to know which part of that line is taken more +// frequently. +// +// This is what discriminators provide. In this case, the calls to +// ``foo`` and ``bar`` will be at the same line, but will have +// different discriminator values. This allows the compiler to correctly +// set edge weights into ``foo`` and ``bar``. +// +// c. Number of samples. This is an integer quantity representing the +// number of samples collected by the profiler at this source +// location. +// +// d. [OPTIONAL] Potential call targets and samples. If present, this +// line contains a call instruction. This models both direct and +// number of samples. For example, +// +// 130: 7 foo:3 bar:2 baz:7 +// +// The above means that at relative line offset 130 there is a call +// instruction that calls one of ``foo()``, ``bar()`` and ``baz()``, +// with ``baz()`` being the relatively more frequently called target. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/SampleProfReader.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" + +using namespace llvm::sampleprof; +using namespace llvm; + +/// \brief Print the samples collected for a function on stream \p OS. +/// +/// \param OS Stream to emit the output to. +void FunctionSamples::print(raw_ostream &OS) { + OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size() + << " sampled lines\n"; + for (const auto &SI : BodySamples) { + LineLocation Loc = SI.first; + const SampleRecord &Sample = SI.second; + OS << "\tline offset: " << Loc.LineOffset + << ", discriminator: " << Loc.Discriminator + << ", number of samples: " << Sample.getSamples(); + if (Sample.hasCalls()) { + OS << ", calls:"; + for (const auto &I : Sample.getCallTargets()) + OS << " " << I.first() << ":" << I.second; + } + OS << "\n"; + } + OS << "\n"; +} + +/// \brief Dump the function profile for \p FName. +/// +/// \param FName Name of the function to print. +/// \param OS Stream to emit the output to. +void SampleProfileReader::dumpFunctionProfile(StringRef FName, + raw_ostream &OS) { + OS << "Function: " << FName << ": "; + Profiles[FName].print(OS); +} + +/// \brief Dump all the function profiles found on stream \p OS. +void SampleProfileReader::dump(raw_ostream &OS) { + for (const auto &I : Profiles) + dumpFunctionProfile(I.getKey(), OS); +} + +/// \brief Load samples from a text file. +/// +/// See the documentation at the top of the file for an explanation of +/// the expected format. +/// +/// \returns true if the file was loaded successfully, false otherwise. +std::error_code SampleProfileReaderText::read() { + line_iterator LineIt(*Buffer, /*SkipBlanks=*/true, '#'); + + // Read the profile of each function. Since each function may be + // mentioned more than once, and we are collecting flat profiles, + // accumulate samples as we parse them. + Regex HeadRE("^([^0-9].*):([0-9]+):([0-9]+)$"); + Regex LineSampleRE("^([0-9]+)\\.?([0-9]+)?: ([0-9]+)(.*)$"); + Regex CallSampleRE(" +([^0-9 ][^ ]*):([0-9]+)"); + while (!LineIt.is_at_eof()) { + // Read the header of each function. + // + // Note that for function identifiers we are actually expecting + // mangled names, but we may not always get them. This happens when + // the compiler decides not to emit the function (e.g., it was inlined + // and removed). In this case, the binary will not have the linkage + // name for the function, so the profiler will emit the function's + // unmangled name, which may contain characters like ':' and '>' in its + // name (member functions, templates, etc). + // + // The only requirement we place on the identifier, then, is that it + // should not begin with a number. + SmallVector<StringRef, 4> Matches; + if (!HeadRE.match(*LineIt, &Matches)) { + reportParseError(LineIt.line_number(), + "Expected 'mangled_name:NUM:NUM', found " + *LineIt); + return sampleprof_error::malformed; + } + assert(Matches.size() == 4); + StringRef FName = Matches[1]; + unsigned NumSamples, NumHeadSamples; + Matches[2].getAsInteger(10, NumSamples); + Matches[3].getAsInteger(10, NumHeadSamples); + Profiles[FName] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[FName]; + FProfile.addTotalSamples(NumSamples); + FProfile.addHeadSamples(NumHeadSamples); + ++LineIt; + + // Now read the body. The body of the function ends when we reach + // EOF or when we see the start of the next function. + while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) { + if (!LineSampleRE.match(*LineIt, &Matches)) { + reportParseError( + LineIt.line_number(), + "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt); + return sampleprof_error::malformed; + } + assert(Matches.size() == 5); + unsigned LineOffset, NumSamples, Discriminator = 0; + Matches[1].getAsInteger(10, LineOffset); + if (Matches[2] != "") + Matches[2].getAsInteger(10, Discriminator); + Matches[3].getAsInteger(10, NumSamples); + + // If there are function calls in this line, generate a call sample + // entry for each call. + std::string CallsLine(Matches[4]); + while (CallsLine != "") { + SmallVector<StringRef, 3> CallSample; + if (!CallSampleRE.match(CallsLine, &CallSample)) { + reportParseError(LineIt.line_number(), + "Expected 'mangled_name:NUM', found " + CallsLine); + return sampleprof_error::malformed; + } + StringRef CalledFunction = CallSample[1]; + unsigned CalledFunctionSamples; + CallSample[2].getAsInteger(10, CalledFunctionSamples); + FProfile.addCalledTargetSamples(LineOffset, Discriminator, + CalledFunction, CalledFunctionSamples); + CallsLine = CallSampleRE.sub("", CallsLine); + } + + FProfile.addBodySamples(LineOffset, Discriminator, NumSamples); + ++LineIt; + } + } + + return sampleprof_error::success; +} + +template <typename T> ErrorOr<T> SampleProfileReaderBinary::readNumber() { + unsigned NumBytesRead = 0; + std::error_code EC; + uint64_t Val = decodeULEB128(Data, &NumBytesRead); + + if (Val > std::numeric_limits<T>::max()) + EC = sampleprof_error::malformed; + else if (Data + NumBytesRead > End) + EC = sampleprof_error::truncated; + else + EC = sampleprof_error::success; + + if (EC) { + reportParseError(0, EC.message()); + return EC; + } + + Data += NumBytesRead; + return static_cast<T>(Val); +} + +ErrorOr<StringRef> SampleProfileReaderBinary::readString() { + std::error_code EC; + StringRef Str(reinterpret_cast<const char *>(Data)); + if (Data + Str.size() + 1 > End) { + EC = sampleprof_error::truncated; + reportParseError(0, EC.message()); + return EC; + } + + Data += Str.size() + 1; + return Str; +} + +std::error_code SampleProfileReaderBinary::read() { + while (!at_eof()) { + auto FName(readString()); + if (std::error_code EC = FName.getError()) + return EC; + + Profiles[*FName] = FunctionSamples(); + FunctionSamples &FProfile = Profiles[*FName]; + + auto Val = readNumber<unsigned>(); + if (std::error_code EC = Val.getError()) + return EC; + FProfile.addTotalSamples(*Val); + + Val = readNumber<unsigned>(); + if (std::error_code EC = Val.getError()) + return EC; + FProfile.addHeadSamples(*Val); + + // Read the samples in the body. + auto NumRecords = readNumber<unsigned>(); + if (std::error_code EC = NumRecords.getError()) + return EC; + for (unsigned I = 0; I < *NumRecords; ++I) { + auto LineOffset = readNumber<uint64_t>(); + if (std::error_code EC = LineOffset.getError()) + return EC; + + auto Discriminator = readNumber<uint64_t>(); + if (std::error_code EC = Discriminator.getError()) + return EC; + + auto NumSamples = readNumber<uint64_t>(); + if (std::error_code EC = NumSamples.getError()) + return EC; + + auto NumCalls = readNumber<unsigned>(); + if (std::error_code EC = NumCalls.getError()) + return EC; + + for (unsigned J = 0; J < *NumCalls; ++J) { + auto CalledFunction(readString()); + if (std::error_code EC = CalledFunction.getError()) + return EC; + + auto CalledFunctionSamples = readNumber<uint64_t>(); + if (std::error_code EC = CalledFunctionSamples.getError()) + return EC; + + FProfile.addCalledTargetSamples(*LineOffset, *Discriminator, + *CalledFunction, + *CalledFunctionSamples); + } + + FProfile.addBodySamples(*LineOffset, *Discriminator, *NumSamples); + } + } + + return sampleprof_error::success; +} + +std::error_code SampleProfileReaderBinary::readHeader() { + Data = reinterpret_cast<const uint8_t *>(Buffer->getBufferStart()); + End = Data + Buffer->getBufferSize(); + + // Read and check the magic identifier. + auto Magic = readNumber<uint64_t>(); + if (std::error_code EC = Magic.getError()) + return EC; + else if (*Magic != SPMagic()) + return sampleprof_error::bad_magic; + + // Read the version number. + auto Version = readNumber<uint64_t>(); + if (std::error_code EC = Version.getError()) + return EC; + else if (*Version != SPVersion()) + return sampleprof_error::unsupported_version; + + return sampleprof_error::success; +} + +bool SampleProfileReaderBinary::hasFormat(const MemoryBuffer &Buffer) { + const uint8_t *Data = + reinterpret_cast<const uint8_t *>(Buffer.getBufferStart()); + uint64_t Magic = decodeULEB128(Data); + return Magic == SPMagic(); +} + +/// \brief Prepare a memory buffer for the contents of \p Filename. +/// +/// \returns an error code indicating the status of the buffer. +static ErrorOr<std::unique_ptr<MemoryBuffer>> +setupMemoryBuffer(std::string Filename) { + auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = BufferOrErr.getError()) + return EC; + auto Buffer = std::move(BufferOrErr.get()); + + // Sanity check the file. + if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max()) + return sampleprof_error::too_large; + + return std::move(Buffer); +} + +/// \brief Create a sample profile reader based on the format of the input file. +/// +/// \param Filename The file to open. +/// +/// \param Reader The reader to instantiate according to \p Filename's format. +/// +/// \param C The LLVM context to use to emit diagnostics. +/// +/// \returns an error code indicating the status of the created reader. +ErrorOr<std::unique_ptr<SampleProfileReader>> +SampleProfileReader::create(StringRef Filename, LLVMContext &C) { + auto BufferOrError = setupMemoryBuffer(Filename); + if (std::error_code EC = BufferOrError.getError()) + return EC; + + auto Buffer = std::move(BufferOrError.get()); + std::unique_ptr<SampleProfileReader> Reader; + if (SampleProfileReaderBinary::hasFormat(*Buffer)) + Reader.reset(new SampleProfileReaderBinary(std::move(Buffer), C)); + else + Reader.reset(new SampleProfileReaderText(std::move(Buffer), C)); + + if (std::error_code EC = Reader->readHeader()) + return EC; + + return std::move(Reader); +} diff --git a/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp new file mode 100644 index 0000000..c95267a --- /dev/null +++ b/contrib/llvm/lib/ProfileData/SampleProfWriter.cpp @@ -0,0 +1,126 @@ +//===- SampleProfWriter.cpp - Write LLVM sample profile data --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the class that writes LLVM sample profiles. It +// supports two file formats: text and binary. The textual representation +// is useful for debugging and testing purposes. The binary representation +// is more compact, resulting in smaller file sizes. However, they can +// both be used interchangeably. +// +// See lib/ProfileData/SampleProfReader.cpp for documentation on each of the +// supported formats. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ProfileData/SampleProfWriter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorOr.h" +#include "llvm/Support/LEB128.h" +#include "llvm/Support/LineIterator.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Regex.h" + +using namespace llvm::sampleprof; +using namespace llvm; + +/// \brief Write samples to a text file. +bool SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) { + if (S.empty()) + return true; + + OS << FName << ":" << S.getTotalSamples() << ":" << S.getHeadSamples() + << "\n"; + + for (const auto &I : S.getBodySamples()) { + LineLocation Loc = I.first; + const SampleRecord &Sample = I.second; + if (Loc.Discriminator == 0) + OS << Loc.LineOffset << ": "; + else + OS << Loc.LineOffset << "." << Loc.Discriminator << ": "; + + OS << Sample.getSamples(); + + for (const auto &J : Sample.getCallTargets()) + OS << " " << J.first() << ":" << J.second; + OS << "\n"; + } + + return true; +} + +SampleProfileWriterBinary::SampleProfileWriterBinary(StringRef F, + std::error_code &EC) + : SampleProfileWriter(F, EC, sys::fs::F_None) { + if (EC) + return; + + // Write the file header. + encodeULEB128(SPMagic(), OS); + encodeULEB128(SPVersion(), OS); +} + +/// \brief Write samples to a binary file. +/// +/// \returns true if the samples were written successfully, false otherwise. +bool SampleProfileWriterBinary::write(StringRef FName, + const FunctionSamples &S) { + if (S.empty()) + return true; + + OS << FName; + encodeULEB128(0, OS); + encodeULEB128(S.getTotalSamples(), OS); + encodeULEB128(S.getHeadSamples(), OS); + encodeULEB128(S.getBodySamples().size(), OS); + for (const auto &I : S.getBodySamples()) { + LineLocation Loc = I.first; + const SampleRecord &Sample = I.second; + encodeULEB128(Loc.LineOffset, OS); + encodeULEB128(Loc.Discriminator, OS); + encodeULEB128(Sample.getSamples(), OS); + encodeULEB128(Sample.getCallTargets().size(), OS); + for (const auto &J : Sample.getCallTargets()) { + std::string Callee = J.first(); + unsigned CalleeSamples = J.second; + OS << Callee; + encodeULEB128(0, OS); + encodeULEB128(CalleeSamples, OS); + } + } + + return true; +} + +/// \brief Create a sample profile writer based on the specified format. +/// +/// \param Filename The file to create. +/// +/// \param Writer The writer to instantiate according to the specified format. +/// +/// \param Format Encoding format for the profile file. +/// +/// \returns an error code indicating the status of the created writer. +ErrorOr<std::unique_ptr<SampleProfileWriter>> +SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) { + std::error_code EC; + std::unique_ptr<SampleProfileWriter> Writer; + + if (Format == SPF_Binary) + Writer.reset(new SampleProfileWriterBinary(Filename, EC)); + else if (Format == SPF_Text) + Writer.reset(new SampleProfileWriterText(Filename, EC)); + else + EC = sampleprof_error::unrecognized_format; + + if (EC) + return EC; + + return std::move(Writer); +} |