diff options
Diffstat (limited to 'utils/FileCheck')
-rw-r--r-- | utils/FileCheck/FileCheck.cpp | 286 |
1 files changed, 212 insertions, 74 deletions
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index e791628..b0ef67a 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -17,17 +17,21 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringMap.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" #include "llvm/Support/Regex.h" +#include "llvm/Support/Signals.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Signals.h" #include "llvm/Support/system_error.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringMap.h" #include <algorithm> +#include <map> +#include <string> +#include <vector> using namespace llvm; static cl::opt<std::string> @@ -63,23 +67,29 @@ class Pattern { /// RegEx - If non-empty, this is a regex pattern. std::string RegExStr; + /// \brief Contains the number of line this pattern is in. + unsigned LineNumber; + /// VariableUses - Entries in this vector map to uses of a variable in the /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain /// "foobaz" and we'll get an entry in this vector that tells us to insert the /// value of bar at offset 3. std::vector<std::pair<StringRef, unsigned> > VariableUses; - /// VariableDefs - Entries in this vector map to definitions of a variable in - /// the pattern, e.g. "foo[[bar:.*]]baz". In this case, the RegExStr will - /// contain "foo(.*)baz" and VariableDefs will contain the pair "bar",1. The - /// index indicates what parenthesized value captures the variable value. - std::vector<std::pair<StringRef, unsigned> > VariableDefs; + /// VariableDefs - Maps definitions of variables to their parenthesized + /// capture numbers. + /// E.g. for the pattern "foo[[bar:.*]]baz", VariableDefs will map "bar" to 1. + std::map<StringRef, unsigned> VariableDefs; public: Pattern(bool matchEOF = false) : MatchEOF(matchEOF) { } - bool ParsePattern(StringRef PatternStr, SourceMgr &SM); + /// ParsePattern - Parse the given string into the Pattern. SM provides the + /// SourceMgr used for error reports, and LineNumber is the line number in + /// the input file from which the pattern string was read. + /// Returns true in case of an error, false otherwise. + bool ParsePattern(StringRef PatternStr, SourceMgr &SM, unsigned LineNumber); /// Match - Match the pattern string against the input buffer Buffer. This /// returns the position that is matched or npos if there is no match. If @@ -97,17 +107,31 @@ public: private: static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr); - bool AddRegExToRegEx(StringRef RegExStr, unsigned &CurParen, SourceMgr &SM); + bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); + void AddBackrefToRegEx(unsigned BackrefNum); /// ComputeMatchDistance - Compute an arbitrary estimate for the quality of /// matching this pattern at the start of \arg Buffer; a distance of zero /// should correspond to a perfect match. unsigned ComputeMatchDistance(StringRef Buffer, const StringMap<StringRef> &VariableTable) const; + + /// \brief Evaluates expression and stores the result to \p Value. + /// \return true on success. false when the expression has invalid syntax. + bool EvaluateExpression(StringRef Expr, std::string &Value) const; + + /// \brief Finds the closing sequence of a regex variable usage or + /// definition. Str has to point in the beginning of the definition + /// (right after the opening sequence). + /// \return offset of the closing sequence within Str, or npos if it was not + /// found. + size_t FindRegexVarEnd(StringRef Str); }; -bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { +bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM, + unsigned LineNumber) { + this->LineNumber = LineNumber; PatternLoc = SMLoc::getFromPointer(PatternStr.data()); // Ignore trailing whitespace. @@ -140,8 +164,7 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { while (!PatternStr.empty()) { // RegEx matches. if (PatternStr.startswith("{{")) { - - // Otherwise, this is the start of a regex match. Scan for the }}. + // This is the start of a regex match. Scan for the }}. size_t End = PatternStr.find("}}"); if (End == StringRef::npos) { SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), @@ -171,8 +194,10 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { // itself must be of the form "[a-zA-Z_][0-9a-zA-Z_]*", otherwise we reject // it. This is to catch some common errors. if (PatternStr.startswith("[[")) { - // Verify that it is terminated properly. - size_t End = PatternStr.find("]]"); + // Find the closing bracket pair ending the match. End is going to be an + // offset relative to the beginning of the match string. + size_t End = FindRegexVarEnd(PatternStr.substr(2)); + if (End == StringRef::npos) { SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), SourceMgr::DK_Error, @@ -180,8 +205,8 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { return true; } - StringRef MatchStr = PatternStr.substr(2, End-2); - PatternStr = PatternStr.substr(End+2); + StringRef MatchStr = PatternStr.substr(2, End); + PatternStr = PatternStr.substr(End+4); // Get the regex name (e.g. "foo"). size_t NameEnd = MatchStr.find(':'); @@ -193,16 +218,31 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { return true; } - // Verify that the name is well formed. - for (unsigned i = 0, e = Name.size(); i != e; ++i) - if (Name[i] != '_' && !isalnum(Name[i])) { + // Verify that the name/expression is well formed. FileCheck currently + // supports @LINE, @LINE+number, @LINE-number expressions. The check here + // is relaxed, more strict check is performed in \c EvaluateExpression. + bool IsExpression = false; + for (unsigned i = 0, e = Name.size(); i != e; ++i) { + if (i == 0 && Name[i] == '@') { + if (NameEnd != StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(Name.data()), + SourceMgr::DK_Error, + "invalid name in named regex definition"); + return true; + } + IsExpression = true; + continue; + } + if (Name[i] != '_' && !isalnum(Name[i]) && + (!IsExpression || (Name[i] != '+' && Name[i] != '-'))) { SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), SourceMgr::DK_Error, "invalid name in named regex"); return true; } + } // Name can't start with a digit. - if (isdigit(Name[0])) { + if (isdigit(static_cast<unsigned char>(Name[0]))) { SM.PrintMessage(SMLoc::getFromPointer(Name.data()), SourceMgr::DK_Error, "invalid name in named regex"); return true; @@ -210,12 +250,25 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { // Handle [[foo]]. if (NameEnd == StringRef::npos) { - VariableUses.push_back(std::make_pair(Name, RegExStr.size())); + // Handle variables that were defined earlier on the same line by + // emitting a backreference. + if (VariableDefs.find(Name) != VariableDefs.end()) { + unsigned VarParenNum = VariableDefs[Name]; + if (VarParenNum < 1 || VarParenNum > 9) { + SM.PrintMessage(SMLoc::getFromPointer(Name.data()), + SourceMgr::DK_Error, + "Can't back-reference more than 9 variables"); + return true; + } + AddBackrefToRegEx(VarParenNum); + } else { + VariableUses.push_back(std::make_pair(Name, RegExStr.size())); + } continue; } // Handle [[foo:.*]]. - VariableDefs.push_back(std::make_pair(Name, CurParen)); + VariableDefs[Name] = CurParen; RegExStr += '('; ++CurParen; @@ -231,7 +284,6 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr); PatternStr = PatternStr.substr(FixedMatchEnd); - continue; } return false; @@ -264,21 +316,46 @@ void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) { } } -bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen, +bool Pattern::AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM) { - Regex R(RegexStr); + Regex R(RS); std::string Error; if (!R.isValid(Error)) { - SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()), SourceMgr::DK_Error, + SM.PrintMessage(SMLoc::getFromPointer(RS.data()), SourceMgr::DK_Error, "invalid regex: " + Error); return true; } - RegExStr += RegexStr.str(); + RegExStr += RS.str(); CurParen += R.getNumMatches(); return false; } +void Pattern::AddBackrefToRegEx(unsigned BackrefNum) { + assert(BackrefNum >= 1 && BackrefNum <= 9 && "Invalid backref number"); + std::string Backref = std::string("\\") + + std::string(1, '0' + BackrefNum); + RegExStr += Backref; +} + +bool Pattern::EvaluateExpression(StringRef Expr, std::string &Value) const { + // The only supported expression is @LINE([\+-]\d+)? + if (!Expr.startswith("@LINE")) + return false; + Expr = Expr.substr(StringRef("@LINE").size()); + int Offset = 0; + if (!Expr.empty()) { + if (Expr[0] == '+') + Expr = Expr.substr(1); + else if (Expr[0] != '-') + return false; + if (Expr.getAsInteger(10, Offset)) + return false; + } + Value = llvm::itostr(LineNumber + Offset); + return true; +} + /// Match - Match the pattern string against the input buffer Buffer. This /// returns the position that is matched or npos if there is no match. If /// there is a match, the size of the matched string is returned in MatchLen. @@ -307,15 +384,21 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, unsigned InsertOffset = 0; for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { - StringMap<StringRef>::iterator it = - VariableTable.find(VariableUses[i].first); - // If the variable is undefined, return an error. - if (it == VariableTable.end()) - return StringRef::npos; - - // Look up the value and escape it so that we can plop it into the regex. std::string Value; - AddFixedStringToRegEx(it->second, Value); + + if (VariableUses[i].first[0] == '@') { + if (!EvaluateExpression(VariableUses[i].first, Value)) + return StringRef::npos; + } else { + StringMap<StringRef>::iterator it = + VariableTable.find(VariableUses[i].first); + // If the variable is undefined, return an error. + if (it == VariableTable.end()) + return StringRef::npos; + + // Look up the value and escape it so that we can plop it into the regex. + AddFixedStringToRegEx(it->second, Value); + } // Plop it into the regex at the adjusted offset. TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset, @@ -337,10 +420,11 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, StringRef FullMatch = MatchInfo[0]; // If this defines any variables, remember their values. - for (unsigned i = 0, e = VariableDefs.size(); i != e; ++i) { - assert(VariableDefs[i].second < MatchInfo.size() && - "Internal paren error"); - VariableTable[VariableDefs[i].first] = MatchInfo[VariableDefs[i].second]; + for (std::map<StringRef, unsigned>::const_iterator I = VariableDefs.begin(), + E = VariableDefs.end(); + I != E; ++I) { + assert(I->second < MatchInfo.size() && "Internal paren error"); + VariableTable[I->first] = MatchInfo[I->second]; } MatchLen = FullMatch.size(); @@ -371,19 +455,31 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, // variable values. if (!VariableUses.empty()) { for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { - StringRef Var = VariableUses[i].first; - StringMap<StringRef>::const_iterator it = VariableTable.find(Var); SmallString<256> Msg; raw_svector_ostream OS(Msg); - - // Check for undefined variable references. - if (it == VariableTable.end()) { - OS << "uses undefined variable \""; - OS.write_escaped(Var) << "\"";; + StringRef Var = VariableUses[i].first; + if (Var[0] == '@') { + std::string Value; + if (EvaluateExpression(Var, Value)) { + OS << "with expression \""; + OS.write_escaped(Var) << "\" equal to \""; + OS.write_escaped(Value) << "\""; + } else { + OS << "uses incorrect expression \""; + OS.write_escaped(Var) << "\""; + } } else { - OS << "with variable \""; - OS.write_escaped(Var) << "\" equal to \""; - OS.write_escaped(it->second) << "\""; + StringMap<StringRef>::const_iterator it = VariableTable.find(Var); + + // Check for undefined variable references. + if (it == VariableTable.end()) { + OS << "uses undefined variable \""; + OS.write_escaped(Var) << "\""; + } else { + OS << "with variable \""; + OS.write_escaped(Var) << "\" equal to \""; + OS.write_escaped(it->second) << "\""; + } } SM.PrintMessage(SMLoc::getFromPointer(Buffer.data()), SourceMgr::DK_Note, @@ -432,6 +528,40 @@ void Pattern::PrintFailureInfo(const SourceMgr &SM, StringRef Buffer, } } +size_t Pattern::FindRegexVarEnd(StringRef Str) { + // Offset keeps track of the current offset within the input Str + size_t Offset = 0; + // [...] Nesting depth + size_t BracketDepth = 0; + + while (!Str.empty()) { + if (Str.startswith("]]") && BracketDepth == 0) + return Offset; + if (Str[0] == '\\') { + // Backslash escapes the next char within regexes, so skip them both. + Str = Str.substr(2); + Offset += 2; + } else { + switch (Str[0]) { + default: + break; + case '[': + BracketDepth++; + break; + case ']': + assert(BracketDepth > 0 && "Invalid regex"); + BracketDepth--; + break; + } + Str = Str.substr(1); + Offset++; + } + } + + return StringRef::npos; +} + + //===----------------------------------------------------------------------===// // Check Strings. //===----------------------------------------------------------------------===// @@ -457,9 +587,13 @@ struct CheckString { : Pat(P), Loc(L), IsCheckNext(isCheckNext) {} }; -/// CanonicalizeInputFile - Remove duplicate horizontal space from the specified -/// memory buffer, free it, and return a new one. -static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) { +/// Canonicalize whitespaces in the input file. Line endings are replaced +/// with UNIX-style '\n'. +/// +/// \param PreserveHorizontal Don't squash consecutive horizontal whitespace +/// characters to a single space. +static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB, + bool PreserveHorizontal) { SmallString<128> NewFile; NewFile.reserve(MB->getBufferSize()); @@ -470,8 +604,9 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) { continue; } - // If current char is not a horizontal whitespace, dump it to output as is. - if (*Ptr != ' ' && *Ptr != '\t') { + // If current char is not a horizontal whitespace or if horizontal + // whitespace canonicalization is disabled, dump it to output as is. + if (PreserveHorizontal || (*Ptr != ' ' && *Ptr != '\t')) { NewFile.push_back(*Ptr); continue; } @@ -494,9 +629,9 @@ static MemoryBuffer *CanonicalizeInputFile(MemoryBuffer *MB) { /// ReadCheckFile - Read the check file, which specifies the sequence of /// expected strings. The strings are added to the CheckStrings vector. +/// Returns true in case of an error, false otherwise. static bool ReadCheckFile(SourceMgr &SM, std::vector<CheckString> &CheckStrings) { - // Open the check file, and tell SourceMgr about it. OwningPtr<MemoryBuffer> File; if (error_code ec = MemoryBuffer::getFileOrSTDIN(CheckFilename.c_str(), File)) { @@ -504,28 +639,33 @@ static bool ReadCheckFile(SourceMgr &SM, << ec.message() << '\n'; return true; } - MemoryBuffer *F = File.take(); // If we want to canonicalize whitespace, strip excess whitespace from the - // buffer containing the CHECK lines. - if (!NoCanonicalizeWhiteSpace) - F = CanonicalizeInputFile(F); + // buffer containing the CHECK lines. Remove DOS style line endings. + MemoryBuffer *F = + CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace); SM.AddNewSourceBuffer(F, SMLoc()); // Find all instances of CheckPrefix followed by : in the file. StringRef Buffer = F->getBuffer(); - std::vector<std::pair<SMLoc, Pattern> > NotMatches; + // LineNumber keeps track of the line on which CheckPrefix instances are + // found. + unsigned LineNumber = 1; + while (1) { // See if Prefix occurs in the memory buffer. - Buffer = Buffer.substr(Buffer.find(CheckPrefix)); - + size_t PrefixLoc = Buffer.find(CheckPrefix); // If we didn't find a match, we're done. - if (Buffer.empty()) + if (PrefixLoc == StringRef::npos) break; + LineNumber += Buffer.substr(0, PrefixLoc).count('\n'); + + Buffer = Buffer.substr(PrefixLoc); + const char *CheckPrefixStart = Buffer.data(); // When we find a check prefix, keep track of whether we find CHECK: or @@ -560,12 +700,11 @@ static bool ReadCheckFile(SourceMgr &SM, // Parse the pattern. Pattern P; - if (P.ParsePattern(Buffer.substr(0, EOL), SM)) + if (P.ParsePattern(Buffer.substr(0, EOL), SM, LineNumber)) return true; Buffer = Buffer.substr(EOL); - // Verify that CHECK-NEXT lines have at least one CHECK line before them. if (IsCheckNext && CheckStrings.empty()) { SM.PrintMessage(SMLoc::getFromPointer(CheckPrefixStart), @@ -582,7 +721,6 @@ static bool ReadCheckFile(SourceMgr &SM, continue; } - // Okay, add the string we captured to the output vector and move on. CheckStrings.push_back(CheckString(P, PatternLoc, @@ -663,18 +801,18 @@ int main(int argc, char **argv) { MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) { errs() << "Could not open input file '" << InputFilename << "': " << ec.message() << '\n'; - return true; + return 2; } - MemoryBuffer *F = File.take(); - if (F->getBufferSize() == 0) { + if (File->getBufferSize() == 0) { errs() << "FileCheck error: '" << InputFilename << "' is empty.\n"; - return 1; + return 2; } - + // Remove duplicate spaces in the input file if requested. - if (!NoCanonicalizeWhiteSpace) - F = CanonicalizeInputFile(F); + // Remove DOS style line endings. + MemoryBuffer *F = + CanonicalizeInputFile(File.take(), NoCanonicalizeWhiteSpace); SM.AddNewSourceBuffer(F, SMLoc()); |