summaryrefslogtreecommitdiffstats
path: root/contrib/llvm/lib/Support/CommandLine.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/llvm/lib/Support/CommandLine.cpp')
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp352
1 files changed, 264 insertions, 88 deletions
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index 18d3db5..44a88d8 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -17,12 +17,14 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h"
+#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Host.h"
@@ -58,6 +60,7 @@ TEMPLATE_INSTANTIATION(class opt<char>);
TEMPLATE_INSTANTIATION(class opt<bool>);
} } // end namespace llvm::cl
+// Pin the vtables to this file.
void GenericOptionValue::anchor() {}
void OptionValue<boolOrDefault>::anchor() {}
void OptionValue<std::string>::anchor() {}
@@ -72,6 +75,7 @@ void parser<double>::anchor() {}
void parser<float>::anchor() {}
void parser<std::string>::anchor() {}
void parser<char>::anchor() {}
+void StringSaver::anchor() {}
//===----------------------------------------------------------------------===//
@@ -435,39 +439,248 @@ static bool EatsUnboundedNumberOfValues(const Option *O) {
O->getNumOccurrencesFlag() == cl::OneOrMore;
}
-/// ParseCStringVector - Break INPUT up wherever one or more
-/// whitespace characters are found, and store the resulting tokens in
-/// OUTPUT. The tokens stored in OUTPUT are dynamically allocated
-/// using strdup(), so it is the caller's responsibility to free()
-/// them later.
+static bool isWhitespace(char C) {
+ return strchr(" \t\n\r\f\v", C);
+}
+
+static bool isQuote(char C) {
+ return C == '\"' || C == '\'';
+}
+
+static bool isGNUSpecial(char C) {
+ return strchr("\\\"\' ", C);
+}
+
+void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
+ SmallVectorImpl<const char *> &NewArgv) {
+ SmallString<128> Token;
+ for (size_t I = 0, E = Src.size(); I != E; ++I) {
+ // Consume runs of whitespace.
+ if (Token.empty()) {
+ while (I != E && isWhitespace(Src[I]))
+ ++I;
+ if (I == E) break;
+ }
+
+ // Backslashes can escape backslashes, spaces, and other quotes. Otherwise
+ // they are literal. This makes it much easier to read Windows file paths.
+ if (I + 1 < E && Src[I] == '\\' && isGNUSpecial(Src[I + 1])) {
+ ++I; // Skip the escape.
+ Token.push_back(Src[I]);
+ continue;
+ }
+
+ // Consume a quoted string.
+ if (isQuote(Src[I])) {
+ char Quote = Src[I++];
+ while (I != E && Src[I] != Quote) {
+ // Backslashes are literal, unless they escape a special character.
+ if (Src[I] == '\\' && I + 1 != E && isGNUSpecial(Src[I + 1]))
+ ++I;
+ Token.push_back(Src[I]);
+ ++I;
+ }
+ if (I == E) break;
+ continue;
+ }
+
+ // End the token if this is whitespace.
+ if (isWhitespace(Src[I])) {
+ if (!Token.empty())
+ NewArgv.push_back(Saver.SaveString(Token.c_str()));
+ Token.clear();
+ continue;
+ }
+
+ // This is a normal character. Append it.
+ Token.push_back(Src[I]);
+ }
+
+ // Append the last token after hitting EOF with no whitespace.
+ if (!Token.empty())
+ NewArgv.push_back(Saver.SaveString(Token.c_str()));
+}
+
+/// Backslashes are interpreted in a rather complicated way in the Windows-style
+/// command line, because backslashes are used both to separate path and to
+/// escape double quote. This method consumes runs of backslashes as well as the
+/// following double quote if it's escaped.
+///
+/// * If an even number of backslashes is followed by a double quote, one
+/// backslash is output for every pair of backslashes, and the last double
+/// quote remains unconsumed. The double quote will later be interpreted as
+/// the start or end of a quoted string in the main loop outside of this
+/// function.
+///
+/// * If an odd number of backslashes is followed by a double quote, one
+/// backslash is output for every pair of backslashes, and a double quote is
+/// output for the last pair of backslash-double quote. The double quote is
+/// consumed in this case.
///
-static void ParseCStringVector(std::vector<char *> &OutputVector,
- const char *Input) {
- // Characters which will be treated as token separators:
- StringRef Delims = " \v\f\t\r\n";
-
- StringRef WorkStr(Input);
- while (!WorkStr.empty()) {
- // If the first character is a delimiter, strip them off.
- if (Delims.find(WorkStr[0]) != StringRef::npos) {
- size_t Pos = WorkStr.find_first_not_of(Delims);
- if (Pos == StringRef::npos) Pos = WorkStr.size();
- WorkStr = WorkStr.substr(Pos);
+/// * Otherwise, backslashes are interpreted literally.
+static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) {
+ size_t E = Src.size();
+ int BackslashCount = 0;
+ // Skip the backslashes.
+ do {
+ ++I;
+ ++BackslashCount;
+ } while (I != E && Src[I] == '\\');
+
+ bool FollowedByDoubleQuote = (I != E && Src[I] == '"');
+ if (FollowedByDoubleQuote) {
+ Token.append(BackslashCount / 2, '\\');
+ if (BackslashCount % 2 == 0)
+ return I - 1;
+ Token.push_back('"');
+ return I;
+ }
+ Token.append(BackslashCount, '\\');
+ return I - 1;
+}
+
+void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
+ SmallVectorImpl<const char *> &NewArgv) {
+ SmallString<128> Token;
+
+ // This is a small state machine to consume characters until it reaches the
+ // end of the source string.
+ enum { INIT, UNQUOTED, QUOTED } State = INIT;
+ for (size_t I = 0, E = Src.size(); I != E; ++I) {
+ // INIT state indicates that the current input index is at the start of
+ // the string or between tokens.
+ if (State == INIT) {
+ if (isWhitespace(Src[I]))
+ continue;
+ if (Src[I] == '"') {
+ State = QUOTED;
+ continue;
+ }
+ if (Src[I] == '\\') {
+ I = parseBackslash(Src, I, Token);
+ State = UNQUOTED;
+ continue;
+ }
+ Token.push_back(Src[I]);
+ State = UNQUOTED;
continue;
}
- // Find position of first delimiter.
- size_t Pos = WorkStr.find_first_of(Delims);
- if (Pos == StringRef::npos) Pos = WorkStr.size();
+ // UNQUOTED state means that it's reading a token not quoted by double
+ // quotes.
+ if (State == UNQUOTED) {
+ // Whitespace means the end of the token.
+ if (isWhitespace(Src[I])) {
+ NewArgv.push_back(Saver.SaveString(Token.c_str()));
+ Token.clear();
+ State = INIT;
+ continue;
+ }
+ if (Src[I] == '"') {
+ State = QUOTED;
+ continue;
+ }
+ if (Src[I] == '\\') {
+ I = parseBackslash(Src, I, Token);
+ continue;
+ }
+ Token.push_back(Src[I]);
+ continue;
+ }
- // Everything from 0 to Pos is the next word to copy.
- char *NewStr = (char*)malloc(Pos+1);
- memcpy(NewStr, WorkStr.data(), Pos);
- NewStr[Pos] = 0;
- OutputVector.push_back(NewStr);
+ // QUOTED state means that it's reading a token quoted by double quotes.
+ if (State == QUOTED) {
+ if (Src[I] == '"') {
+ State = UNQUOTED;
+ continue;
+ }
+ if (Src[I] == '\\') {
+ I = parseBackslash(Src, I, Token);
+ continue;
+ }
+ Token.push_back(Src[I]);
+ }
+ }
+ // Append the last token after hitting EOF with no whitespace.
+ if (!Token.empty())
+ NewArgv.push_back(Saver.SaveString(Token.c_str()));
+}
- WorkStr = WorkStr.substr(Pos);
+static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
+ TokenizerCallback Tokenizer,
+ SmallVectorImpl<const char *> &NewArgv) {
+ OwningPtr<MemoryBuffer> MemBuf;
+ if (MemoryBuffer::getFile(FName, MemBuf))
+ return false;
+ StringRef Str(MemBuf->getBufferStart(), MemBuf->getBufferSize());
+
+ // If we have a UTF-16 byte order mark, convert to UTF-8 for parsing.
+ ArrayRef<char> BufRef(MemBuf->getBufferStart(), MemBuf->getBufferEnd());
+ std::string UTF8Buf;
+ if (hasUTF16ByteOrderMark(BufRef)) {
+ if (!convertUTF16ToUTF8String(BufRef, UTF8Buf))
+ return false;
+ Str = StringRef(UTF8Buf);
}
+
+ // Tokenize the contents into NewArgv.
+ Tokenizer(Str, Saver, NewArgv);
+
+ return true;
+}
+
+/// \brief Expand response files on a command line recursively using the given
+/// StringSaver and tokenization strategy.
+bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
+ SmallVectorImpl<const char *> &Argv) {
+ unsigned RspFiles = 0;
+ bool AllExpanded = false;
+
+ // Don't cache Argv.size() because it can change.
+ for (unsigned I = 0; I != Argv.size(); ) {
+ const char *Arg = Argv[I];
+ if (Arg[0] != '@') {
+ ++I;
+ continue;
+ }
+
+ // If we have too many response files, leave some unexpanded. This avoids
+ // crashing on self-referential response files.
+ if (RspFiles++ > 20)
+ return false;
+
+ // Replace this response file argument with the tokenization of its
+ // contents. Nested response files are expanded in subsequent iterations.
+ // FIXME: If a nested response file uses a relative path, is it relative to
+ // the cwd of the process or the response file?
+ SmallVector<const char *, 0> ExpandedArgv;
+ if (!ExpandResponseFile(Arg + 1, Saver, Tokenizer, ExpandedArgv)) {
+ AllExpanded = false;
+ continue;
+ }
+ Argv.erase(Argv.begin() + I);
+ Argv.insert(Argv.begin() + I, ExpandedArgv.begin(), ExpandedArgv.end());
+ }
+ return AllExpanded;
+}
+
+namespace {
+ class StrDupSaver : public StringSaver {
+ std::vector<char*> Dups;
+ public:
+ ~StrDupSaver() {
+ for (std::vector<char *>::iterator I = Dups.begin(), E = Dups.end();
+ I != E; ++I) {
+ char *Dup = *I;
+ free(Dup);
+ }
+ }
+ const char *SaveString(const char *Str) LLVM_OVERRIDE {
+ char *Dup = strdup(Str);
+ Dups.push_back(Dup);
+ return Dup;
+ }
+ };
}
/// ParseEnvironmentOptions - An alternative entry point to the
@@ -488,56 +701,15 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
// Get program's "name", which we wouldn't know without the caller
// telling us.
- std::vector<char*> newArgv;
- newArgv.push_back(strdup(progName));
+ SmallVector<const char *, 20> newArgv;
+ StrDupSaver Saver;
+ newArgv.push_back(Saver.SaveString(progName));
// Parse the value of the environment variable into a "command line"
// and hand it off to ParseCommandLineOptions().
- ParseCStringVector(newArgv, envValue);
+ TokenizeGNUCommandLine(envValue, Saver, newArgv);
int newArgc = static_cast<int>(newArgv.size());
ParseCommandLineOptions(newArgc, &newArgv[0], Overview);
-
- // Free all the strdup()ed strings.
- for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
- i != e; ++i)
- free(*i);
-}
-
-
-/// ExpandResponseFiles - Copy the contents of argv into newArgv,
-/// substituting the contents of the response files for the arguments
-/// of type @file.
-static void ExpandResponseFiles(unsigned argc, const char*const* argv,
- std::vector<char*>& newArgv) {
- for (unsigned i = 1; i != argc; ++i) {
- const char *arg = argv[i];
-
- if (arg[0] == '@') {
- sys::PathWithStatus respFile(++arg);
-
- // Check that the response file is not empty (mmap'ing empty
- // files can be problematic).
- const sys::FileStatus *FileStat = respFile.getFileStatus();
- if (FileStat && FileStat->getSize() != 0) {
-
- // If we could open the file, parse its contents, otherwise
- // pass the @file option verbatim.
-
- // TODO: we should also support recursive loading of response files,
- // since this is how gcc behaves. (From their man page: "The file may
- // itself contain additional @file options; any such options will be
- // processed recursively.")
-
- // Mmap the response file into memory.
- OwningPtr<MemoryBuffer> respFilePtr;
- if (!MemoryBuffer::getFile(respFile.c_str(), respFilePtr)) {
- ParseCStringVector(newArgv, respFilePtr->getBufferStart());
- continue;
- }
- }
- }
- newArgv.push_back(strdup(arg));
- }
}
void cl::ParseCommandLineOptions(int argc, const char * const *argv,
@@ -552,9 +724,11 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,
"No options specified!");
// Expand response files.
- std::vector<char*> newArgv;
- newArgv.push_back(strdup(argv[0]));
- ExpandResponseFiles(argc, argv, newArgv);
+ SmallVector<const char *, 20> newArgv;
+ for (int i = 0; i != argc; ++i)
+ newArgv.push_back(argv[i]);
+ StrDupSaver Saver;
+ ExpandResponseFiles(Saver, TokenizeGNUCommandLine, newArgv);
argv = &newArgv[0];
argc = static_cast<int>(newArgv.size());
@@ -848,12 +1022,6 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv,
PositionalOpts.clear();
MoreHelp->clear();
- // Free the memory allocated by ExpandResponseFiles.
- // Free all the strdup()ed strings.
- for (std::vector<char*>::iterator i = newArgv.begin(), e = newArgv.end();
- i != e; ++i)
- free(*i);
-
// If we had an error processing our arguments, don't let the program execute
if (ErrorParsing) exit(1);
}
@@ -913,11 +1081,20 @@ size_t alias::getOptionWidth() const {
return std::strlen(ArgStr)+6;
}
+static void printHelpStr(StringRef HelpStr, size_t Indent,
+ size_t FirstLineIndentedBy) {
+ std::pair<StringRef, StringRef> Split = HelpStr.split('\n');
+ outs().indent(Indent - FirstLineIndentedBy) << " - " << Split.first << "\n";
+ while (!Split.second.empty()) {
+ Split = Split.second.split('\n');
+ outs().indent(Indent) << Split.first << "\n";
+ }
+}
+
// Print out the option for the alias.
void alias::printOptionInfo(size_t GlobalWidth) const {
- size_t L = std::strlen(ArgStr);
outs() << " -" << ArgStr;
- outs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n";
+ printHelpStr(HelpStr, GlobalWidth, std::strlen(ArgStr) + 6);
}
//===----------------------------------------------------------------------===//
@@ -946,7 +1123,7 @@ void basic_parser_impl::printOptionInfo(const Option &O,
if (const char *ValName = getValueName())
outs() << "=<" << getValueStr(O, ValName) << '>';
- outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n';
+ printHelpStr(O.HelpStr, GlobalWidth, getOptionWidth(O));
}
void basic_parser_impl::printOptionName(const Option &O,
@@ -1087,9 +1264,8 @@ size_t generic_parser_base::getOptionWidth(const Option &O) const {
void generic_parser_base::printOptionInfo(const Option &O,
size_t GlobalWidth) const {
if (O.hasArgStr()) {
- size_t L = std::strlen(O.ArgStr);
outs() << " -" << O.ArgStr;
- outs().indent(GlobalWidth-L-6) << " - " << O.HelpStr << '\n';
+ printHelpStr(O.HelpStr, GlobalWidth, std::strlen(O.ArgStr) + 6);
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8;
@@ -1100,9 +1276,9 @@ void generic_parser_base::printOptionInfo(const Option &O,
if (O.HelpStr[0])
outs() << " " << O.HelpStr << '\n';
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
- size_t L = std::strlen(getOption(i));
- outs() << " -" << getOption(i);
- outs().indent(GlobalWidth-L-8) << " - " << getDescription(i) << '\n';
+ const char *Option = getOption(i);
+ outs() << " -" << Option;
+ printHelpStr(getDescription(i), GlobalWidth, std::strlen(Option) + 8);
}
}
}
OpenPOWER on IntegriCloud