From a4c19d68f13cf0a83bc0da53bd6d547fcaf635fe Mon Sep 17 00:00:00 2001 From: ed Date: Mon, 22 Jun 2009 08:08:12 +0000 Subject: Update LLVM sources to r73879. --- tools/CMakeLists.txt | 1 + tools/Makefile | 3 +- tools/gold/gold-plugin.cpp | 17 +- tools/llc/llc.cpp | 4 + tools/lli/lli.cpp | 5 + tools/llvm-mc/AsmLexer.cpp | 258 ++++++++++++++++++++++++++ tools/llvm-mc/AsmLexer.h | 109 +++++++++++ tools/llvm-mc/AsmParser.cpp | 351 ++++++++++++++++++++++++++++++++++++ tools/llvm-mc/AsmParser.h | 48 +++++ tools/llvm-mc/CMakeLists.txt | 7 + tools/llvm-mc/Makefile | 17 ++ tools/llvm-mc/llvm-mc.cpp | 161 +++++++++++++++++ tools/llvmc/doc/LLVMC-Reference.rst | 76 +++++--- tools/llvmc/doc/LLVMC-Tutorial.rst | 21 ++- tools/llvmc/driver/Makefile | 4 +- tools/lto/LTOCodeGenerator.cpp | 23 ++- 16 files changed, 1047 insertions(+), 58 deletions(-) create mode 100644 tools/llvm-mc/AsmLexer.cpp create mode 100644 tools/llvm-mc/AsmLexer.h create mode 100644 tools/llvm-mc/AsmParser.cpp create mode 100644 tools/llvm-mc/AsmParser.h create mode 100644 tools/llvm-mc/CMakeLists.txt create mode 100644 tools/llvm-mc/Makefile create mode 100644 tools/llvm-mc/llvm-mc.cpp (limited to 'tools') diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 5c1ee35..7191d80 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -9,6 +9,7 @@ endif() add_subdirectory(opt) add_subdirectory(llvm-as) add_subdirectory(llvm-dis) +add_subdirectory(llvm-mc) add_subdirectory(llc) add_subdirectory(llvm-ranlib) diff --git a/tools/Makefile b/tools/Makefile index b3c015f..5ed090e 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -20,7 +20,8 @@ PARALLEL_DIRS := opt llvm-as llvm-dis \ llc llvm-ranlib llvm-ar llvm-nm \ llvm-ld llvm-prof llvm-link \ lli gccas gccld llvm-extract llvm-db \ - bugpoint llvm-bcanalyzer llvm-stub llvmc + bugpoint llvm-bcanalyzer llvm-stub llvmc \ + llvm-mc # Let users override the set of tools to build from the command line. 
ifdef ONLY_TOOLS diff --git a/tools/gold/gold-plugin.cpp b/tools/gold/gold-plugin.cpp index 46b1717..8d8fcd2 100644 --- a/tools/gold/gold-plugin.cpp +++ b/tools/gold/gold-plugin.cpp @@ -18,6 +18,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/System/Path.h" +#include "llvm/System/Program.h" #include #include @@ -44,7 +45,6 @@ namespace { int gold_version = 0; bool generate_api_file = false; - const char *gcc_path = NULL; const char *as_path = NULL; struct claimed_file { @@ -103,13 +103,6 @@ ld_plugin_status onload(ld_plugin_tv *tv) { case LDPT_OPTION: if (strcmp("generate-api-file", tv->tv_u.tv_string) == 0) { generate_api_file = true; - } else if (strncmp("gcc=", tv->tv_u.tv_string, 4) == 0) { - if (gcc_path) { - (*message)(LDPL_WARNING, "Path to gcc specified twice. " - "Discarding %s", tv->tv_u.tv_string); - } else { - gcc_path = strdup(tv->tv_u.tv_string + 4); - } } else if (strncmp("as=", tv->tv_u.tv_string, 3) == 0) { if (as_path) { (*message)(LDPL_WARNING, "Path to as specified twice. " @@ -352,10 +345,10 @@ ld_plugin_status all_symbols_read_hook(void) { lto_codegen_set_pic_model(cg, output_type); lto_codegen_set_debug_model(cg, LTO_DEBUG_MODEL_DWARF); - if (gcc_path) - lto_codegen_set_gcc_path(cg, gcc_path); - if (as_path) - lto_codegen_set_assembler_path(cg, as_path); + if (as_path) { + sys::Path p = sys::Program::FindProgramByName(as_path); + lto_codegen_set_assembler_path(cg, p.c_str()); + } size_t bufsize = 0; const char *buffer = static_cast(lto_codegen_compile(cg, diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp index c630331..e71b378 100644 --- a/tools/llc/llc.cpp +++ b/tools/llc/llc.cpp @@ -38,6 +38,7 @@ #include "llvm/System/Signals.h" #include "llvm/Config/config.h" #include "llvm/LinkAllVMCore.h" +#include "llvm/Target/TargetSelect.h" #include #include #include @@ -214,6 +215,9 @@ int main(int argc, char **argv) { llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 
cl::ParseCommandLineOptions(argc, argv, "llvm system compiler\n"); + InitializeAllTargets(); + InitializeAllAsmPrinters(); + // Load the module to be compiled... std::string ErrorMessage; std::auto_ptr M; diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp index 6d3cbbc..afd3c5a 100644 --- a/tools/lli/lli.cpp +++ b/tools/lli/lli.cpp @@ -28,6 +28,7 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm/System/Process.h" #include "llvm/System/Signals.h" +#include "llvm/Target/TargetSelect.h" #include #include using namespace llvm; @@ -137,6 +138,10 @@ int main(int argc, char **argv, char * const *envp) { case '2': OLvl = CodeGenOpt::Default; break; case '3': OLvl = CodeGenOpt::Aggressive; break; } + + // If we have a native target, initialize it to ensure it is linked in and + // usable by the JIT. + InitializeNativeTarget(); EE = ExecutionEngine::create(MP, ForceInterpreter, &ErrorMsg, OLvl); if (!EE && !ErrorMsg.empty()) { diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp new file mode 100644 index 0000000..0828594 --- /dev/null +++ b/tools/llvm-mc/AsmLexer.cpp @@ -0,0 +1,258 @@ +//===- AsmLexer.cpp - Lexer for Assembly Files ----------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the lexer for assembly files. 
+// +//===----------------------------------------------------------------------===// + +#include "AsmLexer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include <cerrno> +#include <cstdio> +#include <cstdlib> +using namespace llvm; + +AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) { + CurBuffer = 0; + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = CurBuf->getBufferStart(); + TokStart = 0; +} + +SMLoc AsmLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); +} + +void AsmLexer::PrintMessage(SMLoc Loc, const std::string &Msg) const { + SrcMgr.PrintMessage(Loc, Msg); +} + +/// ReturnError - Set the error to the specified string at the specified +/// location. This is defined to always return asmtok::Error. +asmtok::TokKind AsmLexer::ReturnError(const char *Loc, const std::string &Msg) { + SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg); + return asmtok::Error; +} + +int AsmLexer::getNextChar() { + char CurChar = *CurPtr++; + switch (CurChar) { + default: + return (unsigned char)CurChar; + case 0: { + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr-1 != CurBuf->getBufferEnd()) + return 0; // Just whitespace. + + // If this is the end of an included file, pop the parent file off the + // include stack. + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc != SMLoc()) { + CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = ParentIncludeLoc.getPointer(); + return getNextChar(); + } + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. + return EOF; + } + } +} + +/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* +asmtok::TokKind AsmLexer::LexIdentifier() { + while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' || + *CurPtr == '.' 
|| *CurPtr == '@') + ++CurPtr; + CurStrVal.assign(TokStart, CurPtr); // Whole identifier text. + return asmtok::Identifier; +} + +/// LexPercent: Register: %[a-zA-Z0-9]+ +asmtok::TokKind AsmLexer::LexPercent() { + if (!isalnum(*CurPtr)) + return ReturnError(TokStart, "invalid register name"); + while (isalnum(*CurPtr)) + ++CurPtr; + CurStrVal.assign(TokStart, CurPtr); // Include the leading %. + return asmtok::Register; +} + +/// LexSlash: Slash: / +/// C-Style Comment: /* ... */ +asmtok::TokKind AsmLexer::LexSlash() { + if (*CurPtr != '*') + return asmtok::Slash; + + // C Style comment. + ++CurPtr; // skip the star. + while (1) { + int CurChar = getNextChar(); + switch (CurChar) { + case EOF: + return ReturnError(TokStart, "unterminated comment"); + case '*': + // End of the comment? + if (CurPtr[0] != '/') break; + + ++CurPtr; // End the */. + return LexToken(); + } + } +} + +/// LexHash: Comment: #[^\r\n]* +asmtok::TokKind AsmLexer::LexHash() { + int CurChar = getNextChar(); + while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF) + CurChar = getNextChar(); + + if (CurChar == EOF) + return asmtok::Eof; + return asmtok::EndOfStatement; +} + + +/// LexDigit: First character is [0-9]. +/// Local Label: [0-9][:] +/// Forward/Backward Label: [0-9][fb] +/// Binary integer: 0b[01]+ +/// Octal integer: 0[0-7]+ +/// Hex integer: 0x[0-9a-fA-F]+ +/// Decimal integer: [1-9][0-9]* +/// TODO: FP literal. +asmtok::TokKind AsmLexer::LexDigit() { + if (*CurPtr == ':') + return ReturnError(TokStart, "FIXME: local label not implemented"); + if (*CurPtr == 'f' || (*CurPtr == 'b' && (CurPtr[-1] != '0' || (CurPtr[1] != '0' && CurPtr[1] != '1')))) // "0b[01]" starts a binary integer, not a backward label. + return ReturnError(TokStart, "FIXME: directional label not implemented"); + + // Decimal integer: [1-9][0-9]* + if (CurPtr[-1] != '0') { + while (isdigit(*CurPtr)) + ++CurPtr; + CurIntVal = strtoll(TokStart, 0, 10); + return asmtok::IntVal; + } + + if (*CurPtr == 'b') { + ++CurPtr; + const char *NumStart = CurPtr; + while (CurPtr[0] == '0' || CurPtr[0] == '1') + ++CurPtr; + + // Requires at least one binary digit. 
+ if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid binary number"); + CurIntVal = strtoll(NumStart, 0, 2); + return asmtok::IntVal; + } + + if (*CurPtr == 'x') { + ++CurPtr; + const char *NumStart = CurPtr; + while (isxdigit(CurPtr[0])) + ++CurPtr; + + // Requires at least one hex digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + + errno = 0; + CurIntVal = strtoll(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + if (errno == ERANGE) { + errno = 0; + CurIntVal = (int64_t)strtoull(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(CurPtr-2, "Invalid hexadecimal number"); + if (errno == ERANGE) + return ReturnError(CurPtr-2, "Hexadecimal number out of range"); + } + return asmtok::IntVal; + } + + // Must be an octal number, it starts with 0. + while (*CurPtr >= '0' && *CurPtr <= '7') + ++CurPtr; + CurIntVal = strtoll(TokStart, 0, 8); + return asmtok::IntVal; +} + +/// LexQuote: String: "..." +asmtok::TokKind AsmLexer::LexQuote() { + int CurChar = getNextChar(); + // TODO: does gas allow multiline string constants? + while (CurChar != '"') { + if (CurChar == '\\') { + // Allow \", etc. + CurChar = getNextChar(); + } + + if (CurChar == EOF) + return ReturnError(TokStart, "unterminated string constant"); + + CurChar = getNextChar(); + } + + CurStrVal.assign(TokStart, CurPtr); // include quotes. + return asmtok::String; +} + + +asmtok::TokKind AsmLexer::LexToken() { + TokStart = CurPtr; + // This always consumes at least one character. + int CurChar = getNextChar(); + + switch (CurChar) { + default: + // Handle identifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* + if (isalpha(CurChar) || CurChar == '_' || CurChar == '.') + return LexIdentifier(); + + // Unknown character, emit an error. + return ReturnError(TokStart, "invalid character in input"); + case EOF: return asmtok::Eof; + case 0: + case ' ': + case '\t': + // Ignore whitespace. 
+ return LexToken(); + case '\n': // FALL THROUGH. + case '\r': // FALL THROUGH. + case ';': return asmtok::EndOfStatement; + case ':': return asmtok::Colon; + case '+': return asmtok::Plus; + case '-': return asmtok::Minus; + case '~': return asmtok::Tilde; + case '(': return asmtok::LParen; + case ')': return asmtok::RParen; + case '*': return asmtok::Star; + case ',': return asmtok::Comma; + case '$': return asmtok::Dollar; + case '%': return LexPercent(); + case '/': return LexSlash(); + case '#': return LexHash(); + case '"': return LexQuote(); + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + return LexDigit(); + + // TODO: Quoted identifiers (objc methods etc) + // local labels: [0-9][:] + // Forward/backward labels: [0-9][fb] + // Integers, fp constants, character constants. + } +} diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h new file mode 100644 index 0000000..a6c9323 --- /dev/null +++ b/tools/llvm-mc/AsmLexer.h @@ -0,0 +1,109 @@ +//===- AsmLexer.h - Lexer for Assembly Files --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class declares the lexer for assembly files. +// +//===----------------------------------------------------------------------===// + +#ifndef ASMLEXER_H +#define ASMLEXER_H + +#include "llvm/Support/DataTypes.h" +#include <string> +#include <cassert> + +namespace llvm { +class MemoryBuffer; +class SourceMgr; +class SMLoc; + +namespace asmtok { + enum TokKind { + // Markers + Eof, Error, + + // String values. + Identifier, + Register, + String, + + // Integer values. + IntVal, + + // No-value. 
+ EndOfStatement, + Colon, + Plus, Minus, Tilde, + Slash, // '/' + LParen, RParen, + Star, Comma, Dollar + }; +} + +/// AsmLexer - Lexer class for assembly files. +class AsmLexer { + SourceMgr &SrcMgr; + + const char *CurPtr; + const MemoryBuffer *CurBuf; + + // Information about the current token. + const char *TokStart; + asmtok::TokKind CurKind; + std::string CurStrVal; // Valid for Identifier, Register and String. + int64_t CurIntVal; // Valid for IntVal. + + /// CurBuffer - This is the current buffer index we're lexing from as managed + /// by the SourceMgr object. + int CurBuffer; + +public: + AsmLexer(SourceMgr &SrcMgr); + ~AsmLexer() {} + + asmtok::TokKind Lex() { + return CurKind = LexToken(); + } + + asmtok::TokKind getKind() const { return CurKind; } + bool is(asmtok::TokKind K) const { return CurKind == K; } + bool isNot(asmtok::TokKind K) const { return CurKind != K; } + + const std::string &getCurStrVal() const { + assert((CurKind == asmtok::Identifier || CurKind == asmtok::Register || + CurKind == asmtok::String) && + "This token doesn't have a string value"); + return CurStrVal; + } + int64_t getCurIntVal() const { + assert(CurKind == asmtok::IntVal && "This token isn't an integer"); + return CurIntVal; + } + + SMLoc getLoc() const; + + void PrintMessage(SMLoc Loc, const std::string &Msg) const; + +private: + int getNextChar(); + asmtok::TokKind ReturnError(const char *Loc, const std::string &Msg); + + /// LexToken - Read the next token and return its code. 
+ asmtok::TokKind LexToken(); + asmtok::TokKind LexIdentifier(); + asmtok::TokKind LexPercent(); + asmtok::TokKind LexSlash(); + asmtok::TokKind LexHash(); + asmtok::TokKind LexDigit(); + asmtok::TokKind LexQuote(); +}; + +} // end namespace llvm + +#endif diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp new file mode 100644 index 0000000..715ff39 --- /dev/null +++ b/tools/llvm-mc/AsmParser.cpp @@ -0,0 +1,351 @@ +//===- AsmParser.cpp - Parser for Assembly Files --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class implements the parser for assembly files. +// +//===----------------------------------------------------------------------===// + +#include "AsmParser.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +bool AsmParser::Error(SMLoc L, const char *Msg) { + Lexer.PrintMessage(L, Msg); + return true; +} + +bool AsmParser::TokError(const char *Msg) { + Lexer.PrintMessage(Lexer.getLoc(), Msg); + return true; +} + +bool AsmParser::Run() { + // Prime the lexer. + Lexer.Lex(); + + while (Lexer.isNot(asmtok::Eof)) + if (ParseStatement()) + return true; + + return false; +} + +/// EatToEndOfStatement - Throw away the rest of the line for testing purposes. +void AsmParser::EatToEndOfStatement() { + while (Lexer.isNot(asmtok::EndOfStatement) && + Lexer.isNot(asmtok::Eof)) + Lexer.Lex(); + + // Eat EOL. + if (Lexer.is(asmtok::EndOfStatement)) + Lexer.Lex(); +} + + +struct AsmParser::X86Operand { + enum { + Register, + Immediate, + Memory + } Kind; + + union { + struct { + unsigned RegNo; + } Reg; + + struct { + // FIXME: Should be a general expression. 
+ int64_t Val; + } Imm; + + struct { + unsigned SegReg; + int64_t Disp; // FIXME: Should be a general expression. + unsigned BaseReg; + unsigned Scale; + unsigned ScaleReg; + } Mem; + }; + + static X86Operand CreateReg(unsigned RegNo) { + X86Operand Res; + Res.Kind = Register; + Res.Reg.RegNo = RegNo; + return Res; + } + static X86Operand CreateImm(int64_t Val) { + X86Operand Res; + Res.Kind = Immediate; + Res.Imm.Val = Val; + return Res; + } + static X86Operand CreateMem(unsigned SegReg, int64_t Disp, unsigned BaseReg, + unsigned Scale, unsigned ScaleReg) { + X86Operand Res; + Res.Kind = Memory; + Res.Mem.SegReg = SegReg; + Res.Mem.Disp = Disp; + Res.Mem.BaseReg = BaseReg; + Res.Mem.Scale = Scale; + Res.Mem.ScaleReg = ScaleReg; + return Res; + } +}; + +bool AsmParser::ParseX86Operand(X86Operand &Op) { + switch (Lexer.getKind()) { + default: + return ParseX86MemOperand(Op); + case asmtok::Register: + // FIXME: Decode reg #. + // FIXME: if a segment register, this could either be just the seg reg, or + // the start of a memory operand. + Op = X86Operand::CreateReg(123); + Lexer.Lex(); // Eat register. + return false; + case asmtok::Dollar: { + // $42 -> immediate. + Lexer.Lex(); + int64_t Val; + if (ParseExpression(Val)) + return TokError("expected integer constant"); + Op = X86Operand::CreateImm(Val); + return false; + } + case asmtok::Star: + Lexer.Lex(); // Eat the star. + + if (Lexer.is(asmtok::Register)) { + Op = X86Operand::CreateReg(123); + Lexer.Lex(); // Eat register. + } else if (ParseX86MemOperand(Op)) + return true; + + // FIXME: Note that these are 'dereferenced' so that clients know the '*' is + // there. + return false; + } +} + +/// ParseX86MemOperand: segment: disp(basereg, indexreg, scale) +bool AsmParser::ParseX86MemOperand(X86Operand &Op) { + // FIXME: If SegReg ':' (e.g. %gs:), eat and remember. 
+ unsigned SegReg = 0; + + + // We have to disambiguate a parenthesized expression "(4+5)" from the start + // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The + // only way to do this without lookahead is to eat the ( and see what is after + // it. + int64_t Disp = 0; + if (Lexer.isNot(asmtok::LParen)) { + if (ParseExpression(Disp)) return true; + + // After parsing the base expression we could either have a parenthesized + // memory address or not. If not, return now. If so, eat the (. + if (Lexer.isNot(asmtok::LParen)) { + Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 0); + return false; + } + + // Eat the '('. + Lexer.Lex(); + } else { + // Okay, we have a '('. We don't know if this is an expression or not, + // so we have to eat the ( to see beyond it. + Lexer.Lex(); // Eat the '('. + + if (Lexer.is(asmtok::Register) || Lexer.is(asmtok::Comma)) { + // Nothing to do here, fall into the code below with the '(' part of the + // memory operand consumed. + } else { + // It must be a parenthesized expression, parse it now. + if (ParseParenExpr(Disp)) return true; + + // After parsing the base expression we could either have a parenthesized + // memory address or not. If not, return now. If so, eat the (. + if (Lexer.isNot(asmtok::LParen)) { + Op = X86Operand::CreateMem(SegReg, Disp, 0, 0, 0); + return false; + } + + // Eat the '('. + Lexer.Lex(); + } + } + + // If we reached here, then we just ate the ( of the memory operand. Process + // the rest of the memory operand. + unsigned BaseReg = 0, ScaleReg = 0, Scale = 0; + + if (Lexer.is(asmtok::Register)) { + BaseReg = 123; // FIXME: decode reg # + Lexer.Lex(); // eat the register. + } + + if (Lexer.is(asmtok::Comma)) { + Lexer.Lex(); // eat the comma. + + if (Lexer.is(asmtok::Register)) { + ScaleReg = 123; // FIXME: decode reg # + Lexer.Lex(); // eat the register. + Scale = 1; // If not specified, the scale defaults to 1. 
+ } + + if (Lexer.is(asmtok::Comma)) { + Lexer.Lex(); // eat the comma. + + // If present, get and validate scale amount. + if (Lexer.is(asmtok::IntVal)) { + int64_t ScaleVal = Lexer.getCurIntVal(); + if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 && ScaleVal != 8) + return TokError("scale factor in address must be 1, 2, 4 or 8"); + Lexer.Lex(); // eat the scale. + Scale = (unsigned)ScaleVal; + } + } + } + + // Ok, we've eaten the memory operand, verify we have a ')' and eat it too. + if (Lexer.isNot(asmtok::RParen)) + return TokError("unexpected token in memory operand"); + Lexer.Lex(); // Eat the ')'. + + Op = X86Operand::CreateMem(SegReg, Disp, BaseReg, Scale, ScaleReg); + return false; +} + +/// ParseParenExpr - Parse a paren expression and return it. +/// NOTE: This assumes the leading '(' has already been consumed. +/// +/// parenexpr ::= expr) +/// +bool AsmParser::ParseParenExpr(int64_t &Res) { + if (ParseExpression(Res)) return true; + if (Lexer.isNot(asmtok::RParen)) + return TokError("expected ')' in parentheses expression"); + Lexer.Lex(); + return false; +} + +/// ParsePrimaryExpr - Parse a primary expression and return it. +/// primaryexpr ::= (parenexpr +/// primaryexpr ::= symbol +/// primaryexpr ::= number +/// primaryexpr ::= ~,+,- primaryexpr (FIXME: operator is eaten, not applied) +bool AsmParser::ParsePrimaryExpr(int64_t &Res) { + switch (Lexer.getKind()) { + default: + return TokError("unknown token in expression"); + case asmtok::Identifier: + // This is a label, this should be parsed as part of an expression, to + // handle things like LFOO+4 + Res = 0; // FIXME. + Lexer.Lex(); // Eat identifier. + return false; + case asmtok::IntVal: + Res = Lexer.getCurIntVal(); + Lexer.Lex(); // Eat the integer. + return false; + case asmtok::LParen: + Lexer.Lex(); // Eat the '('. + return ParseParenExpr(Res); + case asmtok::Tilde: + case asmtok::Plus: + case asmtok::Minus: + Lexer.Lex(); // Eat the operator. 
+ return ParsePrimaryExpr(Res); + } +} + +/// ParseExpression - Parse an expression and return it. +/// +/// expr ::= expr +,- expr -> lowest. +/// expr ::= expr |,^,&,! expr -> middle. +/// expr ::= expr *,/,%,<<,>> expr -> highest. +/// expr ::= primaryexpr +/// +bool AsmParser::ParseExpression(int64_t &Res) { + return ParsePrimaryExpr(Res); +} + + + + +/// ParseStatement: +/// ::= EndOfStatement +/// ::= Label* Directive ...Operands... EndOfStatement +/// ::= Label* Identifier OperandList* EndOfStatement +bool AsmParser::ParseStatement() { + switch (Lexer.getKind()) { + default: + return TokError("unexpected token at start of statement"); + case asmtok::EndOfStatement: + Lexer.Lex(); + return false; + case asmtok::Identifier: + break; + // TODO: Recurse on local labels etc. + } + + // If we have an identifier, handle it as the key symbol. + SMLoc IDLoc = Lexer.getLoc(); + std::string IDVal = Lexer.getCurStrVal(); + + // Consume the identifier, see what is after it. + if (Lexer.Lex() == asmtok::Colon) { + // identifier ':' -> Label. + Lexer.Lex(); + return ParseStatement(); + } + + // Otherwise, we have a normal instruction or directive. + if (IDVal[0] == '.') { + Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now"); + EatToEndOfStatement(); + return false; + } + + // If it's an instruction, parse an operand list. + std::vector<X86Operand> Operands; + + // Read the first operand, if present. Note that we require a newline at the + // end of file, so we don't have to worry about Eof here. + if (Lexer.isNot(asmtok::EndOfStatement)) { + X86Operand Op; + if (ParseX86Operand(Op)) + return true; + Operands.push_back(Op); + } + + while (Lexer.is(asmtok::Comma)) { + Lexer.Lex(); // Eat the comma. + + // Parse and remember the operand. + X86Operand Op; + if (ParseX86Operand(Op)) + return true; + Operands.push_back(Op); + } + + if (Lexer.isNot(asmtok::EndOfStatement)) + return TokError("unexpected token in operand list"); + + // Eat the end of statement marker. 
+ Lexer.Lex(); + + // Instruction is good, process it. + outs() << "Found instruction: " << IDVal << " with " << Operands.size() + << " operands.\n"; + + // Skip to end of line for now. + return false; +} diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h new file mode 100644 index 0000000..82eb433 --- /dev/null +++ b/tools/llvm-mc/AsmParser.h @@ -0,0 +1,48 @@ +//===- AsmParser.h - Parser for Assembly Files ------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class declares the parser for assembly files. +// +//===----------------------------------------------------------------------===// + +#ifndef ASMPARSER_H +#define ASMPARSER_H + +#include "AsmLexer.h" + +namespace llvm { + +class AsmParser { + AsmLexer Lexer; + struct X86Operand; + +public: + AsmParser(SourceMgr &SM) : Lexer(SM) {} + ~AsmParser() {} + + bool Run(); + +private: + bool ParseStatement(); + + bool Error(SMLoc L, const char *Msg); + bool TokError(const char *Msg); + + void EatToEndOfStatement(); + + bool ParseX86Operand(X86Operand &Op); + bool ParseX86MemOperand(X86Operand &Op); + bool ParseExpression(int64_t &Res); + bool ParsePrimaryExpr(int64_t &Res); + bool ParseParenExpr(int64_t &Res); +}; + +} // end namespace llvm + +#endif diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt new file mode 100644 index 0000000..d8195e7 --- /dev/null +++ b/tools/llvm-mc/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LLVM_LINK_COMPONENTS support) + +add_llvm_tool(llvm-mc + llvm-mc.cpp + AsmLexer.cpp + AsmParser.cpp + ) diff --git a/tools/llvm-mc/Makefile b/tools/llvm-mc/Makefile new file mode 100644 index 0000000..7b4d944 --- /dev/null +++ b/tools/llvm-mc/Makefile @@ -0,0 +1,17 @@ +##===- tools/llvm-mc/Makefile ------------------------------*- 
Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../.. +TOOLNAME = llvm-mc +LINK_COMPONENTS := support + +# This tool has no plugins, optimize startup time. +TOOL_NO_EXPORTS = 1 + +include $(LEVEL)/Makefile.common diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp new file mode 100644 index 0000000..52205c4 --- /dev/null +++ b/tools/llvm-mc/llvm-mc.cpp @@ -0,0 +1,161 @@ +//===-- llvm-mc.cpp - Machine Code Hacking Driver -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This utility is a simple driver that allows command line hacking on machine +// code. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/System/Signals.h" +#include "AsmParser.h" +using namespace llvm; + +static cl::opt<std::string> +InputFilename(cl::Positional, cl::desc(""), cl::init("-")); + +static cl::opt<std::string> +OutputFilename("o", cl::desc("Output filename"), + cl::value_desc("filename")); + +static cl::list<std::string> +IncludeDirs("I", cl::desc("Directory of include files"), + cl::value_desc("directory"), cl::Prefix); + +enum ActionType { + AC_AsLex, + AC_Assemble +}; + +static cl::opt<ActionType> +Action(cl::desc("Action to perform:"), + cl::init(AC_Assemble), + cl::values(clEnumValN(AC_AsLex, "as-lex", + "Lex tokens from a .s file"), + clEnumValN(AC_Assemble, "assemble", + "Assemble a .s file (default)"), + clEnumValEnd)); + +static int AsLexInput(const char *ProgName) { + std::string ErrorMessage; + MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, + &ErrorMessage); + if (Buffer == 0) { + errs() << ProgName << ": "; + if (ErrorMessage.size()) + errs() << ErrorMessage << "\n"; + else + errs() << "input file didn't read correctly.\n"; + return 1; + } + + SourceMgr SrcMgr; + + // Tell SrcMgr about this buffer, which is what the lexer will pick up. + SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); + + // Record the location of the include directories so that the lexer can find + // it later. + SrcMgr.setIncludeDirs(IncludeDirs); + + AsmLexer Lexer(SrcMgr); + + bool Error = false; + + asmtok::TokKind Tok = Lexer.Lex(); + while (Tok != asmtok::Eof) { + switch (Tok) { + default: + Lexer.PrintMessage(Lexer.getLoc(), "driver: unknown token"); + Error = true; + break; + case asmtok::Error: + Error = true; // error already printed. 
+ break; + case asmtok::Identifier: + outs() << "identifier: " << Lexer.getCurStrVal() << '\n'; + break; + case asmtok::Register: + outs() << "register: " << Lexer.getCurStrVal() << '\n'; + break; + case asmtok::String: + outs() << "string: " << Lexer.getCurStrVal() << '\n'; + break; + case asmtok::IntVal: + outs() << "int: " << Lexer.getCurIntVal() << '\n'; + break; + case asmtok::EndOfStatement: outs() << "EndOfStatement\n"; break; + case asmtok::Colon: outs() << "Colon\n"; break; + case asmtok::Plus: outs() << "Plus\n"; break; + case asmtok::Minus: outs() << "Minus\n"; break; + case asmtok::Tilde: outs() << "Tilde\n"; break; + case asmtok::Slash: outs() << "Slash\n"; break; + case asmtok::LParen: outs() << "LParen\n"; break; + case asmtok::RParen: outs() << "RParen\n"; break; + case asmtok::Star: outs() << "Star\n"; break; + case asmtok::Comma: outs() << "Comma\n"; break; + case asmtok::Dollar: outs() << "Dollar\n"; break; + } + + Tok = Lexer.Lex(); + } + + return Error; +} + +static int AssembleInput(const char *ProgName) { + std::string ErrorMessage; + MemoryBuffer *Buffer = MemoryBuffer::getFileOrSTDIN(InputFilename, + &ErrorMessage); + if (Buffer == 0) { + errs() << ProgName << ": "; + if (ErrorMessage.size()) + errs() << ErrorMessage << "\n"; + else + errs() << "input file didn't read correctly.\n"; + return 1; + } + + SourceMgr SrcMgr; + + // Tell SrcMgr about this buffer, which is what TGParser will pick up. + SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); + + // Record the location of the include directories so that the lexer can find + // it later. + SrcMgr.setIncludeDirs(IncludeDirs); + + AsmParser Parser(SrcMgr); + return Parser.Run(); +} + + +int main(int argc, char **argv) { + // Print a stack trace if we signal out. + sys::PrintStackTraceOnErrorSignal(); + PrettyStackTraceProgram X(argc, argv); + llvm_shutdown_obj Y; // Call llvm_shutdown() on exit. 
+ cl::ParseCommandLineOptions(argc, argv, "llvm machine code playground\n"); + + switch (Action) { + default: + case AC_AsLex: + return AsLexInput(argv[0]); + case AC_Assemble: + return AssembleInput(argv[0]); + } + + return 0; +} + diff --git a/tools/llvmc/doc/LLVMC-Reference.rst b/tools/llvmc/doc/LLVMC-Reference.rst index d99fa0c..7befe8f 100644 --- a/tools/llvmc/doc/LLVMC-Reference.rst +++ b/tools/llvmc/doc/LLVMC-Reference.rst @@ -33,7 +33,7 @@ example, as a build tool for game resources. Because LLVMC employs TableGen_ as its configuration language, you need to be familiar with it to customize LLVMC. -.. _TableGen: http://llvm.cs.uiuc.edu/docs/TableGenFundamentals.html +.. _TableGen: http://llvm.org/docs/TableGenFundamentals.html Compiling with LLVMC @@ -48,12 +48,12 @@ you shouldn't be able to notice them:: $ ./a.out hello -One nice feature of LLVMC is that one doesn't have to distinguish -between different compilers for different languages (think ``g++`` and -``gcc``) - the right toolchain is chosen automatically based on input -language names (which are, in turn, determined from file -extensions). If you want to force files ending with ".c" to compile as -C++, use the ``-x`` option, just like you would do it with ``gcc``:: +One nice feature of LLVMC is that one doesn't have to distinguish between +different compilers for different languages (think ``g++`` vs. ``gcc``) - the +right toolchain is chosen automatically based on input language names (which +are, in turn, determined from file extensions). If you want to force files +ending with ".c" to compile as C++, use the ``-x`` option, just like you would +do it with ``gcc``:: $ # hello.c is really a C++ file $ llvmc -x c++ hello.c @@ -94,9 +94,9 @@ configuration libraries: * ``--check-graph`` - Check the compilation for common errors like mismatched output/input language names, multiple default edges and cycles. Because of - plugins, these checks can't be performed at compile-time. 
Exit with code zero if - no errors were found, and return the number of found errors otherwise. Hidden - option, useful for debugging LLVMC plugins. + plugins, these checks can't be performed at compile-time. Exit with code zero + if no errors were found, and return the number of found errors + otherwise. Hidden option, useful for debugging LLVMC plugins. * ``--view-graph`` - Show a graphical representation of the compilation graph and exit. Requires that you have ``dot`` and ``gv`` programs installed. Hidden @@ -104,8 +104,9 @@ configuration libraries: * ``--write-graph`` - Write a ``compilation-graph.dot`` file in the current directory with the compilation graph description in Graphviz format (identical - to the file used by the ``--view-graph`` option). The ``-o`` option can be used - to set the output file name. Hidden option, useful for debugging LLVMC plugins. + to the file used by the ``--view-graph`` option). The ``-o`` option can be + used to set the output file name. Hidden option, useful for debugging LLVMC + plugins. * ``--save-temps`` - Write temporary files to the current directory and do not delete them on exit. Hidden option, useful for debugging. @@ -113,7 +114,6 @@ configuration libraries: * ``--help``, ``--help-hidden``, ``--version`` - These options have their standard meaning. - Compiling LLVMC plugins ======================= @@ -146,29 +146,55 @@ generic:: $ mv Simple.td MyPlugin.td -Note that the plugin source directory must be placed under -``$LLVMC_DIR/plugins`` to make use of the existing build -infrastructure. To build a version of the LLVMC executable called -``mydriver`` with your plugin compiled in, use the following command:: - - $ cd $LLVMC_DIR - $ make BUILTIN_PLUGINS=MyPlugin DRIVER_NAME=mydriver - To build your plugin as a dynamic library, just ``cd`` to its source directory and run ``make``. The resulting file will be called -``LLVMC$(LLVMC_PLUGIN).$(DLL_EXTENSION)`` (in our case, -``LLVMCMyPlugin.so``). 
This library can be then loaded in with the +``plugin_llvmc_$(LLVMC_PLUGIN).$(DLL_EXTENSION)`` (in our case, +``plugin_llvmc_MyPlugin.so``). This library can then be loaded with the ``-load`` option. Example:: $ cd $LLVMC_DIR/plugins/Simple $ make - $ llvmc -load $LLVM_DIR/Release/lib/LLVMCSimple.so + $ llvmc -load $LLVM_DIR/Release/lib/plugin_llvmc_Simple.so + +Compiling standalone LLVMC-based drivers +======================================== + +By default, the ``llvmc`` executable consists of a driver core plus several +statically linked plugins (``Base`` and ``Clang`` at the moment). You can +produce a standalone LLVMC-based driver executable by linking the core with your +own plugins. The recommended way to do this is by starting with the provided +``Skeleton`` example (``$LLVMC_DIR/example/Skeleton``):: + + $ cd $LLVMC_DIR/example/ + $ cp -r Skeleton mydriver + $ cd mydriver + $ vim Makefile + [...] + $ make + +If you're compiling LLVM with different source and object directories, then you +must perform the following additional steps before running ``make``:: + + # LLVMC_SRC_DIR = $LLVM_SRC_DIR/tools/llvmc/ + # LLVMC_OBJ_DIR = $LLVM_OBJ_DIR/tools/llvmc/ + $ cp $LLVMC_SRC_DIR/example/mydriver/Makefile \ + $LLVMC_OBJ_DIR/example/mydriver/ + $ cd $LLVMC_OBJ_DIR/example/mydriver + $ make + +Another way to do the same thing is by using the following command:: + + $ cd $LLVMC_DIR + $ make LLVMC_BUILTIN_PLUGINS=MyPlugin LLVMC_BASED_DRIVER_NAME=mydriver + +This works with both srcdir == objdir and srcdir != objdir, but assumes that the +plugin source directory was placed under ``$LLVMC_DIR/plugins``. Sometimes, you will want a 'bare-bones' version of LLVMC that has no built-in plugins. 
It can be compiled with the following command:: $ cd $LLVMC_DIR - $ make BUILTIN_PLUGINS="" + $ make LLVMC_BUILTIN_PLUGINS="" Customizing LLVMC: the compilation graph diff --git a/tools/llvmc/doc/LLVMC-Tutorial.rst b/tools/llvmc/doc/LLVMC-Tutorial.rst index 6f06477..e7e8f08 100644 --- a/tools/llvmc/doc/LLVMC-Tutorial.rst +++ b/tools/llvmc/doc/LLVMC-Tutorial.rst @@ -46,23 +46,28 @@ Using LLVMC to generate toolchain drivers LLVMC plugins are written mostly using TableGen_, so you need to be familiar with it to get anything done. -.. _TableGen: http://llvm.cs.uiuc.edu/docs/TableGenFundamentals.html +.. _TableGen: http://llvm.org/docs/TableGenFundamentals.html -Start by compiling ``plugins/Simple/Simple.td``, which is a primitive -wrapper for ``gcc``:: +Start by compiling ``example/Simple``, which is a primitive wrapper for +``gcc``:: $ cd $LLVM_DIR/tools/llvmc - $ make DRIVER_NAME=mygcc BUILTIN_PLUGINS=Simple + $ cp -r example/Simple plugins/Simple + + # NB: A less verbose way to compile standalone LLVMC-based drivers is + # described in the reference manual. + + $ make LLVMC_BASED_DRIVER_NAME=mygcc LLVMC_BUILTIN_PLUGINS=Simple $ cat > hello.c [...] $ mygcc hello.c $ ./hello.out Hello -Here we link our plugin with the LLVMC core statically to form an -executable file called ``mygcc``. It is also possible to build our -plugin as a standalone dynamic library; this is described in the -reference manual. +Here we link our plugin with the LLVMC core statically to form an executable +file called ``mygcc``. It is also possible to build our plugin as a dynamic +library to be loaded by the ``llvmc`` executable (or any other LLVMC-based +standalone driver); this is described in the reference manual. Contents of the file ``Simple.td`` look like this:: diff --git a/tools/llvmc/driver/Makefile b/tools/llvmc/driver/Makefile index 3dd373a..5f5ec53 100644 --- a/tools/llvmc/driver/Makefile +++ b/tools/llvmc/driver/Makefile @@ -10,10 +10,10 @@ LEVEL = ../../.. 
TOOLNAME = $(LLVMC_BASED_DRIVER_NAME) -LLVMLIBS = CompilerDriver +LLVMLIBS = CompilerDriver.a ifneq ($(LLVMC_BUILTIN_PLUGINS),) -USEDLIBS += $(patsubst %,plugin_llvmc_%,$(LLVMC_BUILTIN_PLUGINS)) +USEDLIBS += $(patsubst %,plugin_llvmc_%.a,$(LLVMC_BUILTIN_PLUGINS)) endif LINK_COMPONENTS = support system diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp index 6f8a028..11e0e55 100644 --- a/tools/lto/LTOCodeGenerator.cpp +++ b/tools/lto/LTOCodeGenerator.cpp @@ -16,13 +16,18 @@ #include "LTOCodeGenerator.h" -#include "llvm/Module.h" -#include "llvm/PassManager.h" -#include "llvm/Linker.h" #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" +#include "llvm/Linker.h" +#include "llvm/Module.h" #include "llvm/ModuleProvider.h" +#include "llvm/PassManager.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/Verifier.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/CodeGen/FileWriters.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Mangler.h" #include "llvm/Support/MemoryBuffer.h" @@ -30,25 +35,21 @@ #include "llvm/Support/SystemUtils.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Signals.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/LoopPass.h" -#include "llvm/Analysis/Verifier.h" -#include "llvm/CodeGen/FileWriters.h" #include "llvm/Target/SubtargetFeature.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetAsmInfo.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetMachineRegistry.h" -#include "llvm/Target/TargetAsmInfo.h" +#include "llvm/Target/TargetSelect.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/Scalar.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" +#include #include #include -#include #include @@ -74,6 +75,8 @@ LTOCodeGenerator::LTOCodeGenerator() 
_codeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC), _nativeObjectFile(NULL), _gccPath(NULL), _assemblerPath(NULL) { + InitializeAllTargets(); + InitializeAllAsmPrinters(); } -- cgit v1.1