diff options
Diffstat (limited to 'src/llvm/include')
33 files changed, 6567 insertions, 0 deletions
diff --git a/src/llvm/include/InnerLoopAnalysis.h b/src/llvm/include/InnerLoopAnalysis.h new file mode 100644 index 0000000..f11225d --- /dev/null +++ b/src/llvm/include/InnerLoopAnalysis.h @@ -0,0 +1,291 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __INNERLOOPANALYSIS_H +#define __INNERLOOPANALYSIS_H + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm-types.h" + + +class InductionDesc { + /* Start value. */ + Value *StartValue; + /* Step value. */ + const SCEV *Step; + +public: + InductionDesc() : StartValue(nullptr), Step(nullptr) {} + InductionDesc(Value *Start, const SCEV *Step) + : StartValue(Start), Step(Step) {} + + Value *getStartValue() const { return StartValue; } + const SCEV *getStep() const { return Step; } +}; + +class ReductionDesc { +public: + + enum ReductionKind { + NoReduction, /* Not a reduction. */ + IntegerAdd, /* Sum of numbers. */ + IntegerMult, /* Product of numbers. */ + IntegerOr, /* Bitwise or logical OR of numbers. */ + IntegerAnd, /* Bitwise or logical AND of numbers. */ + IntegerXor, /* Bitwise or logical XOR of numbers. */ + FloatAdd, /* Sum of float numbers. */ + FloatMult, /* Product of float numbers. */ + }; + + ReductionDesc() + : StartValue(nullptr), LoopExitInstr(nullptr), + Kind(ReductionKind::NoReduction), Ty(nullptr) {} + ReductionDesc(Value *Start, Instruction *Exit, ReductionKind K, Type *Ty) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), Ty(Ty) {} + + Value *getStartValue() const { return StartValue; } + Value *getNextValue() const { return LoopExitInstr; } + Instruction *getLoopExitInstr() { return LoopExitInstr; } + ReductionKind getReductionKind() { return Kind; } + Type *getScalarType() { return Ty; } + +private: + /* The starting value of the recurrence. */ + Value *StartValue; + /* The instruction who's value is used outside the loop. 
*/ + Instruction *LoopExitInstr; + /* The kind of the recurrence.*/ + ReductionKind Kind; + /* The scalar type. */ + Type *Ty; +}; + +/* + * The InnerLoop class represents a single innermost loop. The InnerLoop has + * a special shape that is specific to the DBT decoded guest loop, and its loop + * definition is different to a natural loop, e.g., latch and exiting block. + */ +class InnerLoop { +public: + typedef std::map<PHINode *, InductionDesc> InductionList; + typedef std::map<PHINode *, ReductionDesc> ReductionList; + +private: + Loop &TheLoop; + + /* The list of blocks in this loop. First entry is the header node. */ + std::vector<BasicBlock *> Blocks; + SmallPtrSet<const BasicBlock *, 8> DenseBlockSet; + + std::vector<BasicBlock *> Latches; + std::map<BasicBlock *, BasicBlock *> SplitLatches; + + bool UnknownPhi; + InductionList Inductions; + ReductionList Reductions; + + void addInduction(PHINode *Phi, Value *Start, const SCEV *Step) { + Inductions[Phi] = InductionDesc(Start, Step); + } + + void addReduction(PHINode *Phi, Value *Start, Instruction *Exit, + ReductionDesc::ReductionKind K, Type *Ty) { + Reductions[Phi] = ReductionDesc(Start, Exit, K, Ty); + } + + InnerLoop(const InnerLoop &) = delete; + const InnerLoop& operator=(const InnerLoop &) = delete; + + friend class InnerLoopAnalysis; + +public: + InnerLoop(Loop *loop); + ~InnerLoop() {} + + Loop &getLoop() const { return TheLoop; } + + BasicBlock *getHeader() const { return Blocks.front(); } + + /* Return true if the specified basic block is in this loop. */ + bool contains(const BasicBlock *BB) const { + return DenseBlockSet.count(BB); + } + + /* Return true if the specified instruction is in this loop. */ + bool contains(const Instruction *Inst) const { + return contains(Inst->getParent()); + } + + /* Get a list of the basic blocks which make up this loop. 
*/ + typedef typename std::vector<BasicBlock*>::const_iterator block_iterator; + const std::vector<BasicBlock*> &getBlocks() const { return Blocks; } + block_iterator block_begin() const { return Blocks.begin(); } + block_iterator block_end() const { return Blocks.end(); } + inline iterator_range<block_iterator> blocks() const { + return make_range(block_begin(), block_end()); + } + + /* Get the number of blocks in this loop in constant time. */ + unsigned getNumBlocks() const { return Blocks.size(); } + + /* True if terminator in the block can branch to another block that is + * outside of the current loop. */ + bool isLoopExiting(BasicBlock *BB) const; + + /* Calculate the number of back edges to the loop header. */ + unsigned getNumBackEdges() const; + + /* Return all blocks inside the loop that have successors outside of the + * loop. */ + void getExitingBlocks(SmallVectorImpl<BasicBlock *> &ExitingBlocks) const; + + /* If getExitingBlocks would return exactly one block, return that block. + * Otherwise return null. */ + BasicBlock *getExitingBlock() const; + + /* Return all of the successor blocks of this loop. */ + void getExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const; + + /* If getExitBlocks would return exactly one block, return that block. + * Otherwise return null. */ + BasicBlock *getExitBlock() const; + + /* If there is a preheader for this loop, return it. A loop has a preheader + * if there is only one edge to the header of the loop from outside of the + * loop. If this is the case, the block branching to the header of the loop + * is the preheader node. + * + * This method returns null if there is no preheader for the loop. */ + BasicBlock *getLoopPreheader() const; + + /* If the given loop's header has exactly one unique predecessor outside the + * loop, return it. Otherwise return null. + * This is less strict than the loop "preheader" concept, which requires + * the predecessor to have exactly one successor. 
*/ + BasicBlock *getLoopPredecessor() const; + + unsigned getNumLoopLatches() const { return Latches.size(); } + unsigned getNumSplitLatches() const { return SplitLatches.size(); } + + /* Return all loop latch blocks of this loop. A latch block is a block that + * contains a branch back to the header. */ + void getLoopLatches(SmallVectorImpl<BasicBlock *> &LoopLatches) const { + for (auto I : Latches) + LoopLatches.push_back(I); + } + + /* If there is a latch tail, return it. */ + BasicBlock *getSingleLatchTail() const { + return (SplitLatches.size() == 1) ? SplitLatches.begin()->first : + nullptr; + } + + /* If there is a latch head, return it. */ + BasicBlock *getSingleLatchHead() const { + return (SplitLatches.size() == 1) ? SplitLatches.begin()->second : + nullptr; + } + + /* Return all of the latch tails of this loop. */ + void getLatchTails(SmallVectorImpl<BasicBlock *> &LatchTails) const { + for (auto &I : SplitLatches) + LatchTails.push_back(I.first); + } + + /* Given a latch tail, return its latch head. */ + BasicBlock *getLatchHead(BasicBlock *BB) { + if (SplitLatches.find(BB) == SplitLatches.end()) + return nullptr; + return SplitLatches[BB]; + } + + /* If the given phi is an induction of the loop, return the induction. */ + InductionDesc *getInduction(PHINode *Phi) { + if (Inductions.find(Phi) == Inductions.end()) + return nullptr; + return &Inductions[Phi]; + } + + /* If the given phi is a reduction of the loop, return the reduction. */ + ReductionDesc *getReduction(PHINode *Phi) { + if (Reductions.find(Phi) == Reductions.end()) + return nullptr; + return &Reductions[Phi]; + } + + /* Return true if the loop has unknown phi(s). A loop has unknown phi(s) if + * a phi node is not identified, or the loop has no preheader or latch tail. + * + * If the loop has unknown phi(s), the data structure of Inductions and + * Reductions can be undefined. 
*/ + bool hasUnknownPhi() { return UnknownPhi; } + + /* Return true if the instruction `From' can flow to instruction `To' in + * the loop. */ + bool isReachable(Instruction *From, Instruction *To); +}; + +class InnerLoopAnalysis { + std::vector<InnerLoop *> InnerLoops; + + void analyzePhi(InnerLoop &TheLoop, ScalarEvolution *SE); + bool analyzeInduction(InnerLoop &TheLoop, ScalarEvolution *SE, PHINode *Phi); + bool analyzeReduction(InnerLoop &TheLoop, PHINode *Phi); + +public: + InnerLoopAnalysis() {} + ~InnerLoopAnalysis() { releaseMemory(); } + + void releaseMemory() { + while (!InnerLoops.empty()) { + InnerLoop *L = InnerLoops.back(); + InnerLoops.pop_back(); + delete L; + } + } + void print(raw_ostream &OS, const Module * = nullptr) const {} + void verify() const {} + void analyze(LoopInfo *LI, ScalarEvolution *SE); + + /* iterator/begin/end - The interface to the innermost loops. */ + typedef typename std::vector<InnerLoop *>::const_iterator iterator; + typedef typename std::vector<InnerLoop *>::const_reverse_iterator + reverse_iterator; + iterator begin() const { return InnerLoops.begin(); } + iterator end() const { return InnerLoops.end(); } + reverse_iterator rbegin() const { return InnerLoops.rbegin(); } + reverse_iterator rend() const { return InnerLoops.rend(); } + bool empty() const { return InnerLoops.empty(); } + unsigned size() { return InnerLoops.size(); } +}; + +/* + * InnerLoopAnalysisWrapperPass Pass + */ +class InnerLoopAnalysisWrapperPass : public FunctionPass { + InnerLoopAnalysis LA; + +public: + static char ID; + InnerLoopAnalysisWrapperPass() : FunctionPass(ID) { + initializeInnerLoopAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + InnerLoopAnalysis &getLoopAnalysis() { return LA; } + const InnerLoopAnalysis &getLoopAnalysis() const { return LA; } + + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module * = nullptr) const override; + void 
verifyAnalysis() const override; + bool runOnFunction(Function &F) override; +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/JIT.h b/src/llvm/include/JIT.h new file mode 100644 index 0000000..a1b3c8d --- /dev/null +++ b/src/llvm/include/JIT.h @@ -0,0 +1,228 @@ +//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the top-level JIT data structure. +// +//===----------------------------------------------------------------------===// + +#ifndef __JIT_H +#define __JIT_H + +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/PassManager.h" + +namespace llvm { + +class Function; +struct JITEvent_EmittedFunctionDetails; +class MachineCodeEmitter; +class MachineCodeInfo; +class TargetJITInfo; +class TargetMachine; + +class JITState { +private: + FunctionPassManager PM; // Passes to compile a function + Module *M; // Module used to create the PM + + /// PendingFunctions - Functions which have not been code generated yet, but + /// were called from a function being code generated. 
+ std::vector<AssertingVH<Function> > PendingFunctions; + +public: + explicit JITState(Module *M) : PM(M), M(M) {} + + FunctionPassManager &getPM() { + return PM; + } + + Module *getModule() const { return M; } + std::vector<AssertingVH<Function> > &getPendingFunctions() { + return PendingFunctions; + } +}; + + +class JIT : public ExecutionEngine { + /// types + typedef ValueMap<const BasicBlock *, void *> + BasicBlockAddressMapTy; + /// data + TargetMachine &TM; // The current target we are compiling to + TargetJITInfo &TJI; // The JITInfo for the target we are compiling to + JITCodeEmitter *JCE; // JCE object + JITMemoryManager *JMM; + std::vector<JITEventListener*> EventListeners; + + /// AllocateGVsWithCode - Some applications require that global variables and + /// code be allocated into the same region of memory, in which case this flag + /// should be set to true. Doing so breaks freeMachineCodeForFunction. + bool AllocateGVsWithCode; + + /// True while the JIT is generating code. Used to assert against recursive + /// entry. + bool isAlreadyCodeGenerating; + + JITState *jitstate; + + /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their + /// actualized version, only filled for basic blocks that have their address + /// taken. + BasicBlockAddressMapTy BasicBlockAddressMap; + + + JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, + JITMemoryManager *JMM, bool AllocateGVsWithCode); +public: + ~JIT(); + + static void Register() { + JITCtor = createJIT; + } + + /// getJITInfo - Return the target JIT information structure. + /// + TargetJITInfo &getJITInfo() const { return TJI; } + + /// create - Create an return a new JIT compiler if there is one available + /// for the current target. Otherwise, return null. 
+ /// + static ExecutionEngine *create(Module *M, + std::string *Err, + JITMemoryManager *JMM, + CodeGenOpt::Level OptLevel = + CodeGenOpt::Default, + bool GVsWithCode = true, + Reloc::Model RM = Reloc::Default, + CodeModel::Model CMM = CodeModel::JITDefault) { + return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode, + RM, CMM); + } + + void addModule(Module *M) override; + + /// removeModule - Remove a Module from the list of modules. Returns true if + /// M is found. + bool removeModule(Module *M) override; + + /// runFunction - Start execution with the specified function and arguments. + /// + GenericValue runFunction(Function *F, + const std::vector<GenericValue> &ArgValues) override; + + /// getPointerToNamedFunction - This method returns the address of the + /// specified function by using the MemoryManager. As such it is only + /// useful for resolving library symbols, not code generated symbols. + /// + /// If AbortOnFailure is false and no function with the given name is + /// found, this function silently returns a null pointer. Otherwise, + /// it prints a message to stderr and aborts. + /// + void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) override; + + // CompilationCallback - Invoked the first time that a call site is found, + // which causes lazy compilation of the target function. + // + static void CompilationCallback(); + + /// getPointerToFunction - This returns the address of the specified function, + /// compiling it if necessary. + /// + void *getPointerToFunction(Function *F) override; + + /// addPointerToBasicBlock - Adds address of the specific basic block. + void addPointerToBasicBlock(const BasicBlock *BB, void *Addr); + + /// clearPointerToBasicBlock - Removes address of specific basic block. + void clearPointerToBasicBlock(const BasicBlock *BB); + + /// getPointerToBasicBlock - This returns the address of the specified basic + /// block, assuming function is compiled. 
+ void *getPointerToBasicBlock(BasicBlock *BB) override; + + /// getOrEmitGlobalVariable - Return the address of the specified global + /// variable, possibly emitting it to memory if needed. This is used by the + /// Emitter. + void *getOrEmitGlobalVariable(const GlobalVariable *GV) override; + + /// getPointerToFunctionOrStub - If the specified function has been + /// code-gen'd, return a pointer to the function. If not, compile it, or use + /// a stub to implement lazy compilation if available. + /// + void *getPointerToFunctionOrStub(Function *F) override; + + /// recompileAndRelinkFunction - This method is used to force a function + /// which has already been compiled, to be compiled again, possibly + /// after it has been modified. Then the entry to the old copy is overwritten + /// with a branch to the new copy. If there was no old copy, this acts + /// just like JIT::getPointerToFunction(). + /// + void *recompileAndRelinkFunction(Function *F) override; + + /// freeMachineCodeForFunction - deallocate memory used to code-generate this + /// Function. + /// + void freeMachineCodeForFunction(Function *F) override; + + /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd + /// function was encountered. Add it to a pending list to be processed after + /// the current function. + /// + void addPendingFunction(Function *F); + + /// getCodeEmitter - Return the code emitter this JIT is emitting into. 
+ /// + JITCodeEmitter *getCodeEmitter() const { return JCE; } + + static ExecutionEngine *createJIT(Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + bool GVsWithCode, + TargetMachine *TM); + + // Run the JIT on F and return information about the generated code + void runJITOnFunction(Function *F, MachineCodeInfo *MCI = nullptr) override; + + void RegisterJITEventListener(JITEventListener *L) override; + void UnregisterJITEventListener(JITEventListener *L) override; + + TargetMachine *getTargetMachine() override { return &TM; } + + /// These functions correspond to the methods on JITEventListener. They + /// iterate over the registered listeners and call the corresponding method on + /// each. + void NotifyFunctionEmitted( + const Function &F, void *Code, size_t Size, + const JITEvent_EmittedFunctionDetails &Details); + void NotifyFreeingMachineCode(void *OldPtr); + + BasicBlockAddressMapTy & + getBasicBlockAddressMap() { + return BasicBlockAddressMap; + } + + +private: + static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM, + TargetMachine &tm); + void runJITOnFunctionUnlocked(Function *F); + void updateFunctionStubUnlocked(Function *F); + void jitTheFunctionUnlocked(Function *F); + +protected: + + /// getMemoryforGV - Allocate memory for a global variable. + char* getMemoryForGV(const GlobalVariable* GV) override; + +}; + +} // End llvm namespace + +#endif diff --git a/src/llvm/include/JITMemoryManager.h b/src/llvm/include/JITMemoryManager.h new file mode 100644 index 0000000..301d227 --- /dev/null +++ b/src/llvm/include/JITMemoryManager.h @@ -0,0 +1,318 @@ +//===-- JITMemoryManager.cpp - Memory Allocator for JIT'd code ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines the DefaultJITMemoryManager class. +// +//===----------------------------------------------------------------------===// + +#ifndef __JITMEMORYMANAGER_H +#define __JITMEMORYMANAGER_H + +#include <sys/mman.h> +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm-debug.h" +#include "utils.h" + +using namespace llvm; + +#define MIN_CODE_CACHE_SIZE (1 * 1024 * 1024) +#define DEFAULT_GLOBAL_SIZE (64 * 1024) +#define DEFAULT_THRESHOLD (32 * 1024) + + +// AtExitHandlers - List of functions to call when the program exits, +// registered with the atexit() library function. +static std::vector<void (*)()> AtExitHandlers; + +/// runAtExitHandlers - Run any functions registered by the program's +/// calls to atexit(3), which we intercept and store in +/// AtExitHandlers. +/// +static void runAtExitHandlers() { + while (!AtExitHandlers.empty()) { + void (*Fn)() = AtExitHandlers.back(); + AtExitHandlers.pop_back(); + Fn(); + } +} + +//===----------------------------------------------------------------------===// +// Function stubs that are invoked instead of certain library calls +// +// Force the following functions to be linked in to anything that uses the +// JIT. This is a hack designed to work around the all-too-clever Glibc +// strategy of making these functions work differently when inlined vs. when +// not inlined, and hiding their real definitions in a separate archive file +// that the dynamic linker can't see. For more info, search for +// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. +#if defined(__linux__) && defined(__GLIBC__) +/* stat functions are redirecting to __xstat with a version number. On x86-64 + * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' + * available as an exported symbol, so we have to add it explicitly. 
+ */ +namespace { +class StatSymbols { +public: + StatSymbols() { + sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); + sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); + sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); + sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); + sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); + sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); + sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); + sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); + sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); + } +}; +} +static StatSymbols initStatSymbols; +#endif // __linux__ + +// jit_exit - Used to intercept the "exit" library call. +static void jit_exit(int Status) { + runAtExitHandlers(); // Run atexit handlers... + exit(Status); +} + +// jit_atexit - Used to intercept the "atexit" library call. +static int jit_atexit(void (*Fn)()) { + AtExitHandlers.push_back(Fn); // Take note of atexit handler... + return 0; // Always successful +} + +static int jit_noop() { + return 0; +} + + +/// DefaultJITMemoryManager - Manage trace cache memory for the JIT code generation. 
+class DefaultJITMemoryManager : public JITMemoryManager { + uint8_t *TraceCache; + size_t TraceCacheSize; + + uint8_t *GlobalBase; /* section for global data used by QEMU helpers */ + uint8_t *CodeBase; /* section for emitting trace code */ + uint8_t *CodeGenPtr; + + size_t GlobalRemain; + size_t CodeRemain; + size_t Threshold; + + hqemu::Mutex lock; + +public: + DefaultJITMemoryManager(uint8_t *Cache, size_t Size) + : TraceCache(Cache), TraceCacheSize(Size), Threshold(DEFAULT_THRESHOLD) + { + GlobalBase = TraceCache; + GlobalRemain = DEFAULT_GLOBAL_SIZE; + + CodeBase = GlobalBase + DEFAULT_GLOBAL_SIZE; + CodeBase = (uint8_t *)(((uintptr_t)CodeBase + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase; + CodeGenPtr = CodeBase; + } + + ~DefaultJITMemoryManager() {} + + //===----------------------------------------------------------------------===// + // + /// getPointerToNamedFunction - This method returns the address of the specified + /// function by using the dynamic loader interface. As such it is only useful + /// for resolving library symbols, not code generated symbols. + /// + void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) override { + // Check to see if this is one of the functions we want to intercept. Note, + // we cast to intptr_t here to silence a -pedantic warning that complains + // about casting a function pointer to a normal pointer. + if (Name == "exit") return (void*)(intptr_t)&jit_exit; + if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; + + // We should not invoke parent's ctors/dtors from generated main()! + // On Mingw and Cygwin, the symbol __main is resolved to + // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors + // (and register wrong callee's dtors with atexit(3)). + // We expect ExecutionEngine::runStaticConstructorsDestructors() + // is called before ExecutionEngine::runFunctionAsMain() is called. 
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop; + + const char *NameStr = Name.c_str(); + // If this is an asm specifier, skip the sentinel. + if (NameStr[0] == 1) ++NameStr; + + // If it's an external function, look it up in the process image... + void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); + if (Ptr) return Ptr; + + // If it wasn't found and if it starts with an underscore ('_') character, + // try again without the underscore. + if (NameStr[0] == '_') { + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); + if (Ptr) return Ptr; + } + + // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These + // are references to hidden visibility symbols that dlsym cannot resolve. + // If we have one of these, strip off $LDBLStub and try again. +#if defined(__APPLE__) && defined(__ppc__) + if (Name.size() > 9 && Name[Name.size()-9] == '$' && + memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { + // First try turning $LDBLStub into $LDBL128. If that fails, strip it off. + // This mirrors logic in libSystemStubs.a. + std::string Prefix = std::string(Name.begin(), Name.end()-9); + if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) + return Ptr; + if (void *Ptr = getPointerToNamedFunction(Prefix, false)) + return Ptr; + } +#endif + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return nullptr; + } + + void AllocateGOT() override { hqemu_error("fixme.\n"); } + + // Testing methods. 
+ bool CheckInvariants(std::string &ErrorStr) override { hqemu_error("fixme.\n"); return false; } + size_t GetDefaultCodeSlabSize() override { hqemu_error("fixme.\n"); return 0; } + size_t GetDefaultDataSlabSize() override { hqemu_error("fixme.\n"); return 0; } + size_t GetDefaultStubSlabSize() override { hqemu_error("fixme.\n"); return 0; } + unsigned GetNumCodeSlabs() override { hqemu_error("fixme.\n"); return 0; } + unsigned GetNumDataSlabs() override { hqemu_error("fixme.\n"); return 0; } + unsigned GetNumStubSlabs() override { hqemu_error("fixme.\n"); return 0; } + + /// startFunctionBody - When a function starts, allocate a block of free + /// executable memory, returning a pointer to it and its actual size. + uint8_t *startFunctionBody(const Function *F, + uintptr_t &ActualSize) override { + lock.acquire(); + if (unlikely(CodeRemain < Threshold)) + hqemu_error("internal error (fixme).\n"); + + ActualSize = CodeRemain; + return CodeGenPtr; + } + + /// endFunctionBody - The function F is now allocated, and takes the memory + /// in the range [FunctionStart,FunctionEnd). + void endFunctionBody(const Function *F, uint8_t *FunctionStart, + uint8_t *FunctionEnd) override { + assert(FunctionEnd > FunctionStart); + + size_t GenSize = FunctionEnd - FunctionStart; + if (unlikely(GenSize > CodeRemain)) + hqemu_error("exceeds available cache size.\n"); + + CodeGenPtr = (uint8_t *)(((uintptr_t)CodeGenPtr + GenSize + CODE_GEN_ALIGN - 1) + & ~(CODE_GEN_ALIGN - 1)); + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeGenPtr; + lock.release(); + } + + /// allocateSpace - Allocate a memory block of the given size. This method + /// cannot be called between calls to startFunctionBody and endFunctionBody. + uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) override { + hqemu_error("fixme.\n"); + return nullptr; + } + + /// allocateStub - Allocate memory for a function stub. 
+ uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize, + unsigned Alignment) override { + return allocateGlobal(StubSize, Alignment); + } + + /// allocateGlobal - Allocate memory for a global. + uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) override { + hqemu::MutexGuard locked(lock); + + if (!Alignment) + Alignment = 16; + if (Alignment & (Alignment - 1)) + hqemu_error("alignment must be a power of two.\n"); + + unsigned MisAligned = ((intptr_t)GlobalBase & (Alignment - 1)); + if (MisAligned) + MisAligned = Alignment - MisAligned; + + if (GlobalRemain < Size + MisAligned) + hqemu_error("exceeds available global size.\n"); + + uint8_t *GlobalPtr = GlobalBase + MisAligned; + GlobalBase = GlobalPtr + Size; + GlobalRemain -= (Size + MisAligned); + return GlobalPtr; + } + + /// allocateCodeSection - Allocate memory for a code section. + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + StringRef SectionName) override { + hqemu_error("fixme.\n"); return nullptr; + } + + /// allocateDataSection - Allocate memory for a data section. + uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, StringRef SectionName, + bool IsReadOnly) override { + hqemu_error("fixme.\n"); return nullptr; + } + + bool finalizeMemory(std::string *ErrMsg) override { return false; } + + uint8_t *getGOTBase() const override { return nullptr; } + + void deallocateBlock(void *Block) {} + + /// deallocateFunctionBody - Deallocate all memory for the specified + /// function body. + void deallocateFunctionBody(void *Body) override {} + + /// setMemoryWritable - When code generation is in progress, + /// the code pages may need permissions changed. + void setMemoryWritable() override {} + /// setMemoryExecutable - When code generation is done and we're ready to + /// start execution, the code pages may need permissions changed. 
+ void setMemoryExecutable() override {} + + /// setPoisonMemory - Controls whether we write garbage over freed memory. + /// + void setPoisonMemory(bool poison) override {} + + size_t getCodeSize() { return CodeGenPtr - CodeBase; } + bool isSizeAvailable() { + hqemu::MutexGuard locked(lock); + return CodeRemain >= Threshold ? 1 : 0; + } + void Flush() { + CodeGenPtr = CodeBase; + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase; + } + + static DefaultJITMemoryManager *Create(uint8_t *Cache, size_t Size) { + if (Size < MIN_CODE_CACHE_SIZE) + hqemu_error("Trace cache size is too small.\n"); + return new DefaultJITMemoryManager(Cache, Size); + } +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/MCJITMemoryManager.h b/src/llvm/include/MCJITMemoryManager.h new file mode 100644 index 0000000..33059a5 --- /dev/null +++ b/src/llvm/include/MCJITMemoryManager.h @@ -0,0 +1,213 @@ +//===-- MCJITMemoryManager.cpp - Memory manager for MC-JIT -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interface of the MCJIT memory manager base class. +// +//===----------------------------------------------------------------------===// + +#ifndef __MCJITMEMORYMANAGER_H +#define __MCJITMEMORYMANAGER_H + +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm-debug.h" +#include "utils.h" + +using namespace llvm; + +#define MIN_CODE_CACHE_SIZE (1 * 1024 * 1024) +#define DEFAULT_GLOBAL_SIZE (64 * 1024) +#define DEFAULT_THRESHOLD (32 * 1024) + +// RuntimeDyld clients often want to handle the memory management of +// what gets placed where. For JIT clients, this is the subset of +// JITMemoryManager required for dynamic loading of binaries. 
+// +// FIXME: As the RuntimeDyld fills out, additional routines will be needed +// for the varying types of objects to be allocated. +class DefaultMCJITMemoryManager : public RTDyldMemoryManager { + uint8_t *TraceCache; + size_t TraceCacheSize; + + uint8_t *GlobalBase; /* section for global data used by QEMU helpers */ + uint8_t *CodeBase; /* section for emitting trace code */ + uint8_t *CodeGenPtr; + + size_t GlobalRemain; + size_t CodeRemain; + size_t Threshold; + + hqemu::Mutex lock; + + SymbolMap Symbols; + +public: + DefaultMCJITMemoryManager(uint8_t *Cache, size_t Size) + : TraceCache(Cache), TraceCacheSize(Size), Threshold(DEFAULT_THRESHOLD) + { + GlobalBase = TraceCache; + GlobalRemain = DEFAULT_GLOBAL_SIZE; + + CodeBase = GlobalBase + DEFAULT_GLOBAL_SIZE; + CodeBase = (uint8_t *)(((uintptr_t)CodeBase + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase; + CodeGenPtr = CodeBase; + } + ~DefaultMCJITMemoryManager() {} + + /// Allocate a memory block of (at least) the given size suitable for + /// executable code. The SectionID is a unique identifier assigned by the JIT + /// engine, and optionally recorded by the memory manager to access a loaded + /// section. + uint8_t *allocateCodeSection( + uintptr_t Size, unsigned Alignment, unsigned SectionID, + StringRef SectionName) override { + hqemu::MutexGuard locked(lock); + + if (!Alignment) + Alignment = 16; + + if (Alignment & (Alignment - 1)) + hqemu_error("Alignment must be a power of two.\n"); + + uintptr_t CurGenPtr = (uintptr_t)CodeGenPtr; + CurGenPtr = (CurGenPtr + Alignment - 1) & ~(uintptr_t)(Alignment - 1); + CodeGenPtr = (uint8_t *)((CurGenPtr + Size + CODE_GEN_ALIGN - 1) & + ~(uintptr_t)(CODE_GEN_ALIGN - 1)); + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeGenPtr; + return (uint8_t *)CurGenPtr; + } + + /// Allocate a memory block of (at least) the given size suitable for data. 
+ /// The SectionID is a unique identifier assigned by the JIT engine, and + /// optionally recorded by the memory manager to access a loaded section. + uint8_t *allocateDataSection( + uintptr_t Size, unsigned Alignment, unsigned SectionID, + StringRef SectionName, bool IsReadOnly) override { + return allocateCodeSection(Size, Alignment, SectionID, SectionName); + } + + /// Inform the memory manager about the total amount of memory required to + /// allocate all sections to be loaded: + /// \p CodeSize - the total size of all code sections + /// \p DataSizeRO - the total size of all read-only data sections + /// \p DataSizeRW - the total size of all read-write data sections + /// + /// Note that by default the callback is disabled. To enable it + /// redefine the method needsToReserveAllocationSpace to return true. + void reserveAllocationSpace( + uintptr_t CodeSize, uintptr_t DataSizeRO, uintptr_t DataSizeRW) { + hqemu_error("fixme.\n"); + } + + /// Override to return true to enable the reserveAllocationSpace callback. + bool needsToReserveAllocationSpace() { return false; } + + /// Register the EH frames with the runtime so that c++ exceptions work. + /// + /// \p Addr parameter provides the local address of the EH frame section + /// data, while \p LoadAddr provides the address of the data in the target + /// address space. If the section has not been remapped (which will usually + /// be the case for local execution) these two values will be the same. + void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) { + hqemu_error("fixme.\n"); + } + + void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) { + hqemu_error("fixme.\n"); + } + + /// This method returns the address of the specified function or variable. + /// It is used to resolve symbols during module linking. 
+ uint64_t getSymbolAddress(const std::string &Name) { + hqemu::MutexGuard locked(lock); + if (Symbols.find(Name) == Symbols.end()) { + std::string ErrMsg = "Program used external symbol '" + Name + + "'which could not be resolved!\n"; + hqemu_error(ErrMsg.c_str()); + } + return Symbols[Name]; + } + + /// This method returns the address of the specified function. As such it is + /// only useful for resolving library symbols, not code generated symbols. + /// + /// If \p AbortOnFailure is false and no function with the given name is + /// found, this function returns a null pointer. Otherwise, it prints a + /// message to stderr and aborts. + /// + /// This function is deprecated for memory managers to be used with + /// MCJIT or RuntimeDyld. Use getSymbolAddress instead. + void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) { + if (AbortOnFailure) { + std::string ErrMsg = "Program used external symbol '" + Name + + "'which could not be resolved!\n"; + hqemu_error(ErrMsg.c_str()); + } + return nullptr; + } + + /// This method is called after an object has been loaded into memory but + /// before relocations are applied to the loaded sections. The object load + /// may have been initiated by MCJIT to resolve an external symbol for another + /// object that is being finalized. In that case, the object about which + /// the memory manager is being notified will be finalized immediately after + /// the memory manager returns from this call. + /// + /// Memory managers which are preparing code for execution in an external + /// address space can use this call to remap the section addresses for the + /// newly loaded object. +#if defined(LLVM_V35) + void notifyObjectLoaded(ExecutionEngine *EE, + const ObjectImage *Obj) { + } +#else + void notifyObjectLoaded(RuntimeDyld &RTDyld, + const object::ObjectFile &Obj) { + } +#endif + + /// This method is called when object loading is complete and section page + /// permissions can be applied. 
It is up to the memory manager implementation + /// to decide whether or not to act on this method. The memory manager will + /// typically allocate all sections as read-write and then apply specific + /// permissions when this method is called. Code sections cannot be executed + /// until this function has been called. In addition, any cache coherency + /// operations needed to reliably use the memory are also performed. + /// + /// Returns true if an error occurred, false otherwise. + bool finalizeMemory(std::string *ErrMsg = nullptr) override { + return false; + } + + void AddSymbols(SymbolMap &symbols) { + Symbols = symbols; + } + + size_t getCodeSize() { return CodeGenPtr - CodeBase; } + bool isSizeAvailable() { + hqemu::MutexGuard locked(lock); + return CodeRemain >= Threshold ? 1 : 0; + } + void Flush() { + CodeGenPtr = CodeBase; + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase; + } + + static DefaultMCJITMemoryManager *Create(uint8_t *Cache, size_t Size) { + if (Size < MIN_CODE_CACHE_SIZE) { + std::string ErrMsg = "Trace cache size is too small (" + + std::to_string(Size) + ")\n."; + hqemu_error(ErrMsg.c_str()); + } + return new DefaultMCJITMemoryManager(Cache, Size); + } +}; + +#endif diff --git a/src/llvm/include/hqemu-config.h b/src/llvm/include/hqemu-config.h new file mode 100644 index 0000000..2e2f42f --- /dev/null +++ b/src/llvm/include/hqemu-config.h @@ -0,0 +1,142 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __HQEMU_CONFIG_H +#define __HQEMU_CONFIG_H + + +#define PACKAGE_NAME "HQEMU" +#define PACKAGE_VERSION_MAJOR "2.5" +#define PACKAGE_VERSION_MINOR "2" + +#define ENABLE_IBTC +#define ENABLE_CPBL +#define ENABLE_LPAGE +#define ENABLE_PASSES +#define ENABLE_MCJIT +//#define ENABLE_HPM_THREAD +//#define ENABLE_TLBVERSION +//#define ENALBE_CPU_PROFILE +//#define USE_TRACETREE_ONLY + + +#if defined(CONFIG_USER_ONLY) +# define ENABLE_TCG_VECTOR +# define GUEST_BASE guest_base +#else +# define GUEST_BASE (0UL) +#endif + +#if defined(ENABLE_TLBVERSION) +# if defined(ALIGNED_ONLY) +# undef ENABLE_TLBVERSION +# elif HOST_LONG_BITS == 64 && TARGET_LONG_BITS == 32 && defined(HOST_X86_64) +# define ENABLE_TLBVERSION_EXT +# endif +#endif + +#ifndef ENABLE_TLBVERSION +# define TLB_INVALID_SHIFT 3 +# define TLB_NOTDIRTY_SHIFT 4 +# define TLB_MMIO_SHIFT 5 +# define TLB_VERSION_BITS 0 +# define TLB_VERSION_MASK 0 +# define TLB_VERSION_SHIFT (0) +# define tlb_version(__env) 0 +typedef target_ulong tlbaddr_t; +#elif defined(ENABLE_TLBVERSION_EXT) +# define TLB_INVALID_SHIFT 3 +# define TLB_NOTDIRTY_SHIFT 4 +# define TLB_MMIO_SHIFT 5 +# define TLB_VERSION_BITS 32 +# define TLB_VERSION_SIZE (1UL << TLB_VERSION_BITS) +# define TLB_VERSION_MASK (0xFFFFFFFF00000000UL) +# define TLB_VERSION_SHIFT (32) +# define tlb_version(__env) (__env->tlb_version) +typedef unsigned long tlbaddr_t; +#else +# define TLB_INVALID_SHIFT (TARGET_PAGE_BITS - 3) +# define TLB_NOTDIRTY_SHIFT (TARGET_PAGE_BITS - 2) +# define TLB_MMIO_SHIFT (TARGET_PAGE_BITS - 1) +# define TLB_VERSION_BITS (TARGET_PAGE_BITS - 3) +# define TLB_VERSION_SIZE (1 << TLB_VERSION_BITS) +# define TLB_VERSION_MASK (TLB_VERSION_SIZE - 1) +# define TLB_VERSION_SHIFT (0) +# define tlb_version(__env) (__env->tlb_version) +typedef target_ulong tlbaddr_t; +#endif + + +typedef int BlockID; +typedef int TraceID; +#define BUILD_NONE ((uint16_t)0) +#define BUILD_TCG ((uint16_t)1 << 0) +#define BUILD_LLVM ((uint16_t)1 << 1) + +#define 
#define CPU_OPTIMIZATION_COMMON \
    unsigned long sp;           \
    void *opt_link;             \
    uint16_t build_mode;        \
    int start_trace_prediction; \
    int fallthrough;            \
    uintptr_t image_base;       \
    uint32_t restore_val;       \
    uint64_t num_trace_exits;


#define TB_OPTIMIZATION_COMMON \
    BlockID id;              \
    TraceID tid;             /* trace id */                       \
    int mode;                /* current state */                  \
    void *opt_ptr;           /* pointer to the optimized code */  \
    uint32_t exec_count;     /* trace profile execution count */  \
    uint16_t patch_jmp;      /* offset of trace trampoline */     \
    uint16_t patch_next;     /* offset of trace prediction stub */\
    target_ulong jmp_pc[2];  /* pc of the succeeding blocks */    \
    void *image;             \
    void *state;             \
    void *chain;


/* Lifecycle states of a translation block. */
enum {
    BLOCK_NONE = 0,
    BLOCK_ACTIVE,
    BLOCK_TRACEHEAD,
    BLOCK_OPTIMIZED,
    BLOCK_INVALID,
};

/* Supported translation modes. */
enum {
    TRANS_MODE_NONE = 0,
    TRANS_MODE_BLOCK,
    TRANS_MODE_HYBRIDS,
    TRANS_MODE_HYBRIDM,
    TRANS_MODE_INVALID,
};

/* Parse translation mode from env-variable LLVM_MODE.
 * Unset defaults to hybridm; an unrecognized value maps to
 * TRANS_MODE_INVALID. Kept C-compatible (no C++-only constructs). */
static inline int getTransMode(void) {
    static const char * const mode_names[] = {
        "hybridm", "hybrids", "block", "none",
    };
    static const int mode_vals[] = {
        TRANS_MODE_HYBRIDM, TRANS_MODE_HYBRIDS, TRANS_MODE_BLOCK,
        TRANS_MODE_NONE,
    };
    const char *env = getenv("LLVM_MODE");
    unsigned i;

    if (env == NULL)
        return TRANS_MODE_HYBRIDM;
    for (i = 0; i < sizeof(mode_names) / sizeof(mode_names[0]); ++i) {
        if (strcmp(env, mode_names[i]) == 0)
            return mode_vals[i];
    }
    return TRANS_MODE_INVALID;
}
*/ +enum { + A_None = ((uint32_t)0), + A_SetCC = ((uint32_t)1 << 0), + A_NoSIMDization = ((uint32_t)1 << 1), +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/llvm/include/hqemu-helper.h b/src/llvm/include/hqemu-helper.h new file mode 100644 index 0000000..dfcb396 --- /dev/null +++ b/src/llvm/include/hqemu-helper.h @@ -0,0 +1,8 @@ +DEF_HELPER_1(export_hqemu, void, env) +DEF_HELPER_1(lookup_ibtc, ptr, env) +DEF_HELPER_1(lookup_cpbl, ptr, env) +DEF_HELPER_3(validate_cpbl, int, env, tl, int) +DEF_HELPER_2(NET_profile, void, env, int) +DEF_HELPER_2(NET_predict, void, env, int) +DEF_HELPER_2(verify_tb, void, env, int) +DEF_HELPER_3(profile_exec, void, env, ptr, int) diff --git a/src/llvm/include/hqemu.h b/src/llvm/include/hqemu.h new file mode 100644 index 0000000..f5e7180 --- /dev/null +++ b/src/llvm/include/hqemu.h @@ -0,0 +1,84 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __HQEMU_H +#define __HQEMU_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "config-host.h" +#include "config-target.h" +#include "hqemu-config.h" + +#define build_tcg(_env) ((_env)->build_mode & BUILD_TCG) +#define build_llvm(_env) ((_env)->build_mode & BUILD_LLVM) +#define build_llvm_only(_env) ((_env)->build_mode == BUILD_LLVM) + +void hqemu_help(void); + +/* Optimizations */ +int optimization_init(CPUArchState *env); +int optimization_finalize(CPUArchState *env); +int optimization_reset(CPUArchState *env, int force_flush); +int optimization_remove_entry(CPUArchState *env, TranslationBlock *tb); +int optimization_flush_page(CPUArchState *env, target_ulong pc); +int optimization_init_tb(TranslationBlock *tb, int id); + +void itlb_update_entry(CPUArchState *env, TranslationBlock *tb); +void ibtc_update_entry(CPUArchState *env, TranslationBlock *tb); + +int lpt_reset(CPUArchState *env); +int lpt_add_page(CPUArchState *env, target_ulong addr, target_ulong size); +int 
lpt_search_page(CPUArchState *env, target_ulong addr, target_ulong *addrp, target_ulong *sizep); +int lpt_flush_page(CPUArchState *env, target_ulong addr, target_ulong *addrp, target_ulong *sizep); + + +/* Tracer */ +void tracer_exec_tb(CPUArchState *env, uintptr_t next_tb, TranslationBlock *tb); +void tracer_reset(CPUArchState *env); + + +/* LLVM */ +int llvm_init(void); +int llvm_finalize(void); +int llvm_alloc_cache(void); +int llvm_check_cache(void); +int llvm_tb_flush(void); +int llvm_tb_remove(TranslationBlock *tb); +void llvm_handle_chaining(uintptr_t next_tb, TranslationBlock *tb); +int llvm_locate_trace(uintptr_t searched_pc); +TranslationBlock *llvm_find_pc(CPUState *cpu, uintptr_t searched_pc); +int llvm_restore_state(CPUState *cpu, TranslationBlock *tb, uintptr_t searched_pc); +void llvm_fork_start(void); +void llvm_fork_end(int child); + + +/* Annotation */ +enum { + ANNOTATION_NONE = 0, + ANNOTATION_LOOP, +}; +int llvm_has_annotation(target_ulong addr, int annotation); + + +/* External variables */ +extern int tracer_mode; +extern target_ulong pcid; +extern unsigned long alignment_count[]; /* 0: misaligned, 1: aligned. */ +extern unsigned long aligned_boundary; + +#ifdef __cplusplus +} +#endif + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/llvm/include/llvm-annotate.h b/src/llvm/include/llvm-annotate.h new file mode 100644 index 0000000..25454ed --- /dev/null +++ b/src/llvm/include/llvm-annotate.h @@ -0,0 +1,51 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __LLVM_ANNOTATE_H +#define __LLVM_ANNOTATE_H + +#include <map> +#include <cstdint> +#include "qemu-types.h" +#include "llvm-types.h" +#include "utils.h" + +/* Loop metadata */ +struct LoopMetadata { + LoopMetadata() + : Address(-1), Length(-1), VS(-1), VF(-1), Distance(INT_MIN), Start(-1), + End(-1), Stride(-1) {} + target_ulong Address; + uint32_t Length; + uint32_t VS, VF; + int Distance; + int Start, End; + int Stride; +}; + +/* + * The AnnotationFactory class manages the metadata information. + */ +class AnnotationFactory { + typedef std::map<uintptr_t, LoopMetadata*> LoopList; + + std::string MetaFile; + + int ParseXML(const char *name); + +public: + AnnotationFactory(); + ~AnnotationFactory(); + + LoopList Loops; + LoopMetadata *getLoopAnnotation(target_ulong addr); + bool hasLoopAnnotation(target_ulong addr); +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/llvm-debug.h b/src/llvm/include/llvm-debug.h new file mode 100644 index 0000000..405b466 --- /dev/null +++ b/src/llvm/include/llvm-debug.h @@ -0,0 +1,247 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __LLVM_DEBUG_H +#define __LLVM_DEBUG_H + +#include <cstdint> +#include <cstring> +#include <iostream> +#include <sstream> +#include <cstdarg> +#include <unistd.h> +#include <sys/time.h> +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/Support/FileSystem.h" +#include "utils.h" + + +struct DebugMode { + uint64_t Mode; + + DebugMode(uint64_t M) : Mode(M) {} + + bool operator==(const DebugMode &RHS) const { + return Mode == RHS.Mode; + } + bool operator&(const DebugMode &RHS) const { + return Mode & RHS.Mode; + } + DebugMode operator|(const DebugMode &RHS) { + return DebugMode(Mode | RHS.Mode); + } + DebugMode &operator|=(const DebugMode &RHS) { + Mode |= RHS.Mode; + return *this; + } +}; + +/* + * LLVMDebug provides facilities to debug the LLVM translator, based on the + * debug levels. + */ +class LLVMDebug { +public: + enum LLVMDebugMode { + D_NONE = ((uint64_t)0), + D_LLVM = ((uint64_t)1 << 0), + D_INASM = ((uint64_t)1 << 1), + D_OP = ((uint64_t)1 << 2), + D_OUTASM = ((uint64_t)1 << 3), + D_IR = ((uint64_t)1 << 4), + D_IR_OPT = ((uint64_t)1 << 5), + D_ENTRY = ((uint64_t)1 << 6), + D_VERIFY = ((uint64_t)1 << 7), + D_PASS = ((uint64_t)1 << 8), + D_ANNOTATE = ((uint64_t)1 << 9), + D_HPM = ((uint64_t)1 << 10), + D_ASM = (D_INASM | D_OP | D_OUTASM), + D_DEBUG = (D_LLVM | D_IR_OPT | D_OUTASM | D_PASS), + D_ALL = (D_LLVM | D_INASM | D_OP | D_OUTASM | D_IR | D_IR_OPT | + D_ENTRY | D_VERIFY | D_PASS | D_ANNOTATE | D_HPM), + }; + + LLVMDebug() : Mode(D_NONE) + { + hqemu_out.reset(new llvm::raw_fd_ostream(STDOUT_FILENO, false, true)); + hqemu_dbg.reset(new llvm::raw_fd_ostream(STDERR_FILENO, false, true)); + + std::string Str(""); + gettimeofday(&uptime, nullptr); + ParseDebugMode(Str, false); + hqemu_null.SetUnbuffered(); + } + + DebugMode &getDebugMode() { + return Mode; + } + + DebugMode &getDebugMode(LLVMDebugMode M) { + if 
(Modes.find(M) == Modes.end()) + M = D_NONE; + return *Modes[M]; + } + + void setDebugMode(std::string &DebugLevel, std::string &DebugFile) { + ParseDebugMode(DebugLevel); + if (DebugFile != "") { + std::error_code EC; + auto OS = new llvm::raw_fd_ostream(DebugFile, EC, + llvm::sys::fs::F_Text); + if (EC) { + *hqemu_dbg << "Error: failed to open debug file " << DebugFile + << ". (" << EC.message().c_str() << ")\n"; + } + OS->SetUnbuffered(); + hqemu_dbg.reset(OS); + } + } + + void Flush() { + hqemu_dbg->flush(); + } + + void error(const char *fname, const char *fmt, ...) { + static char str[256] = {'\0'}; + va_list ap; + va_start(ap, fmt); + vsprintf(str, fmt, ap); + va_end(ap); + *hqemu_dbg << timestamp() << " Error: " << fname << " - " << str; + exit(0); + } + + llvm::raw_ostream &output() { + return *hqemu_out; + } + + llvm::raw_ostream &debug() { + return *hqemu_dbg; + } + + llvm::raw_ostream &operator<<(DebugMode &M) { + if (M & Mode) { + *hqemu_dbg << timestamp() << " "; + return *hqemu_dbg; + } + return hqemu_null; + }; + +private: + llvm::raw_null_ostream hqemu_null; + std::unique_ptr<llvm::raw_fd_ostream> hqemu_out; + std::unique_ptr<llvm::raw_fd_ostream> hqemu_dbg; + struct timeval uptime; /* The startup time of the DBT */ + DebugMode Mode; /* The debug level */ + std::map<LLVMDebugMode, DebugMode*> Modes; + + std::string timestamp() { + struct timeval tv; + char timestamp[32]; + gettimeofday(&tv, 0); + timersub(&tv, &uptime, &tv); + strftime(timestamp, 32, "[%H:%M:%S", gmtime(&tv.tv_sec)); + sprintf(timestamp + 9, ".%06ld]", tv.tv_usec); + return timestamp; + } + + void ParseDebugMode(std::string &DebugLevel, bool Update=true) { + static std::string debug_str[] = { + "none", "llvm", "in_asm", "op", "out_asm", "ir", "ir_opt", + "entry", "verify", "pass", "annotate", "hpm", "asm", "debug", + "all" + }; + static LLVMDebugMode debug_enum[] = { + D_NONE, D_LLVM, D_INASM, D_OP, D_OUTASM, D_IR, D_IR_OPT, + D_ENTRY, D_VERIFY, D_PASS, D_ANNOTATE, D_HPM, D_ASM, 
D_DEBUG, + D_ALL + }; + + if (!Update) { + for (auto M : debug_enum) + Modes[M] = new DebugMode(M); + return; + } + + if (DebugLevel.empty()) + return; + + std::istringstream ss(DebugLevel); + std::string token; + while(std::getline(ss, token, ',')) { + for (unsigned i = 0, e = ARRAY_SIZE(debug_enum); i != e; ++i) { + if (token == debug_str[i]) { + Mode |= getDebugMode(debug_enum[i]); + break; + } + } + } + } +}; + +extern LLVMDebug DM; + +/* Print messages to stdout. Should not use this function in release mode. */ +static inline llvm::raw_ostream &out() { + return DM.output(); +} +/* Print messages to stderr, controlled by DebugMode. */ +static inline LLVMDebug &dbg() { + return DM; +} +/* Print error messages to stderr and terminate the process. */ +#define hqemu_error(msg,args...) do { DM.error(__func__,msg,##args); } while(0) + +/* Macros to get defined DebugMode. */ +#define DEBUG_NONE DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_NONE) +#define DEBUG_LLVM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_LLVM) +#define DEBUG_INASM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_INASM) +#define DEBUG_OP DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_OP) +#define DEBUG_OUTASM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_OUTASM) +#define DEBUG_IR DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_IR) +#define DEBUG_IR_OPT DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_IR_OPT) +#define DEBUG_ENTRY DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ENTRY) +#define DEBUG_VERIFY DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_VERIFY) +#define DEBUG_PASS DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_PASS) +#define DEBUG_ANNOTATE DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ANNOTATE) +#define DEBUG_HPM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_HPM) +#define DEBUG_ASM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ASM) +#define DEBUG_DEBUG DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_DEBUG) +#define DEBUG_ALL DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ALL) + + + +/* + * Binary disassembler using 
MCDisassembler. + */ +class MCDisasm { + const llvm::MCDisassembler *DisAsm; + const llvm::MCSubtargetInfo *STI; + llvm::MCInstPrinter *IP; + const llvm::MCInstrAnalysis *MIA; + bool HostDisAsm; + bool NoShowRawInsn; + + MCDisasm(const llvm::Target *TheTarget, std::string TripleName, + bool isHost); + + void DumpBytes(llvm::ArrayRef<uint8_t> bytes, llvm::raw_ostream &OS); + +public: + ~MCDisasm(); + void PrintInAsm(uint64_t Addr, uint64_t Size, uint64_t GuestAddr); + void PrintOutAsm(uint64_t Addr, uint64_t Size); + + static MCDisasm *CreateMCDisasm(std::string TripleName, bool isHost); +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/llvm-hard-perfmon.h b/src/llvm/include/llvm-hard-perfmon.h new file mode 100644 index 0000000..ac03b23 --- /dev/null +++ b/src/llvm/include/llvm-hard-perfmon.h @@ -0,0 +1,87 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_HARD_PERFMON_H +#define __LLVM_HARD_PERFMON_H + +#include <map> +#include <thread> +#include "pmu/pmu.h" +#include "utils.h" + +class PerfmonData; +class BaseTracer; + +enum HPMControl { + HPM_INIT = 0, + HPM_FINALIZE, + HPM_START, + HPM_STOP, +}; + +/* + * Hardware Performance Monitor (HPM) + */ +class HardwarePerfmon { + std::thread MonThread; /* Monitor thread */ + int MonThreadID; /* Monitor thread id */ + bool MonThreadStop; /* Monitor thread is stopped or not */ + hqemu::Mutex Lock; + + /* Start monitor thread. */ + void StartMonThread(); + + /* Monitor thread routine. */ + void MonitorFunc(); + +public: + HardwarePerfmon(); + ~HardwarePerfmon(); + + /* Set up HPM with the monitor thread id */ + void Init(int monitor_thread_tid); + + /* Register a thread to be monitored. */ + void RegisterThread(BaseTracer *Tracer); + + /* Unreigster a thread from being monitored. */ + void UnregisterThread(BaseTracer *Tracer); + + /* Notify that the execution enters/leaves the code cache. 
*/ + void NotifyCacheEnter(BaseTracer *Tracer); + void NotifyCacheLeave(BaseTracer *Tracer); + + /* Stop the monitor. */ + void Pause(); + + /* Restart the monitor. */ + void Resume(); +}; + + +class PerfmonData { +public: + PerfmonData(int tid); + ~PerfmonData(); + + int TID; + pmu::Handle ICountHndl; + pmu::Handle BranchHndl; + pmu::Handle MemLoadHndl; + pmu::Handle MemStoreHndl; + pmu::Handle CoverSetHndl; + uint64_t LastNumBranches, LastNumLoads, LastNumStores; + + void MonitorBasic(HPMControl Ctl); + void MonitorCoverSet(HPMControl Ctl); +}; + +extern HardwarePerfmon *HP; + +#endif /* __LLVM_HARD_PERFMON_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/llvm-helper.h b/src/llvm/include/llvm-helper.h new file mode 100644 index 0000000..2d24f81 --- /dev/null +++ b/src/llvm/include/llvm-helper.h @@ -0,0 +1,755 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + * + * This file defines the QEMU helper functions that could be inlined by + * the LLVM translators. 
+ */ + +#ifndef __LLVM_HELPER_H +#define __LLVM_HELPER_H + +/* Speical TCG runtime helper */ + "tcg_helper_div_i32", + "tcg_helper_rem_i32", + "tcg_helper_divu_i32", + "tcg_helper_remu_i32", + "tcg_helper_shl_i64", + "tcg_helper_shr_i64", + "tcg_helper_sar_i64", + "tcg_helper_div_i64", + "tcg_helper_rem_i64", + "tcg_helper_divu_i64", + "tcg_helper_remu_i64", + +#if defined(TARGET_I386) + /* General */ + "helper_cc_compute_c", + "helper_cc_compute_all", + "helper_load_seg", + "helper_write_eflags", + "helper_read_eflags", + "helper_cli", + "helper_sti", + "helper_set_inhibit_irq", + "helper_reset_inhibit_irq", + /* FPU */ + "helper_divb_AL", + "helper_idivb_AL", + "helper_divw_AX", + "helper_idivw_AX", + "helper_divl_EAX", + "helper_idivl_EAX", + "helper_flds_FT0", + "helper_fldl_FT0", + "helper_fildl_FT0", + "helper_flds_ST0", + "helper_fldl_ST0", + "helper_fildl_ST0", + "helper_fildll_ST0", + "helper_fsts_ST0", + "helper_fstl_ST0", + "helper_fist_ST0", + "helper_fistl_ST0", + "helper_fistll_ST0", + "helper_fistt_ST0", + "helper_fisttl_ST0", + "helper_fisttll_ST0", + "helper_fldt_ST0", + "helper_fstt_ST0", + "helper_fpush", + "helper_fpop", + "helper_fdecstp", + "helper_fincstp", + "helper_ffree_STN", + "helper_fmov_ST0_FT0", + "helper_fmov_FT0_STN", + "helper_fmov_ST0_STN", + "helper_fmov_STN_ST0", + "helper_fxchg_ST0_STN", + "helper_fcom_ST0_FT0", + "helper_fucom_ST0_FT0", + "helper_fcomi_ST0_FT0", + "helper_fucomi_ST0_FT0", + "helper_fadd_ST0_FT0", + "helper_fmul_ST0_FT0", + "helper_fsub_ST0_FT0", + "helper_fsubr_ST0_FT0", + "helper_fdiv_ST0_FT0", + "helper_fdivr_ST0_FT0", + "helper_fadd_STN_ST0", + "helper_fmul_STN_ST0", + "helper_fsub_STN_ST0", + "helper_fsubr_STN_ST0", + "helper_fdiv_STN_ST0", + "helper_fdivr_STN_ST0", + "helper_fchs_ST0", + "helper_fabs_ST0", +#if defined(TCG_TARGET_I386) && TCG_TARGET_REG_BITS == 64 + "helper_fxam_ST0", +#endif + "helper_fld1_ST0", + "helper_fldl2t_ST0", + "helper_fldl2e_ST0", + "helper_fldpi_ST0", + "helper_fldlg2_ST0", + 
"helper_fldln2_ST0", + "helper_fldz_ST0", + "helper_fldz_FT0", + "helper_fnstsw", + "helper_fnstcw", + "helper_fldcw", + "helper_fclex", + "helper_fwait", + "helper_fninit", + "helper_fbld_ST0", + "helper_fbst_ST0", + "helper_f2xm1", + "helper_fyl2x", + "helper_fptan", + "helper_fpatan", + "helper_fxtract", + "helper_fprem1", + "helper_fprem", + "helper_fyl2xp1", + "helper_fsqrt", + "helper_fsincos", + "helper_frndint", + "helper_fscale", + "helper_fsin", + "helper_fcos", + "helper_fstenv", + "helper_fldenv", + "helper_fsave", + "helper_frstor", + "helper_fxsave", + "helper_fxrstor", + "helper_bsf", + "helper_bsr", + "helper_lzcnt", + + /* MMX/SSE */ + "helper_psrlw_xmm", + "helper_psraw_xmm", + "helper_psllw_xmm", + "helper_psrld_xmm", + "helper_psrad_xmm", + "helper_pslld_xmm", + "helper_psrlq_xmm", + "helper_psllq_xmm", + "helper_psrldq_xmm", + "helper_pslldq_xmm", + "helper_paddb_xmm", + "helper_paddw_xmm", + "helper_paddl_xmm", + "helper_paddq_xmm", + "helper_psubb_xmm", + "helper_psubw_xmm", + "helper_psubl_xmm", + "helper_psubq_xmm", + "helper_paddusb_xmm", + "helper_paddsb_xmm", + "helper_psubusb_xmm", + "helper_psubsb_xmm", + "helper_paddusw_xmm", + "helper_paddsw_xmm", + "helper_psubusw_xmm", + "helper_psubsw_xmm", + "helper_pminub_xmm", + "helper_pmaxub_xmm", + "helper_pminsw_xmm", + "helper_pmaxsw_xmm", + "helper_pand_xmm", + "helper_pandn_xmm", + "helper_por_xmm", + "helper_pxor_xmm", + "helper_pcmpgtb_xmm", + "helper_pcmpgtw_xmm", + "helper_pcmpgtl_xmm", + "helper_pcmpeqb_xmm", + "helper_pcmpeqw_xmm", + "helper_pcmpeql_xmm", + "helper_pmullw_xmm", + "helper_pmulhuw_xmm", + "helper_pmulhw_xmm", + "helper_pavgb_xmm", + "helper_pavgw_xmm", + "helper_pmuludq_xmm", + "helper_pmaddwd_xmm", + "helper_psadbw_xmm", + "helper_maskmov_xmm", + "helper_movl_mm_T0_xmm", + "helper_shufps_xmm", + "helper_shufpd_xmm", +#if !defined(TCG_TARGET_ARM) + "helper_pshufd_xmm", + "helper_pshuflw_xmm", + "helper_pshufhw_xmm", + "helper_punpcklbw_xmm", + "helper_punpcklwd_xmm", 
+ "helper_punpckldq_xmm", + "helper_punpckhbw_xmm", + "helper_punpckhwd_xmm", + "helper_punpckhdq_xmm", +#endif + "helper_punpcklqdq_xmm", + "helper_punpckhqdq_xmm", + + "helper_enter_mmx", + "helper_psrlw_mmx", + "helper_psraw_mmx", + "helper_psllw_mmx", + "helper_psrld_mmx", + "helper_psrad_mmx", + "helper_pslld_mmx", + "helper_psrlq_mmx", + "helper_psllq_mmx", + "helper_psrldq_mmx", + "helper_pslldq_mmx", + "helper_paddb_mmx", + "helper_paddw_mmx", + "helper_paddl_mmx", + "helper_paddq_mmx", + "helper_psubb_mmx", + "helper_psubw_mmx", + "helper_psubl_mmx", + "helper_psubq_mmx", + "helper_paddusb_mmx", + "helper_paddsb_mmx", + "helper_psubusb_mmx", + "helper_psubsb_mmx", + "helper_paddusw_mmx", + "helper_paddsw_mmx", + "helper_psubusw_mmx", + "helper_psubsw_mmx", + "helper_pminub_mmx", + "helper_pmaxub_mmx", + "helper_pminsw_mmx", + "helper_pmaxsw_mmx", + "helper_pand_mmx", + "helper_pandn_mmx", + "helper_por_mmx", + "helper_pxor_mmx", + "helper_pcmpgtb_mmx", + "helper_pcmpgtw_mmx", + "helper_pcmpgtl_mmx", + "helper_pcmpeqb_mmx", + "helper_pcmpeqw_mmx", + "helper_pcmpeql_mmx", + "helper_pmullw_mmx", + "helper_pmulhuw_mmx", + "helper_pmulhw_mmx", + "helper_pavgb_mmx", + "helper_pavgw_mmx", + "helper_pmuludq_mmx", + "helper_pmaddwd_mmx", + "helper_psadbw_mmx", + "helper_maskmov_mmx", + "helper_movl_mm_T0_mmx", + "helper_shufps_mmx", + "helper_shufpd_mmx", +#if !defined(TCG_TARGET_ARM) + "helper_pshufd_mmx", + "helper_pshuflw_mmx", + "helper_pshufhw_mmx", + "helper_punpcklbw_mmx", + "helper_punpcklwd_mmx", + "helper_punpckldq_mmx", + "helper_punpckhbw_mmx", + "helper_punpckhwd_mmx", + "helper_punpckhdq_mmx", +#endif + "helper_punpcklqdq_mmx", + "helper_punpckhqdq_mmx", + + "helper_addps", + "helper_addss", + "helper_addpd", + "helper_addsd", + "helper_subps", + "helper_subss", + "helper_subpd", + "helper_subsd", + "helper_mulps", + "helper_mulss", + "helper_mulpd", + "helper_mulsd", + "helper_divps", + "helper_divss", + "helper_divpd", + "helper_divsd", + 
"helper_minps", + "helper_minss", + "helper_minpd", + "helper_minsd", + "helper_maxps", + "helper_maxss", + "helper_maxpd", + "helper_maxsd", + "helper_sqrtps", + "helper_sqrtss", + "helper_sqrtpd", + "helper_sqrtsd", + "helper_shufps", + "helper_shufpd", + + "helper_cmpeqps", + "helper_cmpeqss", + "helper_cmpeqpd", + "helper_cmpeqsd", + "helper_cmpltps", + "helper_cmpltss", + "helper_cmpltpd", + "helper_cmpltsd", + "helper_cmpleps", + "helper_cmpless", + "helper_cmplepd", + "helper_cmplesd", + "helper_cmpunordps", + "helper_cmpunordss", + "helper_cmpunordpd", + "helper_cmpunordsd", + "helper_cmpneqps", + "helper_cmpneqss", + "helper_cmpneqpd", + "helper_cmpneqsd", + "helper_cmpnltps", + "helper_cmpnltss", + "helper_cmpnltpd", + "helper_cmpnltsd", + "helper_cmpnleps", + "helper_cmpnless", + "helper_cmpnlepd", + "helper_cmpnlesd", + "helper_cmpordps", + "helper_cmpordss", + "helper_cmpordpd", + "helper_cmpordsd", + + "helper_cvtps2pd", + "helper_cvtpd2ps", + "helper_cvtss2sd", + "helper_cvtsd2ss", + "helper_cvtdq2ps", + "helper_cvtdq2pd", + "helper_cvtpi2ps", + "helper_cvtpi2pd", + "helper_cvtsi2ss", + "helper_cvtsi2sd", + "helper_cvtps2dq", + "helper_cvtpd2dq", + "helper_cvtps2pi", + "helper_cvtpd2pi", + "helper_cvtss2si", + "helper_cvtsd2si", + "helper_cvttps2dq", + "helper_cvttpd2dq", + "helper_cvttps2pi", + "helper_cvttpd2pi", + "helper_cvttss2si", + "helper_cvttsd2si", + + "helper_cmpeqps", + "helper_cmpeqss", + "helper_cmpeqpd", + "helper_cmpeqsd", + "helper_cmpltps", + "helper_cmpltss", + "helper_cmpltpd", + "helper_cmpltsd", + "helper_cmpleps", + "helper_cmpless", + "helper_cmplepd", + "helper_cmplesd", + "helper_cmpunordps", + "helper_cmpunordss", + "helper_cmpunordpd", + "helper_cmpunordsd", + "helper_cmpneqps", + "helper_cmpneqss", + "helper_cmpneqpd", + "helper_cmpneqsd", + "helper_cmpnltps", + "helper_cmpnltss", + "helper_cmpnltpd", + "helper_cmpnltsd", + "helper_cmpnleps", + "helper_cmpnless", + "helper_cmpnlepd", + "helper_cmpnlesd", + 
"helper_cmpordps", + "helper_cmpordss", + "helper_cmpordpd", + "helper_cmpordsd", + + "helper_ucomisd", + "helper_comisd", + "helper_ucomiss", + "helper_comiss", + + "helper_packuswb_xmm", + "helper_packsswb_xmm", + "helper_pmovmskb_xmm", + "helper_pshufw_mmx", + +#elif defined(TARGET_ARM) + "helper_add_cc", + "helper_sub_cc", + "helper_shl_cc", + "helper_shr_cc", + "helper_sar_cc", + "helper_adc_cc", + "helper_sbc_cc", + "helper_shl", + "helper_shr", + "helper_sar", + "helper_clz", + + "helper_sadd8", + "helper_sadd16", + "helper_ssub8", + "helper_ssub16", + "helper_ssubaddx", + "helper_saddsubx", + "helper_uadd8", + "helper_uadd16", + "helper_usub8", + "helper_usub16", + "helper_usubaddx", + "helper_uaddsubx", + + "helper_qadd8", + "helper_qadd16", + "helper_qsub8", + "helper_qsub16", + "helper_qsubaddx", + "helper_qaddsubx", + "helper_uqadd8", + "helper_uqadd16", + "helper_uqsub8", + "helper_uqsub16", + "helper_uqsubaddx", + "helper_uqaddsubx", + + "helper_set_rmode", + "helper_cpsr_write_nzcv", + "helper_cpsr_write", + "helper_cpsr_read", + "helper_vfp_get_fpscr", + "helper_vfp_set_fpscr", + "helper_vfp_adds", + "helper_vfp_addd", + "helper_vfp_subs", + "helper_vfp_subd", + "helper_vfp_muls", + "helper_vfp_muld", + "helper_vfp_divs", + "helper_vfp_divd", + "helper_vfp_negs", + "helper_vfp_negd", + "helper_vfp_abss", + "helper_vfp_absd", + "helper_vfp_sqrts", + "helper_vfp_sqrtd", + "helper_vfp_cmps", + "helper_vfp_cmpd", + "helper_vfp_cmpes", + "helper_vfp_cmped", + + "helper_vfp_muladds", + "helper_vfp_muladdd", + +#if defined(TARGET_AARCH64) + "helper_vfp_cmps_a64", + "helper_vfp_cmpd_a64", + "helper_vfp_cmpes_a64", + "helper_vfp_cmped_a64", + "helper_vfp_minnums", + "helper_vfp_maxnums", + "helper_vfp_minnumd", + "helper_vfp_maxnumd", +#endif +#if !defined(TCG_TARGET_PPC64) + "helper_vfp_fcvtds", + "helper_vfp_fcvtsd", + "helper_vfp_uitos", + "helper_vfp_uitod", + "helper_vfp_sitos", + "helper_vfp_sitod", + "helper_vfp_touis", + "helper_vfp_touid", + 
"helper_vfp_touizs", + "helper_vfp_touizd", + "helper_vfp_tosis", + "helper_vfp_tosid", + "helper_vfp_tosizs", + "helper_vfp_tosizd", + "helper_vfp_toshs", + "helper_vfp_tosls", + "helper_vfp_touhs", + "helper_vfp_touls", + "helper_vfp_toshd", + "helper_vfp_tosld", + "helper_vfp_touhd", + "helper_vfp_tould", + "helper_vfp_shtos", + "helper_vfp_sltos", + "helper_vfp_uhtos", + "helper_vfp_ultos", + "helper_vfp_shtod", + "helper_vfp_sltod", + "helper_vfp_uhtod", + "helper_vfp_ultod", +#endif + + /* neon helper */ + "helper_neon_qadd_u8", + "helper_neon_qadd_s8", + "helper_neon_qadd_u16", + "helper_neon_qadd_s16", + "helper_neon_qsub_u8", + "helper_neon_qsub_s8", + "helper_neon_qsub_u16", + "helper_neon_qsub_s16", + + "helper_neon_hadd_s8", + "helper_neon_hadd_u8", + "helper_neon_hadd_s16", + "helper_neon_hadd_u16", + "helper_neon_hadd_s32", + "helper_neon_hadd_u32", + "helper_neon_rhadd_s8", + "helper_neon_rhadd_u8", + "helper_neon_rhadd_s16", + "helper_neon_rhadd_u16", + "helper_neon_rhadd_s32", + "helper_neon_rhadd_u32", + "helper_neon_hsub_s8", + "helper_neon_hsub_u8", + "helper_neon_hsub_s16", + "helper_neon_hsub_u16", + "helper_neon_hsub_s32", + "helper_neon_hsub_u32", + + "helper_neon_cgt_u8", + "helper_neon_cgt_s8", + "helper_neon_cgt_u16", + "helper_neon_cgt_s16", + "helper_neon_cgt_u32", + "helper_neon_cgt_s32", + "helper_neon_cge_u8", + "helper_neon_cge_s8", + "helper_neon_cge_u16", + "helper_neon_cge_s16", + "helper_neon_cge_u32", + "helper_neon_cge_s32", + + "helper_neon_min_u8", + "helper_neon_min_s8", + "helper_neon_min_u16", + "helper_neon_min_s16", + "helper_neon_min_u32", + "helper_neon_min_s32", + "helper_neon_max_u8", + "helper_neon_max_s8", + "helper_neon_max_u16", + "helper_neon_max_s16", + "helper_neon_max_u32", + "helper_neon_max_s32", + "helper_neon_pmin_u8", + "helper_neon_pmin_s8", + "helper_neon_pmin_u16", + "helper_neon_pmin_s16", + "helper_neon_pmax_u8", + "helper_neon_pmax_s8", + "helper_neon_pmax_u16", + "helper_neon_pmax_s16", + + 
"helper_neon_abd_u8", + "helper_neon_abd_s8", + "helper_neon_abd_u16", + "helper_neon_abd_s16", + "helper_neon_abd_u32", + "helper_neon_abd_s32", + + "helper_neon_shl_u8", + "helper_neon_shl_s8", + "helper_neon_shl_u16", + "helper_neon_shl_s16", + "helper_neon_shl_u32", + "helper_neon_shl_s32", + "helper_neon_shl_u64", + "helper_neon_shl_s64", + "helper_neon_rshl_u8", + "helper_neon_rshl_s8", + "helper_neon_rshl_u16", + "helper_neon_rshl_s16", + "helper_neon_rshl_u32", + "helper_neon_rshl_s32", + "helper_neon_rshl_u64", + "helper_neon_rshl_s64", + "helper_neon_qshl_u8", + "helper_neon_qshl_s8", + "helper_neon_qshl_u16", + "helper_neon_qshl_s16", + "helper_neon_qshl_u32", + "helper_neon_qshl_s32", + "helper_neon_qshl_u64", + "helper_neon_qshl_s64", + "helper_neon_qrshl_u8", + "helper_neon_qrshl_s8", + "helper_neon_qrshl_u16", + "helper_neon_qrshl_s16", + "helper_neon_qrshl_u32", + "helper_neon_qrshl_s32", + "helper_neon_qrshl_u64", + "helper_neon_qrshl_s64", + + "helper_neon_add_u8", + "helper_neon_add_u16", + "helper_neon_padd_u8", + "helper_neon_padd_u16", + "helper_neon_sub_u8", + "helper_neon_sub_u16", + "helper_neon_mul_u8", + "helper_neon_mul_u16", + "helper_neon_mul_p8", + + "helper_neon_tst_u8", + "helper_neon_tst_u16", + "helper_neon_tst_u32", + "helper_neon_ceq_u8", + "helper_neon_ceq_u16", + "helper_neon_ceq_u32", + + "helper_neon_abs_s8", + "helper_neon_abs_s16", + "helper_neon_clz_u8", + "helper_neon_clz_u16", + "helper_neon_cls_s8", + "helper_neon_cls_s16", + "helper_neon_cls_s32", + "helper_neon_cnt_u8", + + "helper_neon_qdmulh_s16", + "helper_neon_qrdmulh_s16", + "helper_neon_qdmulh_s32", + "helper_neon_qrdmulh_s32", + + "helper_neon_narrow_u8", + "helper_neon_narrow_u16", + "helper_neon_narrow_sat_u8", + "helper_neon_narrow_sat_s8", + "helper_neon_narrow_sat_u16", + "helper_neon_narrow_sat_s16", + "helper_neon_narrow_sat_u32", + "helper_neon_narrow_sat_s32", + "helper_neon_narrow_high_u8", + "helper_neon_narrow_high_u16", + 
"helper_neon_narrow_round_high_u8", + "helper_neon_narrow_round_high_u16", + "helper_neon_widen_u8", + "helper_neon_widen_s8", + "helper_neon_widen_u16", + "helper_neon_widen_s16", + + "helper_neon_addl_u16", + "helper_neon_addl_u32", + "helper_neon_paddl_u16", + "helper_neon_paddl_u32", + "helper_neon_subl_u16", + "helper_neon_subl_u32", + "helper_neon_addl_saturate_s32", + "helper_neon_addl_saturate_s64", + "helper_neon_abdl_u16", + "helper_neon_abdl_s16", + "helper_neon_abdl_u32", + "helper_neon_abdl_s32", + "helper_neon_abdl_u64", + "helper_neon_abdl_s64", + "helper_neon_mull_u8", + "helper_neon_mull_s8", + "helper_neon_mull_u16", + "helper_neon_mull_s16", + + "helper_neon_negl_u16", + "helper_neon_negl_u32", + "helper_neon_negl_u64", + + "helper_neon_qabs_s8", + "helper_neon_qabs_s16", + "helper_neon_qabs_s32", + "helper_neon_qneg_s8", + "helper_neon_qneg_s16", + "helper_neon_qneg_s32", + + "helper_neon_min_f32", + "helper_neon_max_f32", + "helper_neon_abd_f32", + "helper_neon_add_f32", + "helper_neon_sub_f32", + "helper_neon_mul_f32", + "helper_neon_ceq_f32", + "helper_neon_cge_f32", + "helper_neon_cgt_f32", + "helper_neon_acge_f32", + "helper_neon_acgt_f32", + +#elif defined(TARGET_PPC) + "helper_popcntb", + "helper_cntlzw", + "helper_cntlsw32", + "helper_cntlzw32", + + "helper_compute_fprf", + "helper_store_fpscr", + "helper_fpscr_clrbit", + "helper_fpscr_setbit", + "helper_fcmpo", + "helper_fcmpu", + + "helper_fctiw", + "helper_fctiwz", + "helper_frsp", + "helper_frin", + "helper_friz", + "helper_frip", + "helper_frim", + + "helper_fadd", + "helper_fsub", + "helper_fmul", + "helper_fdiv", + "helper_fmadd", + "helper_fmsub", + "helper_fnmadd", + "helper_fnmsub", + "helper_fabs", + "helper_fnabs", + "helper_fneg", + "helper_fsqrt", + "helper_fre", + "helper_fres", + "helper_frsqrte", + "helper_fsel", + +#elif defined(TARGET_MICROBLAZE) + "helper_addkc", + "helper_subkc", + "helper_cmp", + "helper_cmpu", + "helper_divs", + "helper_divu", +#elif 
defined(TARGET_MIPS) + "helper_lwl", + "helper_lwr", + "helper_swl", + "helper_swr", +#endif + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/llvm/include/llvm-macro.h b/src/llvm/include/llvm-macro.h new file mode 100644 index 0000000..7b0e613 --- /dev/null +++ b/src/llvm/include/llvm-macro.h @@ -0,0 +1,88 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_MACRO_H +#define __LLVM_MACRO_H + +#if defined(CONFIG_SOFTMMU) +#define SaveStates() SaveGlobals(COHERENCE_GLOBAL, LastInst) +#else +#define SaveStates() +#endif + +#define CONST8(a) ConstantInt::get(Int8Ty, a) +#define CONST16(a) ConstantInt::get(Int16Ty, a) +#define CONST32(a) ConstantInt::get(Int32Ty, a) +#define CONST64(a) ConstantInt::get(Int64Ty, a) +#define CONST128(a) ConstantInt::get(Int128Ty, a) +#define CONSTPtr(a) ConstantInt::get(IntPtrTy, a) + +#define FPCONST32(a) ConstantFP::get(FloatTy, a) +#define FPCONST64(a) ConstantFP::get(DoubleTy, a) +#define FPCONST80(a) ConstantFP::get(FP80Ty, a) +#define FPCONST128(a) ConstantFP::get(FP128Ty, a) + +#define ICMP(a,b,pred) new ICmpInst(LastInst, pred, a, b, "") + +#define AND(a,b) BinaryOperator::Create(Instruction::And, a, b, "", LastInst) +#define OR(a,b) BinaryOperator::Create(Instruction::Or, a, b, "", LastInst) +#define XOR(a,b) BinaryOperator::Create(Instruction::Xor, a, b, "", LastInst) +#define SHL(a,b) BinaryOperator::Create(Instruction::Shl, a, b, "", LastInst) +#define LSHR(a,b) BinaryOperator::Create(Instruction::LShr, a, b, "", LastInst) +#define ASHR(a,b) BinaryOperator::Create(Instruction::AShr, a, b, "", LastInst) +#define ADD(a,b) BinaryOperator::Create(Instruction::Add, a, b, "", LastInst) +#define SUB(a,b) BinaryOperator::Create(Instruction::Sub, a, b, "", LastInst) +#define MUL(a,b) BinaryOperator::Create(Instruction::Mul, a, b, "", LastInst) +#define SDIV(a,b) BinaryOperator::Create(Instruction::SDiv, a, b, "", 
LastInst) +#define UDIV(a,b) BinaryOperator::Create(Instruction::UDiv, a, b, "", LastInst) +#define SREM(a,b) BinaryOperator::Create(Instruction::SRem, a, b, "", LastInst) +#define UREM(a,b) BinaryOperator::Create(Instruction::URem, a, b, "", LastInst) + +#define FADD(a,b) BinaryOperator::Create(Instruction::FAdd, a, b, "", LastInst) +#define FSUB(a,b) BinaryOperator::Create(Instruction::FSub, a, b, "", LastInst) +#define FMUL(a,b) BinaryOperator::Create(Instruction::FMul, a, b, "", LastInst) +#define FDIV(a,b) BinaryOperator::Create(Instruction::FDiv, a, b, "", LastInst) + +#define CAST(a,t) new BitCastInst(a, t, "", LastInst) +#define CASTPTR8(a) CAST(a,Int8PtrTy) +#define CASTPTR16(a) CAST(a,Int16PtrTy) +#define CASTPTR32(a) CAST(a,Int32PtrTy) +#define CASTPTR64(a) CAST(a,Int64PtrTy) + +#define ITP(a,t) new IntToPtrInst(a, t, "", LastInst) +#define ITP8(a) ITP(a,Int8PtrTy) +#define ITP16(a) ITP(a,Int16PtrTy) +#define ITP32(a) ITP(a,Int32PtrTy) +#define ITP64(a) ITP(a,Int64PtrTy) + +#define TRUNC(a,t) new TruncInst(a, t, "", LastInst) +#define TRUNC8(a) TRUNC(a, Int8Ty) +#define TRUNC16(a) TRUNC(a, Int16Ty) +#define TRUNC32(a) TRUNC(a, Int32Ty) +#define TRUNC64(a) TRUNC(a, Int64Ty) + +#define ZEXT(a,t) new ZExtInst(a, t, "", LastInst) +#define ZEXT8(a) ZEXT(a, Int8Ty) +#define ZEXT16(a) ZEXT(a, Int16Ty) +#define ZEXT32(a) ZEXT(a, Int32Ty) +#define ZEXT64(a) ZEXT(a, Int64Ty) +#define ZEXT128(a) ZEXT(a, Int128Ty) +#define SEXT(a,t) new SExtInst(a, t, "", LastInst) +#define SEXT8(a) SEXT(a, Int8Ty) +#define SEXT16(a) SEXT(a, Int16Ty) +#define SEXT32(a) SEXT(a, Int32Ty) +#define SEXT64(a) SEXT(a, Int64Ty) +#define SEXT128(a) SEXT(a, Int128Ty) + +#define BSWAP16(a) CreateBSwap(Int16Ty, a, LastInst) +#define BSWAP32(a) CreateBSwap(Int32Ty, a, LastInst) +#define BSWAP64(a) CreateBSwap(Int64Ty, a, LastInst) + + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/llvm-opc.h b/src/llvm/include/llvm-opc.h new file mode 100644 index 
0000000..9454dac --- /dev/null +++ b/src/llvm/include/llvm-opc.h @@ -0,0 +1,494 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_OPC_H +#define __LLVM_OPC_H + +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "qemu-types.h" +#include "llvm-types.h" +#include "llvm-translator.h" +#include "llvm.h" + +//#define ASSERT +//#define VERIFY_TB + + +#define IRDebug(idx) \ + do { \ + dbg() << DEBUG_ENTRY << "op_" << llvm_op_defs[idx].name << ": " \ + << llvm_op_defs[idx].nb_oargs << " " \ + << llvm_op_defs[idx].nb_iargs << " " \ + << llvm_op_defs[idx].nb_cargs << "\n"; \ + } while (0) +#define IRError(fmt,args...) hqemu_error(fmt,##args) + +#ifdef ASSERT +#define AssertType(t) \ + do { \ + if (!(t)) \ + hqemu_error("invalid type.\n"); \ + } while(0) +#else +#define AssertType(t) +#endif + +#define IRAbort() \ + do { \ + if (!LLEnv->isTraceMode()) { \ + Func->dump(); \ + hqemu_error("fixme.\n"); \ + } \ + Builder->Abort(); \ + } while (0) + + +class LLVMTranslator; +class NotifyInfo; +class OptimizationInfo; + + +/* Patch flags. + * NOTE: patch flags must be synchronized with those in the LLVM backend. */ +enum { + PATCH_HQEMU = 0x4182U, + PATCH_DUMMY, + PATCH_EXIT_TB, + PATCH_DIRECT_JUMP, + PATCH_TRACE_BLOCK_CHAINING, + PATCH_QMMU, +}; + +/* + * Register is used to describe the pseudo registers used by QEMU TCG op. + */ +struct Register { + /* Status of the register. 
*/ + enum { + STATE_NONE = 0x0, + STATE_REV = 0x1, /* Register is reserved */ + STATE_REG = 0x2, /* Register is promoted */ + STATE_MEM = 0x4, /* Register is in CPUArchState memory */ + STATE_LOC = 0x8, /* Register is a local register */ + STATE_TMP = 0x10, /* Register is a tmp register */ + }; + + int State; /* State of the register */ + int Base; + intptr_t Off; /* Register offset of CPUArchState */ + int Size; /* Register size */ + std::string Name; /* Name string of this register */ + bool Dirty; /* This register is updated or not */ + Type *Ty; /* Register type in LLVM */ + Value *Data; /* Data value if this regisrer is promoted */ + Value *AI; /* Register as Alloca */ + Register *Alias; + + Register() : State(STATE_NONE), Off(-1), Dirty(false), Ty(nullptr), + Data(nullptr), AI(nullptr), Alias(nullptr) {} + + void set(int base, intptr_t off, std::string name) { + Base = base; + Off = off; + Name = name; + } + void reset(int state, int size, Type *ty) { + State = state; + Size = size; + Ty = ty; + Dirty = false; + Data = AI = nullptr; + } + + void Promote() { State |= STATE_REG; } + void Demote() { State &= ~STATE_REG; } + + Value *getData() { return Data; } + Register &getAlias() { return *Alias; } + + void setState(int state) { State = state; } + void setData(Value *data, bool dirty = false) { + if (Alias) { + Alias->setData(data, dirty); + return; + } + Data = data; + Dirty = dirty; + Promote(); + } + bool isRev() { return State & STATE_REV; } + bool isReg() { return State & STATE_REG; } + bool isMem() { return State & STATE_MEM; } + bool isLocal() { return State & STATE_LOC; } + bool isDirty() { return Dirty; } + bool isAlias() { return Alias != nullptr; } +}; + +/* + * TraceBuilder provides the facilities to build a trace in IRFactory. 
+ */ +class TraceBuilder { + typedef std::map<target_ulong, + std::pair<GraphNode*, BasicBlock*> > NodeBuildMap; + typedef std::vector<std::pair<BranchInst*, GraphNode*> > BranchList; + + IRFactory *IF; + OptimizationInfo *Opt; + GraphNode *CurrNode; /* The current CFG node to process */ + NodeBuildMap Nodes; + BranchList Branches; + NodeVec NodeQueue; /* CFG nodes to be translated */ + NodeSet NodeVisisted; + NodeVec NodeUsed; + bool Aborted; + uint32_t Attribute; + + TraceInfo *Trace; + +public: + TraceBuilder(IRFactory *IRF, OptimizationInfo *Opt); + ~TraceBuilder() {} + + void ConvertToTCGIR(CPUArchState *env); + void ConvertToLLVMIR(); + void Abort(); + void Finalize(); + bool isAborted() { return Aborted; } + + OptimizationInfo *getOpt() { return Opt; } + TraceInfo *getTrace() { return Trace; } + GraphNode *getEntryNode() { return Opt->getCFG(); } + GraphNode *getCurrNode() { return CurrNode; } + unsigned getNumNodes() { return Nodes.size(); } + std::string getPCString(GraphNode *Node) { + std::stringstream ss; + ss << std::hex << Node->getGuestPC(); + return ss.str(); + } + + GraphNode *getNextNode() { + if (NodeQueue.empty()) + return nullptr; + CurrNode = NodeQueue.back(); + NodeQueue.pop_back(); + + if (NodeVisisted.find(CurrNode) != NodeVisisted.end()) + return getNextNode(); + + NodeVisisted.insert(CurrNode); + NodeUsed.push_back(CurrNode); + return CurrNode; + } + + target_ulong getGuestPC(GraphNode *Node) { +#if defined(TARGET_I386) + return Node->getTB()->pc - Node->getTB()->cs_base; +#else + return Node->getTB()->pc; +#endif + } + void setUniqueNode(GraphNode *Node) { + target_ulong gpc = getGuestPC(Node); + if (Nodes.find(gpc) == Nodes.end()) + Nodes[gpc] = std::make_pair(Node, nullptr); + } + void setBasicBlock(GraphNode *Node, BasicBlock *BB) { + target_ulong gpc = getGuestPC(Node); + if (Nodes.find(gpc) == Nodes.end()) + hqemu_error("internal error.\n"); + Nodes[gpc].second = BB; + } + void setBranch(BranchInst *BI, GraphNode *Node) { + 
Branches.push_back(std::make_pair(BI, Node)); + target_ulong gpc = getGuestPC(Node); + if (!Nodes[gpc].second) + NodeQueue.push_back(Node); + } + GraphNode *getNode(target_ulong gpc) { + return Nodes.find(gpc) == Nodes.end() ? nullptr : Nodes[gpc].first; + } + BasicBlock *getBasicBlock(GraphNode *Node) { + target_ulong gpc = getGuestPC(Node); + if (Nodes.find(gpc) == Nodes.end()) + hqemu_error("internal error.\n"); + return Nodes[gpc].second; + } + void addAttribute(uint32_t Attr) { + Attribute |= Attr; + } +}; + + +#define META_CONST "const" +#define META_GVA "gva" +#define META_LOOP "loop" +#define META_EXIT "exit" +#define META_CC "cc" + +class MDFactory { + uint32_t UID; + LLVMContext &Context; + MDNode *Dummy; + + ConstantInt *getUID() { + return ConstantInt::get(IntegerType::get(Context, 32), UID++); + } + +public: + MDFactory(Module *M); + ~MDFactory(); + + MDNode *getMDNode(ArrayRef<ConstantInt*> V); + DebugLoc getDebugLoc(unsigned Line, unsigned Col, Function *F, + ArrayRef<ConstantInt*> Meta); + + void setConst(Instruction *I) { I->setMetadata(META_CONST, Dummy); } + void setGuestMemory(Instruction *I) { I->setMetadata(META_GVA, Dummy); } + void setLoop(Instruction *I) { I->setMetadata(META_LOOP, Dummy); } + void setExit(Instruction *I) { I->setMetadata(META_EXIT, Dummy); } + void setCondition(Instruction *I) { I->setMetadata(META_CC, Dummy); } + + static bool isConst(Instruction *I) { + return I->getMetadata(META_CONST); + } + static bool isGuestMemory(Instruction *I) { + return I->getMetadata(META_GVA); + } + static bool isLoop(Instruction *I) { + return I->getMetadata(META_LOOP); + } + static bool isExit(Instruction *I) { + return I->getMetadata(META_EXIT); + } + static bool isCondition(Instruction *I) { + return I->getMetadata(META_CC); + } + + static void setConstStatic(LLVMContext &Context, Instruction *I, + ArrayRef<ConstantInt*> V); +}; + +/* + * IRFactory conducts QEMU TCG opcodes to LLVM IR conversion. 
+ */ +class IRFactory { + typedef std::map<std::pair<intptr_t, Type *>, Value *> StatePtrMap; + typedef std::map<TCGArg, BasicBlock *> LabelMap; + + enum { + COHERENCE_NONE = 0, + COHERENCE_GLOBAL, + COHERENCE_ALL, + }; + + bool InitOnce; + + /* Basic types */ + Type *VoidTy; + IntegerType *Int8Ty; + IntegerType *Int16Ty; + IntegerType *Int32Ty; + IntegerType *Int64Ty; + IntegerType *Int128Ty; + IntegerType *IntPtrTy; + PointerType *Int8PtrTy; + PointerType *Int16PtrTy; + PointerType *Int32PtrTy; + PointerType *Int64PtrTy; + Type *FloatTy; + Type *DoubleTy; + Type *FP80Ty; + Type *FP128Ty; + + ConstantInt *ExitAddr; + + LLVMTranslator &Translator; /* Uplink to the LLVMTranslator instance */ + LLVMContext *Context; /* Translator local context */ + Module *Mod; /* The LLVM module */ + ExecutionEngine *EE; /* The JIT compiler */ + EventListener *Listener; /* The JIT listener */ + JITEventListener *IntelJIT; /* The Intel JIT listener */ + const DataLayout *DL; /* Data layout */ + TraceBuilder *Builder; + MDFactory *MF; + MCDisasm *HostDisAsm; + + HelperMap &Helpers; + std::vector<BaseRegister> &BaseReg; /* TCG base register */ + std::vector<Register> Reg; /* TCG virtual registers */ + LabelMap Labels; /* TCG labels */ + int Segment; + GuestBaseRegister &GuestBaseReg; /* Reserved guest base register */ + + Function *Func; /* The container of LLVM IR to be translated */ + BasicBlock *InitBB; /* BasicBlock for variable decalaration */ + BasicBlock *CurrBB; /* Current BasicBlock to insert LLVM IR */ + BasicBlock *ExitBB; /* Temp BasicBlock as the exit-function stub */ + BranchInst *LastInst; /* Position to insert LLVM IR */ + + Instruction *CPU; /* Base register with (char*) type */ + Instruction *CPUStruct; /* Base register with (struct CPUArchState*) type */ + Instruction *GEPInsertPos; /* Position to insert GEP instruction */ + + StatePtrMap StatePtr; + IVec InlineCalls; /* Helpers to be inlined */ + std::map<std::string, BasicBlock*> CommonBB; + IVec IndirectBrs; + 
IVec toErase; + BBVec toSink; + std::set<Function *> ClonedFuncs; + bool runPasses; + + void CreateJIT(); + void DeleteJIT(); + + /* Initialize basic types used during IR conversion. */ + void InitializeTypes(); + + /* Store dirty states back to CPU state in the memory. */ + void SaveGlobals(int level, Instruction *InsertPos); + + /* Sync PC to CPU state in the memory. */ + void CreateStorePC(Instruction *InsertPos); + + /* Get or insert the pointer to the CPU state. */ + Value *StatePointer(Register ®); + Value *StatePointer(Register ®, intptr_t Off, Type *PTy); + + /* Load value from the CPU state in the memory. */ + Value *LoadState(Register ®); + void StoreState(Register ®, Instruction *InsertPos); + + /* Load/Store data from/to the guest memory. */ + Value *QEMULoad(Value *AddrL, Value *AddrH, TCGMemOpIdx oi); + void QEMUStore(Value *Data, Value *AddrL, Value *AddrH, TCGMemOpIdx oi); + + Value *ConvertCPUType(Function *F, int Idx, Instruction *InsertPos); + Value *ConvertCPUType(Function *F, int Idx, BasicBlock *InsertPos); + + Value *ConvertEndian(Value *V, int opc); + Value *getExtendValue(Value *V, Type *Ty, int opc); + Value *getTruncValue(Value *V, int opc); + int getSizeInBits(int opc) { + return 8 * (1 << (opc & MO_SIZE)); + } + + Value *ConcatTLBVersion(Value *GVA); + + /* Return the LLVM instruction that stores PC. For the guest's register + * size larger than the host, replace the multiple store-PC instructions + * to one single store-PC instruction. */ + StoreInst *getStorePC(); + + /* Create both chaining and exiting stubs. */ + void InsertLinkAndExit(Instruction *InsertPos); + + /* Create exit stub */ + void InsertExit(uintptr_t RetVal, bool setExit = false); + + /* Find the next node of a trace according to the brach pc. + * Return null if we cannot find one. */ + GraphNode *findNextNode(target_ulong pc); + + /* Perform internal linking of basic blocks to form a region. 
*/ + void TraceLink(StoreInst *SI); + + /* Link basic blocks of direct branch. */ + void TraceLinkDirectJump(GraphNode *NextNode, StoreInst *SI); + void TraceLinkDirectJump(StoreInst *SI); + + /* Link basic blocks of indirect branch. */ + void TraceLinkIndirectJump(GraphNode *NextNode, StoreInst *SI); + + /* Insert code for IBTC hash table lookup. */ + void InsertLookupIBTC(GraphNode *CurrNode); + + /* Insert code for CPBL hash table lookup. */ + void InsertLookupCPBL(GraphNode *CurrNode); + + void TraceValidateCPBL(GraphNode *NextNode, StoreInst *StorePC); + + /* Insert bswap intrinsic instruction. */ + Value *CreateBSwap(Type *Ty, Value *V, Instruction *InsertPos); + + /* Given the size, return its PointerType. */ + PointerType *getPointerTy(int Size, unsigned AS = 0); + + /* Analyze a helper function to determine if it will be inlined or not. */ + int AnalyzeInlineCost(CallSite CS); + + /* Perform helper function inlining. */ + void ProcessInline(); + + void VerifyFunction(Function &F); + + /* Legalize LLVM IR before running the pre-defined passes. */ + void PreProcess(); + + void Optimize(); + + /* Legalize LLVM IR after running the pre-defined passes. */ + void PostProcess(); + + void FinalizeObject(); + + void InitializeLLVMPasses(legacy::FunctionPassManager *FPM); + + uint32_t setRestorePoint(TCGMemOpIdx oi) { + if (oi != (uint16_t)oi) + hqemu_error("key value too large.\n"); + return (NI.setRestorePoint() << 16) | oi; + } + +public: + typedef void (IRFactory::*FuncPtr)(const TCGArg *); + + NotifyInfo &NI; /* Info to pass among translator and JIT */ + + /* QEMU TCG IR to LLVM IR converion routines. */ +#define DEF(name, oargs, iargs, cargs, flags) void op_ ## name(const TCGArg *); +#include "tcg-opc.h" +#undef DEF + + IRFactory(LLVMTranslator *Trans); + ~IRFactory(); + + void CreateSession(TraceBuilder *builder); + void DeleteSession(); + + /* Prepare the initial LLVM Function, BasicBlocks and variables. 
*/ + void CreateFunction(); + void CreateBlock(); + + /* Start LLVM JIT compilation. */ + void Compile(); + + /* Set instruction BI to jump to the basic block BB. */ + void setSuccessor(BranchInst *BI, BasicBlock *BB); + + /* Get function pointer of the IR converion routines. */ + void *getOpcFunc(); + + Function *ResolveFunction(std::string Name); + + LLVMTranslator &getTranslator() { return Translator; } + LLVMContext &getContext() { return *Context; } + const DataLayout *getDL() { return DL; } + MDFactory *getMDFactory() { return MF; } + HelperMap &getHelpers() { return Helpers; } + TraceInfo *getTrace() { return Builder->getTrace(); } + Value *getGuestBase() { return GuestBaseReg.Base; } + Instruction *getDefaultCPU(Function &F); + +public: + static bool isStateOfPC(intptr_t Off); +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/llvm-pass.h b/src/llvm/include/llvm-pass.h new file mode 100644 index 0000000..75bcf4a --- /dev/null +++ b/src/llvm/include/llvm-pass.h @@ -0,0 +1,205 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __LLVM_PASS_H +#define __LLVM_PASS_H + +#include <map> +#include <vector> +#include "llvm-types.h" + +class IRFactory; + + +static inline Value *getPointerOperand(Value *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->getPointerOperand(); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getPointerOperand(); + return nullptr; +} + +static inline Value *getValueOperand(Value *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI; + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getValueOperand(); + return nullptr; +} + +static inline unsigned getAddressSpaceOperand(Value *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->getPointerAddressSpace(); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getPointerAddressSpace(); + return -1; +} + +/* A CPU state reference. */ +struct StateRef { + StateRef(intptr_t Start, intptr_t End, Instruction *I) + : Start(Start), End(End), I(I) {} + intptr_t Start; + intptr_t End; + Instruction *I; + + intptr_t getSize() { + return End - Start; + } + Type *getType() { + return getValueOperand(I)->getType(); + } +}; + +/* A group of references to a CPU state. */ +struct StateData { + intptr_t Start; + intptr_t End; + std::vector<StateRef*> Refs; + + void reset(StateRef &Ref) { + Start = Ref.Start; + End = Ref.End; + Refs.clear(); + Refs.push_back(&Ref); + } + void insert(StateRef &Ref) { + End = std::max(End, Ref.End); + Refs.push_back(&Ref); + } +}; + +typedef std::map<intptr_t, intptr_t> StateRange; +typedef std::vector<StateData> StateList; +typedef std::vector<CallInst*> CallList; + +/* + * The purpose of StateAnalyzer is to analyze loads/stores of CPU states and + * group loads/stores of the same CPU state into the same bucket (StateData). + */ +class StateAnalyzer { + const DataLayout *DL; + std::vector<StateRef> StateRefs; + CallList Calls; + StateList States; + + /* Sort state references by the state offset. 
*/ + void sortStateRefs() { + if (StateRefs.empty()) + return; + std::sort(StateRefs.begin(), StateRefs.end(), + [](const StateRef &lhs, const StateRef &rhs) -> bool { + return lhs.Start < rhs.Start; + }); + } + +public: + StateAnalyzer(const DataLayout *DL) : DL(DL) {} + + void clear() { + StateRefs.clear(); + Calls.clear(); + States.clear(); + } + + /* Add a CPU state reference. */ + void addStateRef(Instruction *I, intptr_t Off) { + Type *Ty = getValueOperand(I)->getType(); + intptr_t Start = Off; + intptr_t End = Off + DL->getTypeSizeInBits(Ty) / 8; + StateRefs.push_back(StateRef(Start, End, I)); + } + + /* Add a helper function call. */ + void addCall(CallInst *CI) { + Calls.push_back(CI); + } + + /* Return non-overlapped ranges of states. */ + void computeStateRange(StateRange &Reads, StateRange &Writes) { + computeState(); + if (StateRefs.empty()) + return; + + const uint8_t READ = 0x1; + const uint8_t WRITE = 0x2; + for (auto &State : States) { + uint8_t RW = 0; + for (auto &Ref : State.Refs) + RW |= isa<LoadInst>(Ref->I) ? READ : WRITE; + if (RW & READ) + Reads[State.Start] = State.End; + if (RW & WRITE) + Writes[State.Start] = State.End; + } + } + + /* Compute referenced states and group instructions. */ + void computeState() { + /* Sort state refs by the offset. */ + sortStateRefs(); + if (StateRefs.empty()) + return; + + StateData State; + State.reset(StateRefs.front()); + for (unsigned i = 1, e = StateRefs.size(); i != e; ++i) { + StateRef &Next = StateRefs[i]; + if (State.End <= Next.Start) { + /* The next reference is not overlapped with the previous + * reference. A new state is found. */ + States.push_back(State); + /* Reset Curr to the next state. */ + State.reset(Next); + } else { + /* Overlap and merge. */ + State.insert(Next); + } + } + /* The last state. 
*/ + States.push_back(State); + } + + StateList &getStateList() { + return States; + } + + CallList &getCalls() { + return Calls; + } +}; + + +namespace llvm { +/* Passes */ +FunctionPass *createReplaceIntrinsic(); +FunctionPass *createFastMathPass(); +FunctionPass *createProfileExec(IRFactory *IF); +FunctionPass *createStateMappingPass(IRFactory *IF); +FunctionPass *createRedundantStateElimination(IRFactory *IF); +FunctionPass *createCombineGuestMemory(IRFactory *IF); +FunctionPass *createCombineCasts(IRFactory *IF); +FunctionPass *createCombineZExtTrunc(); +FunctionPass *createSimplifyPointer(IRFactory *IF); + +void initializeReplaceIntrinsicPass(llvm::PassRegistry&); +void initializeFastMathPassPass(llvm::PassRegistry&); +void initializeProfileExecPass(llvm::PassRegistry&); +void initializeStateMappingPassPass(llvm::PassRegistry&); +void initializeRedundantStateEliminationPass(llvm::PassRegistry&); +void initializeCombineGuestMemoryPass(llvm::PassRegistry&); +void initializeCombineCastsPass(llvm::PassRegistry&); +void initializeCombineZExtTruncPass(llvm::PassRegistry&); +void initializeSimplifyPointerPass(llvm::PassRegistry&); + +/* Analysis */ +void initializeInnerLoopAnalysisWrapperPassPass(llvm::PassRegistry&); +} + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/llvm-soft-perfmon.h b/src/llvm/include/llvm-soft-perfmon.h new file mode 100644 index 0000000..c55201e --- /dev/null +++ b/src/llvm/include/llvm-soft-perfmon.h @@ -0,0 +1,74 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __LLVM_SOFT_PERFMON_H +#define __LLVM_SOFT_PERFMON_H + +#include "utils.h" + +#define MAX_SPM_THREADS 256 + +#define SPM_NONE (uint64_t)0 +#define SPM_BASIC ((uint64_t)1 << 0) +#define SPM_TRACE ((uint64_t)1 << 1) +#define SPM_CACHE ((uint64_t)1 << 2) +#define SPM_PASS ((uint64_t)1 << 3) +#define SPM_HPM ((uint64_t)1 << 4) +#define SPM_EXIT ((uint64_t)1 << 5) +#define SPM_HOTSPOT ((uint64_t)1 << 6) +#define SPM_ALL SPM_BASIC | SPM_TRACE | SPM_CACHE | SPM_PASS | SPM_HPM | \ + SPM_EXIT | SPM_HOTSPOT +#define SPM_NUM 9 + + +/* + * Software Performance Monitor (SPM) + */ +class SoftwarePerfmon { +public: + typedef void (*ExitFuncPtr)(void); + + uint64_t Mode; /* Profile level */ + uint64_t NumInsns; /* Number of instructions */ + uint64_t NumBranches; /* Number of branches */ + uint64_t NumLoads; /* Number of memory loads */ + uint64_t NumStores; /* Number of memory stores */ + uint64_t NumTraceExits; /* Count of trace exits */ + uint64_t SampleTime; /* Process time of the sampling handler. 
*/ + unsigned CoverSet; + std::vector<std::vector<uint64_t> *> SampleListVec; + + SoftwarePerfmon() + : Mode(SPM_NONE), NumInsns(0), NumBranches(0), NumLoads(0), NumStores(0), + NumTraceExits(0), SampleTime(0), CoverSet(90) {} + SoftwarePerfmon(std::string &ProfileLevel) : SoftwarePerfmon() { + ParseProfileMode(ProfileLevel); + } + + bool isEnabled() { + return Mode != SPM_NONE; + } + + void registerExitFn(ExitFuncPtr F) { + ExitFunc.push_back(F); + } + + void printProfile(); + +private: + std::vector<ExitFuncPtr> ExitFunc; + + void ParseProfileMode(std::string &ProfileLevel); + void printBlockProfile(); + void printTraceProfile(); +}; + +extern SoftwarePerfmon *SP; + +#endif /* __LLVM_SOFT_PERFMON_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/llvm-state.h b/src/llvm/include/llvm-state.h new file mode 100644 index 0000000..e573073 --- /dev/null +++ b/src/llvm/include/llvm-state.h @@ -0,0 +1,194 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + * + * This file implements the basic optimization schemes including indirect + * branch target cache (IBTC), indirect branch chain (IB chain), and trace + * profiling and prediction routines. + */ + +#ifndef __LLVM_STATE_H +#define __LLVM_STATE_H + +#define COPY_STATE(_dst, _src, _e) do { _dst->_e = _src->_e; } while(0) + +/* + * The following data structure and routine are used to save/restore the states + * of CPUArchState. Only the states that could affect decoding the guest binary by + * the TCG front-end are saved/restored. Such states are saved when translating + * the block at the first time because the states could change later and are + * restored to the saved values when the block is decoded again during the + * trace formation. 
 */
/* Per-target snapshot of the decode-relevant CPU state. Exactly one of the
 * following structs is compiled in and aliased as `cpustate'. */
#if defined(TARGET_I386) || defined(TARGET_X86_64)
typedef struct i386_env {
    int singlestep_enabled;
    uint32_t hflags;
    target_ulong eflags;
} cpustate;
#elif defined(TARGET_ARM)
typedef struct arm_env {
    int singlestep_enabled;
    uint32_t pstate;
    uint32_t aarch64;
    struct {
        uint32_t c15_cpar;
        uint64_t scr_el3;
    } cp15;
    uint32_t uncached_cpsr;
    uint64_t features;
} cpustate;
#elif defined(TARGET_PPC) || defined(TARGET_PPC64)
typedef struct ppc_env {
    int singlestep_enabled;
    target_ulong msr;
    int mmu_idx;
    uint32_t flags;
    uint64_t insns_flags;
    uint64_t insns_flags2;
    target_ulong hflags;
} cpustate;
#elif defined(TARGET_SH4)
typedef struct sh4_env {
    int singlestep_enabled;
    uint32_t sr;            /* status register */
    uint32_t fpscr;         /* floating point status/control register */
    uint32_t features;
} cpustate;
#elif defined(TARGET_M68K)
typedef struct m68k_env {
    int singlestep_enabled;
    uint32_t sr;            /* status register */
    uint32_t fpcr;          /* floating point status/control register */
} cpustate;
#elif defined(TARGET_MIPS)
typedef struct mips_env {
    int singlestep_enabled;
    target_ulong btarget;
} cpustate;
#else
/* Fallback for targets whose decoding is not affected by mutable state. */
typedef struct dummy_env {
    int dummy;
} cpustate;
#endif

/* Heap-allocate a snapshot of the decode-relevant state and attach it to
 * `tb->state'. Called when the block is first translated; the snapshot is
 * restored by tcg_copy_state() and released by delete_state(). */
static inline void tcg_save_state(CPUArchState *env, TranslationBlock *tb)
{
#if defined(TARGET_I386) || defined(TARGET_X86_64)
    CPUState *cpu = ENV_GET_CPU(env);
    struct i386_env *s = new struct i386_env;
    COPY_STATE(s, cpu, singlestep_enabled);
    COPY_STATE(s, env, hflags);
    COPY_STATE(s, env, eflags);
#elif defined(TARGET_ARM)
    CPUState *cpu = ENV_GET_CPU(env);
    struct arm_env *s = new struct arm_env;
    COPY_STATE(s, cpu, singlestep_enabled);
    COPY_STATE(s, env, cp15.c15_cpar);
    COPY_STATE(s, env, cp15.scr_el3);
    COPY_STATE(s, env, uncached_cpsr);
    COPY_STATE(s, env, features);
    COPY_STATE(s, env, pstate);
    COPY_STATE(s, env, aarch64);
#elif defined(TARGET_PPC) || defined(TARGET_PPC64)
    CPUState *cpu = ENV_GET_CPU(env);
    struct ppc_env *s = new struct ppc_env;
    COPY_STATE(s, cpu, singlestep_enabled);
    COPY_STATE(s, env, msr);
    COPY_STATE(s, env, mmu_idx);
    COPY_STATE(s, env, flags);
    COPY_STATE(s, env, insns_flags);
    COPY_STATE(s, env, insns_flags2);
    COPY_STATE(s, env, hflags);
#elif defined(TARGET_SH4)
    CPUState *cpu = ENV_GET_CPU(env);
    struct sh4_env *s = new struct sh4_env;
    COPY_STATE(s, cpu, singlestep_enabled);
    COPY_STATE(s, env, sr);
    COPY_STATE(s, env, fpscr);
    COPY_STATE(s, env, features);
#elif defined(TARGET_M68K)
    CPUState *cpu = ENV_GET_CPU(env);
    struct m68k_env *s = new struct m68k_env;
    COPY_STATE(s, cpu, singlestep_enabled);
    COPY_STATE(s, env, sr);
    COPY_STATE(s, env, fpcr);
#elif defined(TARGET_MIPS)
    CPUState *cpu = ENV_GET_CPU(env);
    struct mips_env *s = new struct mips_env;
    COPY_STATE(s, cpu, singlestep_enabled);
    COPY_STATE(s, env, btarget);
#else
    void *s = nullptr;
#endif

    tb->state = (void *)s;
}

/*
 * tcg_restore_state()
 * Reset states to those when the block is first translated.
 */
/* NOTE(review): the comment above names tcg_restore_state but the function
 * is tcg_copy_state — confirm which name is intended. It copies the snapshot
 * saved in `tb->state' back into the live cpu/env. */
static inline void tcg_copy_state(CPUArchState *env, TranslationBlock *tb)
{
#if defined(TARGET_I386) || defined(TARGET_X86_64)
    CPUState *cpu = ENV_GET_CPU(env);
    struct i386_env *i386e = (struct i386_env *)tb->state;
    COPY_STATE(cpu, i386e, singlestep_enabled);
    COPY_STATE(env, i386e, hflags);
    COPY_STATE(env, i386e, eflags);
#elif defined(TARGET_ARM)
    CPUState *cpu = ENV_GET_CPU(env);
    struct arm_env *arme = (struct arm_env *)tb->state;
    COPY_STATE(cpu, arme, singlestep_enabled);
    COPY_STATE(env, arme, cp15.c15_cpar);
    COPY_STATE(env, arme, cp15.scr_el3);
    COPY_STATE(env, arme, uncached_cpsr);
    COPY_STATE(env, arme, features);
    COPY_STATE(env, arme, pstate);
    COPY_STATE(env, arme, aarch64);
#elif defined(TARGET_PPC) || defined(TARGET_PPC64)
    CPUState *cpu = ENV_GET_CPU(env);
    struct ppc_env *ppce = (struct ppc_env *)tb->state;
    COPY_STATE(cpu, ppce, singlestep_enabled);
    COPY_STATE(env, ppce, msr);
    COPY_STATE(env, ppce, mmu_idx);
    COPY_STATE(env, ppce, flags);
    COPY_STATE(env, ppce, insns_flags);
    COPY_STATE(env, ppce, insns_flags2);
    COPY_STATE(env, ppce, hflags);
#elif defined(TARGET_SH4)
    CPUState *cpu = ENV_GET_CPU(env);
    struct sh4_env *sh4e = (struct sh4_env *)tb->state;
    COPY_STATE(cpu, sh4e, singlestep_enabled);
    COPY_STATE(env, sh4e, sr);
    COPY_STATE(env, sh4e, fpscr);
    COPY_STATE(env, sh4e, features);
#elif defined(TARGET_M68K)
    CPUState *cpu = ENV_GET_CPU(env);
    struct m68k_env *m68ke = (struct m68k_env *)tb->state;
    COPY_STATE(cpu, m68ke, singlestep_enabled);
    COPY_STATE(env, m68ke, sr);
    COPY_STATE(env, m68ke, fpcr);
#elif defined(TARGET_MIPS)
    CPUState *cpu = ENV_GET_CPU(env);
    struct mips_env *mipse = (struct mips_env *)tb->state;
    COPY_STATE(cpu, mipse, singlestep_enabled);
    COPY_STATE(env, mipse, btarget);
#endif
}

/* Release the snapshot attached by tcg_save_state(). */
static inline void delete_state(TranslationBlock *tb)
{
    delete (cpustate *)tb->state;
    tb->state = nullptr;
}

#undef COPY_STATE
#endif /* __LLVM_STATE_H */


/*
 *
vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/llvm/include/llvm-target.h b/src/llvm/include/llvm-target.h new file mode 100644 index 0000000..1784942 --- /dev/null +++ b/src/llvm/include/llvm-target.h @@ -0,0 +1,116 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_TARGET_H +#define __LLVM_TARGET_H + +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm-types.h" +#include "llvm-translator.h" + +#ifndef __PRI64_PREFIX +# if __WORDSIZE == 64 +# define __PRI64_PREFIX "l" +# else +# define __PRI64_PREFIX "ll" +# endif +#endif + +#if TARGET_LONG_BITS == 32 +# define PRId "d" +# define PRIx "x" +#else +# define PRId __PRI64_PREFIX "d" +# define PRIx __PRI64_PREFIX "x" +#endif + +#define PRId64 __PRI64_PREFIX "d" +#define PRIu64 __PRI64_PREFIX "u" + +class code_ostream { + char *OutBufStart; + char *OutBufCur; +public: + void Skip(unsigned Size) { + OutBufCur += Size; + } + + code_ostream(uintptr_t Ptr) + : OutBufStart((char *)Ptr), OutBufCur((char *)Ptr) {} + code_ostream &operator<<(char C) { + *OutBufCur = C; + OutBufCur++; + return *this; + } + code_ostream &operator<<(unsigned char C) { + *(unsigned char *)OutBufCur = C; + OutBufCur++; + return *this; + } + code_ostream &operator<<(unsigned int C) { + *(unsigned int *)OutBufCur = C; + OutBufCur += sizeof(unsigned int); + return *this; + } + code_ostream &operator<<(unsigned long C) { + *(unsigned long *)OutBufCur = C; + OutBufCur += sizeof(unsigned long); + return *this; + } +}; + +static inline void EmitByte(code_ostream &OS, unsigned char C) +{ + OS << (char)C; +} +static inline void EmitConstant(code_ostream &OS, uint64_t Val, unsigned Size) +{ + for (unsigned i = 0; i != Size; ++i) { + EmitByte(OS, Val & 255); + Val >>= 8; + } +} + +/* + * EventListener is used by the JIT to notify clients about significant events + * during compilation. 
+ */ +class EventListener : public JITEventListener { + NotifyInfo &NI; + +public: + EventListener(NotifyInfo &NI) : NI(NI) {} + ~EventListener() {} + virtual void NotifyFunctionEmitted(const Function &F, void *Code, size_t Size, + const EmittedFunctionDetails &Details); +#if defined(LLVM_V35) + virtual void NotifyObjectEmitted(const ObjectImage &Obj); +#else + virtual void NotifyObjectEmitted(const object::ObjectFile &Obj, + const RuntimeDyld::LoadedObjectInfo &L); +#endif +}; + + +const char *getMMUFName(const void *func); +bool isMMUFunction(std::string &Name); +bool isLMTFunction(std::string &Name); +bool isIllegalHelper(const void *func); +bool isLibcall(std::string &Name); +bool isSoftFPcall(std::string &Name); +void AddDependentSymbols(LLVMTranslator *Translator); +Value *StripPointer(Value *Ptr); +Value *StripPointerWithConstantOffset(const DataLayout *DL, Value *Ptr, + APInt &Offset, Value *GuestBase); +Value *getBaseWithConstantOffset(const DataLayout *DL, Value *Ptr, intptr_t &Offset); +void ProcessErase(IVec &toErase); + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/llvm/include/llvm-translator.h b/src/llvm/include/llvm-translator.h new file mode 100644 index 0000000..d1d92c5 --- /dev/null +++ b/src/llvm/include/llvm-translator.h @@ -0,0 +1,270 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_TRANSLATOR_H +#define __LLVM_TRANSLATOR_H + +#include <map> +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm-types.h" +#include "llvm-pass.h" +#include "llvm.h" + + +class OptimizationInfo; +class EventListener; +class NotifyInfo; +class IRFactory; +class TraceBuilder; + + +/* + * BaseRegister is used to describe the `reserved' registers by QEMU TCG. + * Ex: R14 for the x86 host or R7 for the ARM host. 
+ */ +struct BaseRegister { + BaseRegister() : Base(nullptr) {} + int RegNo; /* Register number */ + std::string Name; /* Register name string */ + Type *Ty; /* Type (struct CPUArchState) */ + Instruction *Base; /* CallInst to retrieve basereg */ +}; + +struct GuestBaseRegister { + GuestBaseRegister() : Name(""), Base(nullptr) {} + std::string Name; /* Register name string */ + Value *Base; /* CallInst to retrieve basereg */ +}; + +/* + * Information of helper functions defined in llvm-helper.h. + */ +struct HelperInfo { + HelperInfo() + : ConflictSize(0), mayConflictArg(false), hasNestedCall(false) {} + + struct ArgInfo { + unsigned ConstantWeight; /* Weight if the argument is a constant */ + unsigned AllocaWeight; /* Weight if the argument is a alloca */ + ArgInfo(unsigned CWeight, unsigned AWeight) + : ConstantWeight(CWeight), AllocaWeight(AWeight) {} + }; + + Function *Func; /* Function symbol to be inlined */ + Function *FuncNoInline; /* Function symbol not to be inlined */ + std::vector<std::pair<Instruction*, intptr_t> > States; + std::vector<CallInst*> NestedCalls; + StateRange StateUse; + StateRange StateDef; + CodeMetrics Metrics; /* Inlining metrics */ + std::vector<ArgInfo> ArgumentWeights; /* Weight of the function arguments */ + intptr_t ConflictSize; + + bool mayConflictArg; /* Arguments conflict with state mapping or not */ + bool hasNestedCall; /* This function has nested function or not */ + + void CalculateMetrics(Function *F); + + void insertState(StateRange &Range, bool isWrite) { + if (isWrite) + StateDef.insert(Range.begin(), Range.end()); + else + StateUse.insert(Range.begin(), Range.end()); + } +}; + +/* + * NotifyInfo is used to pass information between LLVMTranslator, IRFactory and + * the JIT listener. 
+ */ +class NotifyInfo { +#define MAX_CHAINSLOT 256 +public: + struct SlotInfo { + size_t Key; + uintptr_t Addr; + }; + + struct PatchInfo { + PatchInfo(unsigned ty, unsigned idx, uintptr_t addr) + : Type(ty), Idx(idx), Addr(addr) {} + unsigned Type; + unsigned Idx; + uintptr_t Addr; + }; + + NotifyInfo() : Func(nullptr) { + ChainSlot = new SlotInfo[MAX_CHAINSLOT]; + } + ~NotifyInfo() { + delete ChainSlot; + } + + Function *Func; /* LLVM Function of this translation unit */ + TCGOp *Op; + TranslationBlock *TB; + uint16_t NumInsts; + RestoreVec Restore; + unsigned NumChainSlot; + SlotInfo *ChainSlot; + + uint32_t Size; /* Size of the translated host code */ + uint8_t *Code; /* Start PC of the translated host code */ + std::vector<PatchInfo> Patches; + + void reset() { + Restore.clear(); + Patches.clear(); + NumInsts = 0; + NumChainSlot = 0; + } + unsigned setChainSlot(size_t Key) { + if (NumChainSlot >= MAX_CHAINSLOT) + hqemu_error("run out of chain slot.\n"); + unsigned Curr = NumChainSlot; + ChainSlot[NumChainSlot++].Key = Key; + return Curr; + } + uintptr_t getChainSlotAddr(unsigned Idx) { + if (NumChainSlot >= MAX_CHAINSLOT) + hqemu_error("invalid chain slot index.\n"); + return (uintptr_t)&ChainSlot[Idx].Addr; + } + void addPatch(unsigned Type, unsigned Idx, uintptr_t Addr) { + Patches.push_back(PatchInfo(Type, Idx, Addr)); + } + void setOp(TCGOp *op) { Op = op; } + void setTB(TranslationBlock *tb) { + TB = tb; + NumInsts = 0; + } + uint32_t setRestorePoint() { + uint32_t Idx = Restore.size(); + if (Idx != (uint16_t)Idx) + hqemu_error("key value too large.\n"); + Restore.push_back(std::make_pair(TB->id, NumInsts)); + return Idx; + } +}; + +/* + * LLVM Translator + */ +class LLVMTranslator { + unsigned MyID; /* Translator ID */ + CPUArchState *Env; + + /* Basic types */ + Type *VoidTy; + IntegerType *Int8Ty; + IntegerType *Int16Ty; + IntegerType *Int32Ty; + IntegerType *Int64Ty; + IntegerType *Int128Ty; + IntegerType *IntPtrTy; + PointerType *Int8PtrTy; + 
PointerType *Int16PtrTy; + PointerType *Int32PtrTy; + PointerType *Int64PtrTy; + Type *FloatTy; + Type *DoubleTy; + PointerType *FloatPtrTy; + PointerType *DoublePtrTy; + + LLVMContext Context; /* Translator local context */ + Module *Mod; /* The LLVM module */ + const DataLayout *DL; /* Data layout */ + NotifyInfo NI; /* Info to set/use by the JIT listener */ + + std::vector<BaseRegister> BaseReg; /* Reserved base registers */ + GuestBaseRegister GuestBaseReg; /* Reserved guest base register */ + FlatType StateType; /* Offset and type of guest registers */ + TCGHelperMap TCGHelpers; + HelperMap Helpers; + std::set<std::string> ConstHelpers; + SymbolMap Symbols; + + MCDisasm *GuestDisAsm; + MCDisasm *HostDisAsm; + + IRFactory *IF; /* TCG-to-LLVM IR converter */ + + /* Initialize the LLVM module. */ + void InitializeModule(); + + /* Create the JIT compiler. */ + void InitializeJIT(); + + /* Initialize required LLVM types. */ + void InitializeType(); + + /* Setup guest and host dependent structures. */ + void InitializeTarget(); + + /* Setup special registers. */ + void DefineSpecialReg(std::map<Type*, Type*> &SpecialReg); + + /* Convert the CPUArchState structure type to a list of primitive types. */ + void FlattenCPUState(Type *Ty, intptr_t &Off, std::map<Type*, Type*> &SpecialReg); + + /* Initialize helper functions. */ + void InitializeHelpers(); + + /* Analyze and optimize a helper function. 
*/ + bool OptimizeHelper(HelperInfo &Helper); + + void InitializeDisasm(); + + void InitializeConstHelpers(); + + void Commit(TraceBuilder &Builder); + + void Abort(TraceBuilder &Builder); + + void dump(CPUArchState *env, TranslationBlock *tb); + + LLVMTranslator(unsigned id, CPUArchState *env); + +public: + ~LLVMTranslator(); + + void GenBlock(CPUArchState *env, OptimizationInfo *Opt); + void GenTrace(CPUArchState *env, OptimizationInfo *Opt); + + unsigned getID() { return MyID; } + LLVMContext *getContext() { return &Context; } + Module *getModule() { return Mod; } + NotifyInfo &getNotifyInfo() { return NI; } + std::vector<BaseRegister> &getBaseReg() { return BaseReg; } + GuestBaseRegister &getGuestBaseReg() { return GuestBaseReg; } + TCGHelperMap &getTCGHelpers() { return TCGHelpers; } + HelperMap &getHelpers() { return Helpers; } + std::set<std::string> &getConstHelpers() { return ConstHelpers; } + FlatType &getStateType() { return StateType; } + SymbolMap &getSymbols() { return Symbols; } + MCDisasm *getHostDisAsm() { return HostDisAsm;} + + void AddSymbol(std::string Name, void *FP) { + Symbols[Name] = (uintptr_t)FP; + } + + /* Create the LLVMTranslator instrance. */ + static LLVMTranslator *CreateLLVMTranslator(int id, CPUArchState *env) { + return new LLVMTranslator(id, env); + } + + /* Show guest assembly code for each compiled TB. */ + void printAsm(CPUArchState *env, TranslationBlock *tb); + + /* Show TCG micro ops for each compiled TB. */ + void printOp(CPUArchState *env, TranslationBlock *tb); +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/llvm-types.h b/src/llvm/include/llvm-types.h new file mode 100644 index 0000000..1b8d09c --- /dev/null +++ b/src/llvm/include/llvm-types.h @@ -0,0 +1,127 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __LLVM_TYPES_H +#define __LLVM_TYPES_H + +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Cloning.h" + +#if defined(LLVM_V35) +#include "llvm/MC/MCDisassembler.h" +#include "llvm/ExecutionEngine/ObjectImage.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/MemoryObject.h" +#elif defined(LLVM_V38) +#include "llvm/MC/MCDisassembler.h" +#include "llvm/Object/SymbolSize.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/MemoryObject.h" +#elif defined(LLVM_V39) +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/Object/SymbolSize.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Support/MemoryObject.h" +#else +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/Object/SymbolSize.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#endif + +#include 
<vector> +#include <set> +#include <map> +#include "llvm-macro.h" +#include "qemu-types.h" + +using namespace llvm; + +class HelperInfo; + +typedef std::vector<TranslationBlock *> TBVec; +typedef std::vector<std::pair<BlockID, uint16_t> > RestoreVec; +typedef std::map<uintptr_t, std::string> TCGHelperMap; /* <func_ptr, func_name> */ +typedef std::map<std::string, HelperInfo*> HelperMap; +typedef std::map<std::string, uintptr_t> SymbolMap; +typedef std::map<intptr_t, Type *> FlatType; /* <state_off, state_ty> */ +typedef std::vector<Instruction *> IVec; +typedef std::vector<BasicBlock *> BBVec; + + +static inline const DataLayout *getDataLayout(Module *Mod) { +#if defined(LLVM_V35) + return Mod->getDataLayout(); +#else + return &Mod->getDataLayout(); +#endif +} + +static inline AllocaInst *CreateAlloca(Type *Ty, unsigned AddrSpace, + const Twine &Name, + Instruction *InsertBefore = nullptr) { +#if defined(LLVM_V35) || defined(LLVM_V38) || defined(LLVM_V39) + return new AllocaInst(Ty, Name, InsertBefore); +#else + return new AllocaInst(Ty, AddrSpace, Name, InsertBefore); +#endif +} + +static inline AllocaInst *CreateAlloca(Type *Ty, unsigned AddrSpace, + Value *ArraySize = nullptr, + const Twine &Name = "", + Instruction *InsertBefore = nullptr) { +#if defined(LLVM_V35) || defined(LLVM_V38) || defined(LLVM_V39) + return new AllocaInst(Ty, ArraySize, Name, InsertBefore); +#else + return new AllocaInst(Ty, AddrSpace, ArraySize, Name, InsertBefore); +#endif +} + +static inline void InlineFunc(CallInst *CI) { +#if defined(LLVM_V38) || defined(LLVM_V39) + AssumptionCacheTracker ACT; + InlineFunctionInfo IFI(nullptr, &ACT); +#else + InlineFunctionInfo IFI; +#endif + InlineFunction(CI, IFI); +} + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/llvm.h b/src/llvm/include/llvm.h new file mode 100644 index 0000000..67bff2f --- /dev/null +++ b/src/llvm/include/llvm.h @@ -0,0 +1,278 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, 
Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_H +#define __LLVM_H + +#include <memory> +#include <vector> +#include "llvm/ADT/STLExtras.h" +#include "llvm-types.h" +#include "llvm-debug.h" +#include "utils.h" + +#if defined(ENABLE_MCJIT) +#include "llvm/ExecutionEngine/MCJIT.h" +#include "MCJITMemoryManager.h" +typedef class DefaultMCJITMemoryManager MemoryManager; +#else +#if defined(LLVM_V35) +#include "JIT.h" +#include "JITMemoryManager.h" +#else +# error "LLVM version >3.5 supports MCJIT only. ENABLE_MCJIT must be enabled." +#endif +typedef class DefaultJITMemoryManager MemoryManager; +#endif + + +extern cl::OptionCategory CategoryHQEMU; + +class LLVMTranslator; +class OptimizationInfo; +class TranslatedCode; + +typedef std::unique_ptr<OptimizationInfo> OptRequest; + + +/* + * LLVMEnv is the top level container of whole LLVM translation environment + * which manages the LLVM translator(s) and globally shared resources. The + * LLVMEnv instance must be initialized before using the underlying transaltion + * service and can only be initialized ONCE. + */ +class LLVMEnv { +public: + typedef std::vector<TranslatedCode *> TransCodeList; + typedef std::map<uintptr_t, TranslatedCode *> TransCodeMap; + typedef std::vector<uintptr_t> ChainSlot; + typedef std::pair<size_t, uintptr_t> SlotInfo; + +private: + std::shared_ptr<MemoryManager> MM; /* Trace cache manager */ + unsigned NumTranslator; /* The amount of LLVM translators */ + std::vector<LLVMTranslator *> Translator; /* LLVM translators */ + std::vector<pthread_t> HelperThread; /* LLVM translation threads */ + std::vector<CPUState *> ThreadEnv; + + TransCodeList TransCode; /* Translated traces. */ + TransCodeMap SortedCode; /* Sorted traces in code cache address order. */ + ChainSlot ChainPoint; /* Address of stubs for trace-to-block linking */ + + bool UseThreading; /* Whether multithreaded translators are used or not. 
*/ + unsigned NumFlush; + + LLVMEnv(); + + /* Parse the command line options. */ + void ParseCommandLineOptions(); + + /* Test whether HQEMU is running in Intel VTune. */ + void ProbeIntelVTune(); + +public: + QemuMutex mutex; + + ~LLVMEnv(); + + /* Start/stop/restart LLVM translators and worker threads. */ + void CreateTranslator(); + void DeleteTranslator(); + void RestartTranslator(); + void StartThread(); + void StopThread(); + + /* Get the LLVM translator with index. */ + LLVMTranslator *getTranslator(unsigned ID) { + if (ID >= Translator.size()) + hqemu_error("invalid translator ID.\n"); + return Translator[ID]; + } + + /* Acquire and lock the first LLVM translator. */ + LLVMTranslator *AcquireSingleTranslator(); + + /* Release the first LLVM translator. */ + void ReleaseSingleTranslator(); + + /* Get CPUState of the LLVM translator with index. */ + CPUState *getThreadEnv(int ID) { return ThreadEnv[ID]; } + + std::vector<pthread_t> &getHelperThread() { return HelperThread; } + std::shared_ptr<MemoryManager> getMemoryManager() { return MM; } + TransCodeList &getTransCode() { return TransCode; } + TransCodeMap &getSortedCode() { return SortedCode; } + ChainSlot &getChainPoint() { return ChainPoint; } + TraceID insertTransCode(TranslatedCode *TC); + SlotInfo getChainSlot(); + + bool isThreading() { return UseThreading; } + void incNumFlush() { NumFlush++; } + unsigned getNumFlush() { return NumFlush; } + + /* + * static public members + */ + static bool InitOnce; /* LLVMEnv is initialized or not? 
*/ + static int TransMode; + static uint8_t *TraceCache; + static size_t TraceCacheSize; + static bool RunWithVTune; + + static void CreateLLVMEnv(); + static void DeleteLLVMEnv(); + static int OptimizeBlock(CPUArchState *env, OptRequest Request); + static int OptimizeTrace(CPUArchState *env, OptRequest Request); + static void setTransMode(int Mode) { TransMode = Mode; } + static int isTraceMode() { + return (TransMode == TRANS_MODE_HYBRIDS || + TransMode == TRANS_MODE_HYBRIDM); + } +}; + +class QueueManager { + std::vector<Queue *> ActiveQueue; + Queue *CurrentQueue; + +public: + QueueManager(); + ~QueueManager(); + void Enqueue(OptimizationInfo *Opt); + void *Dequeue(); + void Flush(); +}; + +/* + * OptimizationInfo is the description to an optimization request. It consists + * of the optimization mode and the control-flow-graph of the trace. + */ +class OptimizationInfo { +public: + typedef std::set<TranslationBlock *> TraceNode; + typedef std::map<TranslationBlock *, TraceNode> TraceEdge; + + ~OptimizationInfo() { + if (CFG) + GraphNode::DeleteCFG(CFG); + } + + void ComposeCFG(); + GraphNode *getCFG() { return CFG; } + bool isTrace() { return !isBlock; } + + static OptRequest CreateRequest(TranslationBlock *tb) { + return OptRequest(new OptimizationInfo(tb)); + } + static OptRequest CreateRequest(TBVec &trace, int idx) { + return OptRequest(new OptimizationInfo(trace, idx)); + } + static OptRequest CreateRequest(TranslationBlock *head, TraceEdge &edges) { + return OptRequest(new OptimizationInfo(head, edges)); + } + +private: + TBVec Trace; /* Trace of a list of TBs */ + int LoopHeadIdx; /* Index to the loopback block */ + bool isUserTrace; /* Trace of all user-mode blocks */ + bool isBlock; /* Trace of a single block */ + GraphNode *CFG; /* CFG of the trace */ + + OptimizationInfo(TranslationBlock *tb) + : isUserTrace(true), isBlock(true) { + Trace.push_back(tb); + LoopHeadIdx = -1; + CFG = new GraphNode(tb); + } + OptimizationInfo(TBVec &trace, int idx) + : 
isUserTrace(true), isBlock(false), CFG(nullptr) { + if (trace.empty()) + hqemu_error("trace length cannot be zero.\n"); + Trace = trace; + LoopHeadIdx = idx; + } + OptimizationInfo(TranslationBlock *HeadTB, TraceEdge &Edges); + + void SearchCycle(TraceNode &SearchNodes, TraceNode &Nodes, + TraceEdge &Edges, TBVec &Visited, int Depth); + void ExpandTrace(TranslationBlock *HeadTB, TraceEdge &Edges); +}; + +class TraceInfo { +public: + TBVec TBs; + unsigned NumLoop; + unsigned NumExit; + unsigned NumIndirectBr; + uint64_t **ExecCount; + uint64_t TransTime; + uint32_t Attribute; + + TraceInfo(NodeVec &Nodes, uint32_t Attr = A_None) + : NumLoop(0), NumExit(0), NumIndirectBr(0), ExecCount(nullptr), + TransTime(0), Attribute(Attr) + { + if (Nodes.empty()) + hqemu_error("number of nodes cannot be zero.\n"); + for (unsigned i = 0, e = Nodes.size(); i != e; ++i) + TBs.push_back(Nodes[i]->getTB()); + } + + TranslationBlock *getEntryTB() { return TBs[0]; } + target_ulong getEntryPC() { return TBs[0]->pc; } + unsigned getNumBlock() { return TBs.size(); } + void setTransTime(struct timeval *start, struct timeval *end) { + struct timeval t; + timersub(end, start, &t); + TransTime = t.tv_sec * 1e6 + t.tv_usec; + } + bool hasAttribute(uint32_t Attr) { + return Attribute & Attr; + } +}; + +struct ChainInfo { + std::vector<uintptr_t> Chains; + std::vector<BlockID> DepTraces; + + void insertChain(uintptr_t addr) { + Chains.push_back(addr); + } + void insertDepTrace(BlockID id) { + DepTraces.push_back(id); + } + static ChainInfo *get(TranslationBlock *tb) { + if (!tb->chain) + tb->chain = (ChainInfo *)new ChainInfo; + return (ChainInfo *)tb->chain; + } + static void free(TranslationBlock *tb) { + delete (ChainInfo *)tb->chain; + tb->chain = nullptr; + } +}; + +class TranslatedCode { +public: + TranslatedCode() : Trace(nullptr), SampleCount(0) {} + ~TranslatedCode() { + if (Trace) + delete Trace; + } + + bool Active; + uint32_t Size; /* Size of the translated host code */ + uint8_t 
*Code; /* Start PC of the translated host code */ + TranslationBlock *EntryTB; /* The entry block of the region */ + RestoreVec Restore; + TraceInfo *Trace; + uint64_t SampleCount; +}; + + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/optimization.h b/src/llvm/include/optimization.h new file mode 100644 index 0000000..bdafb3a --- /dev/null +++ b/src/llvm/include/optimization.h @@ -0,0 +1,261 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __OPTIMIZATION_H +#define __OPTIMIZATION_H + +#include <iostream> +#include <list> +#include "qemu-types.h" + + +extern "C" TranslationBlock *tbs; + +/* + * Instruction TLB (iTLB) + */ +#define ITLB_CACHE_BITS (10) +#define ITLB_CACHE_SIZE (1U << ITLB_CACHE_BITS) +#define ITLB_CACHE_MASK (ITLB_CACHE_SIZE - 1) + +class ITLB { + struct itlb_t { tb_page_addr_t paddr; }; + itlb_t Cache[ITLB_CACHE_SIZE]; + +public: + ITLB() { reset(); } + ~ITLB() {} + + inline itlb_t &cache(target_ulong vaddr) { + return Cache[(vaddr >> TARGET_PAGE_BITS) & ITLB_CACHE_MASK]; + } + void reset() { + for (unsigned i = 0; i < ITLB_CACHE_SIZE; ++i) + Cache[i].paddr = (tb_page_addr_t)-1; + } + void flush(target_ulong vaddr) { + cache(vaddr).paddr = (tb_page_addr_t)-1; + } + void insert(target_ulong vaddr, tb_page_addr_t paddr) { + cache(vaddr).paddr = paddr; + } + tb_page_addr_t get(target_ulong vaddr) { + return cache(vaddr).paddr; + } +}; + + +/* + * Indirect Branch Target Cache (IBTC) + */ +#define IBTC_CACHE_BITS (16) +#define IBTC_CACHE_SIZE (1U << IBTC_CACHE_BITS) +#define IBTC_CACHE_MASK (IBTC_CACHE_SIZE - 1) + +class IBTC { + typedef std::pair<target_ulong, TranslationBlock *> ibtc_t; + ibtc_t Cache[IBTC_CACHE_SIZE]; + bool NeedUpdate; + uint64_t Total; /* Total access count */ + uint64_t Miss; /* Miss count */ + +public: + IBTC() : NeedUpdate(false), Total(0), Miss(0) { reset(); } + ~IBTC() {} + + inline ibtc_t 
&cache(target_ulong pc) { + return Cache[(pc >> 2) & IBTC_CACHE_MASK]; + } + void reset() { + for (unsigned i = 0; i < IBTC_CACHE_SIZE; ++i) + Cache[i].first = (target_ulong)-1; + } + void remove(TranslationBlock *tb) { + ibtc_t &c = cache(tb->pc); + if (c.first == tb->pc) + c.first = (target_ulong)-1; + } + void insert(target_ulong pc, TranslationBlock *tb) { + cache(pc) = std::make_pair(pc, tb); + } + TranslationBlock *get(target_ulong pc) { + ibtc_t &c = cache(pc); + return (c.first == pc) ? c.second : nullptr; + } + void setUpdate() { NeedUpdate = true; } + void resetUpdate() { NeedUpdate = false; } + bool needUpdate() { return NeedUpdate; } + inline void incTotal() { Total++; } + inline void incMiss() { Miss++; } + void dump() { + double HitRate = (double)(Total - Miss) * 100 / Total; + std::cerr << "\nibtc.miss = " << Miss << "/" << Total << + " (hit rate=" << HitRate << "%)\n"; + } +}; + +/* + * Cross-Page Block Linking (CPBL) + */ +class CPBL { + uint64_t Total; /* Total access count */ + uint64_t Miss; /* Miss count */ + uint64_t ValidateTotal; /* Total validation count */ + uint64_t ValidateMiss; /* Miss validation count */ +public: + CPBL() : Total(0), Miss(0), ValidateTotal(0), ValidateMiss(0) {} + + inline void incTotal() { Total++; } + inline void incMiss() { Miss++; } + inline void incValidateTotal() { ValidateTotal++; } + inline void incValidateMiss() { ValidateMiss++; } + void dump() { + double HitRate = (double)(Total - Miss) * 100 / Total; + double HitRate2 = (double)(ValidateTotal - ValidateMiss) * 100 / Total; + std::cerr << "cpbl.miss = " << Miss << "/" << Total << + " (hit rate=" << HitRate << "%)\n" << + "validate.miss = " << ValidateMiss << "/" << ValidateTotal << + " (hit rate=" << HitRate2 << "%)\n"; + } +}; + +/* + * Large Page Table + * + * This handling is to track every large page created by the guest system. 
+ * Once a `possibly' large page is invalidated, do a search with the tracked + * pages to determine if it is really a large page invalidation. If it cannot + * be found, this is a false alert and we can fall back to the default-size + * page flushing. Otherwise, SoftTLB, IBTC/CPBL optimization, etc. are + * partial or full cleanup due to the true large page flushing. + */ +#define MAX_NUM_LARGEPAGE (1024) + +class LargePageTable { + typedef std::pair<target_ulong, target_ulong> PTE; + typedef std::list<PTE> PTEList; + PTEList Used; + PTEList Free; + CPUState *CS; + uint64_t Total; + uint64_t Miss; + +public: + LargePageTable(CPUState *cpu) : Total(0), Miss(0) { + CS = cpu; + Used.clear(); + Free.resize(MAX_NUM_LARGEPAGE); + } + ~LargePageTable() {} + + enum { + SEARCH = 0, + FLUSH, + }; + + void reset() { + Free.splice(Free.end(), Used); + } + void remove(PTEList::iterator I) { + Free.splice(Free.begin(), Used, I); + } + void allocate(PTE pte) { + /* If the free list is empty, we need to clear softtlb by calling + * tlb_flush() which will then invoke LTP::reset() to clear LPT. 
*/ + if (Free.empty()) + tlb_flush(CS, 0); + Free.front() = pte; + Used.splice(Used.begin(), Free, Free.begin()); + } + void insert(target_ulong addr, target_ulong size) { + for (PTEList::iterator I = Used.begin(), E = Used.end(); I != E; ++I) { + if (I->first == (addr & I->second)) { + Used.splice(Used.begin(), Used, I); + return; + } + } + target_ulong mask = ~(size - 1); + allocate(PTE(addr & mask, mask)); + } + bool search(target_ulong addr, bool mode, target_ulong *addrp, + target_ulong *sizep) { + for (PTEList::iterator I = Used.begin(), E = Used.end(); I != E; ++I) { + if (I->first != (addr & I->second)) + continue; + *addrp = I->first; + *sizep = ~I->second + 1; + if (mode == FLUSH) + remove(I); + return true; + } + return false; + } + void incTotal() { Total++; } + void incMiss() { Miss++; } + void dump() { + double Rate = (double)(Total - Miss) * 100 / Total; + std::cerr << "lpt.miss = " << Miss << "/" << Total << + " (false flushing=" << Rate << "% #pages=" << + Used.size() << ")\n"; + } +}; + + +class BaseTracer; + +struct CPUOptimization { + CPUOptimization(CPUState *cpu, BaseTracer *tracer) + : lpt(LargePageTable(cpu)), pt(tracer) {} + + ITLB itlb; /* instruction TLB */ + IBTC ibtc; /* indirect branch target cache */ + CPBL cpbl; /* cross-page block linking */ + LargePageTable lpt; /* large page handling */ + BaseTracer *pt; /* processor tracer */ +}; + + +static inline int isUserTB(TranslationBlock *tb) { + int is_user = 1; +#if defined(CONFIG_SOFTMMU) +#if defined(TARGET_ALPHA) + is_user = (tb->flags & TB_FLAGS_USER_MODE); +#elif defined(TARGET_ARM) + is_user = ((ARM_TBFLAG_MMUIDX(tb->flags) & 3) == 0); +#elif defined(TARGET_I386) + is_user = ((tb->flags >> HF_CPL_SHIFT) & 3) == 3; +#elif defined(TARGET_MIPS) + is_user = (tb->flags & MIPS_HFLAG_UM); +#elif defined(TARGET_PPC) + is_user = ((tb->flags >> MSR_PR) & 1); +#else +#error "unsupported processor type" +#endif +#endif + return is_user; +} + +static inline ITLB &cpu_get_itlb(CPUArchState *env) 
{ + return ((CPUOptimization *)env->opt_link)->itlb; +} +static inline IBTC &cpu_get_ibtc(CPUArchState *env) { + return ((CPUOptimization *)env->opt_link)->ibtc; +} +static inline CPBL &cpu_get_cpbl(CPUArchState *env) { + return ((CPUOptimization *)env->opt_link)->cpbl; +} +static inline LargePageTable &cpu_get_lpt(CPUArchState *env) { + return ((CPUOptimization *)env->opt_link)->lpt; +} +static inline BaseTracer *cpu_get_tracer(CPUArchState *env) { + return ((CPUOptimization *)env->opt_link)->pt; +} + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/llvm/include/pmu/arm/arm-events.h b/src/llvm/include/pmu/arm/arm-events.h new file mode 100644 index 0000000..b3bb1d7 --- /dev/null +++ b/src/llvm/include/pmu/arm/arm-events.h @@ -0,0 +1,35 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __ARM_EVENTS_H +#define __ARM_EVENTS_H + +#include <vector> +#include "pmu/pmu.h" + +namespace pmu { + +class PMUEvent; + +#if defined(__arm__) +#define pmu_mb() ((void(*)(void))0xffff0fa0)() +#define pmu_rmb() ((void(*)(void))0xffff0fa0)() +#define pmu_wmb() ((void(*)(void))0xffff0fa0)() +#elif defined(__aarch64__) +#define pmu_mb() asm volatile("dmb ish" ::: "memory") +#define pmu_rmb() asm volatile("dmb ishld" ::: "memory") +#define pmu_wmb() asm volatile("dmb ishst" ::: "memory") +#endif + + +int ARMInit(void); + +} /* namespace pmu */ + +#endif /* __ARM_EVENTS_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/pmu/perf_event.h b/src/llvm/include/pmu/perf_event.h new file mode 100644 index 0000000..81fed4a --- /dev/null +++ b/src/llvm/include/pmu/perf_event.h @@ -0,0 +1,992 @@ +/* + * This file is copied from linux-4.11/include/uapi/linux/perf_event.h. 
+ * + * Performance events: + * + * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> + * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra + * + * Data type definitions, declarations, prototypes. + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _UAPI_LINUX_PERF_EVENT_H +#define _UAPI_LINUX_PERF_EVENT_H + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* + * User-space ABI bits: + */ + +/* + * attr.type + */ +enum perf_type_id { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, + + PERF_TYPE_MAX, /* non-ABI */ +}; + +/* + * Generalized performance event event_id types, used by the + * attr.event_id parameter of the sys_perf_event_open() + * syscall: + */ +enum perf_hw_id { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_HW_CPU_CYCLES = 0, + PERF_COUNT_HW_INSTRUCTIONS = 1, + PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_MISSES = 3, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BUS_CYCLES = 6, + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, + PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, + PERF_COUNT_HW_REF_CPU_CYCLES = 9, + + PERF_COUNT_HW_MAX, /* non-ABI */ +}; + +/* + * Generalized hardware cache events: + * + * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x + * { read, write, prefetch } x + * { accesses, misses } + */ +enum perf_hw_cache_id { + PERF_COUNT_HW_CACHE_L1D = 0, + PERF_COUNT_HW_CACHE_L1I = 1, + PERF_COUNT_HW_CACHE_LL = 2, + PERF_COUNT_HW_CACHE_DTLB = 3, + PERF_COUNT_HW_CACHE_ITLB = 4, + PERF_COUNT_HW_CACHE_BPU = 5, + PERF_COUNT_HW_CACHE_NODE = 6, + + PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_id { + PERF_COUNT_HW_CACHE_OP_READ = 0, + PERF_COUNT_HW_CACHE_OP_WRITE = 1, + 
PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, + + PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_result_id { + PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, + PERF_COUNT_HW_CACHE_RESULT_MISS = 1, + + PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ +}; + +/* + * Special "software" events provided by the kernel, even if the hardware + * does not support performance events. These events measure various + * physical and sw events of the kernel (and allow the profiling of them as + * well): + */ +enum perf_sw_ids { + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, + PERF_COUNT_SW_DUMMY = 9, + PERF_COUNT_SW_BPF_OUTPUT = 10, + + PERF_COUNT_SW_MAX, /* non-ABI */ +}; + +/* + * Bits that can be set in attr.sample_type to request information + * in the overflow packets. + */ +enum perf_event_sample_format { + PERF_SAMPLE_IP = 1U << 0, + PERF_SAMPLE_TID = 1U << 1, + PERF_SAMPLE_TIME = 1U << 2, + PERF_SAMPLE_ADDR = 1U << 3, + PERF_SAMPLE_READ = 1U << 4, + PERF_SAMPLE_CALLCHAIN = 1U << 5, + PERF_SAMPLE_ID = 1U << 6, + PERF_SAMPLE_CPU = 1U << 7, + PERF_SAMPLE_PERIOD = 1U << 8, + PERF_SAMPLE_STREAM_ID = 1U << 9, + PERF_SAMPLE_RAW = 1U << 10, + PERF_SAMPLE_BRANCH_STACK = 1U << 11, + PERF_SAMPLE_REGS_USER = 1U << 12, + PERF_SAMPLE_STACK_USER = 1U << 13, + PERF_SAMPLE_WEIGHT = 1U << 14, + PERF_SAMPLE_DATA_SRC = 1U << 15, + PERF_SAMPLE_IDENTIFIER = 1U << 16, + PERF_SAMPLE_TRANSACTION = 1U << 17, + PERF_SAMPLE_REGS_INTR = 1U << 18, + + PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */ +}; + +/* + * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * + * If the user does not pass priv level information via branch_sample_type, + * the kernel uses the event's priv level. 
Branch and event priv levels do + * not have to match. Branch priv level is checked for permissions. + * + * The branch types can be combined, however BRANCH_ANY covers all types + * of branches and therefore it supersedes all the other types. + */ +enum perf_branch_sample_type_shift { + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ + + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ + PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ + + PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */ + PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */ + + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ +}; + +enum perf_branch_sample_type { + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + 
PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, +}; + +#define PERF_SAMPLE_BRANCH_PLM_ALL \ + (PERF_SAMPLE_BRANCH_USER|\ + PERF_SAMPLE_BRANCH_KERNEL|\ + PERF_SAMPLE_BRANCH_HV) + +/* + * Values to determine ABI of the registers dump. + */ +enum perf_sample_regs_abi { + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, +}; + +/* + * Values for the memory transaction event qualifier, mostly for + * abort events. Multiple bits can be set. + */ +enum { + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + + PERF_TXN_MAX = (1 << 8), /* non-ABI */ + + /* bits 32..63 are reserved for the abort code */ + + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, +}; + +/* + * The format of the data returned by read() on a perf event fd, + * as specified by attr.read_format: + * + * struct read_format { + * { u64 value; + * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED + * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING + * { u64 id; } && PERF_FORMAT_ID + * } && !PERF_FORMAT_GROUP + * + * { u64 nr; 
+ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED + * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING + * { u64 value; + * { u64 id; } && PERF_FORMAT_ID + * } cntr[nr]; + * } && PERF_FORMAT_GROUP + * }; + */ +enum perf_event_read_format { + PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, + PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, + PERF_FORMAT_ID = 1U << 2, + PERF_FORMAT_GROUP = 1U << 3, + + PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ +}; + +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ +#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ + /* add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ + +/* + * Hardware event_id to monitor via a performance monitoring event: + * + * @sample_max_stack: Max number of frame pointers in a callchain, + * should be < /proc/sys/kernel/perf_event_max_stack + */ +struct perf_event_attr { + + /* + * Major type: hardware/software/tracepoint/etc. + */ + uint32_t type; + + /* + * Size of the attr structure, for fwd/bwd compat. + */ + uint32_t size; + + /* + * Type specific configuration information. 
+ */ + uint64_t config; + + union { + uint64_t sample_period; + uint64_t sample_freq; + }; + + uint64_t sample_type; + uint64_t read_format; + + uint64_t disabled : 1, /* off by default */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ + mmap : 1, /* include mmap data */ + comm : 1, /* include comm data */ + freq : 1, /* use freq, not period */ + inherit_stat : 1, /* per task counts */ + enable_on_exec : 1, /* next exec enables */ + task : 1, /* trace fork/exit */ + watermark : 1, /* wakeup_watermark */ + /* + * precise_ip: + * + * 0 - SAMPLE_IP can have arbitrary skid + * 1 - SAMPLE_IP must have constant skid + * 2 - SAMPLE_IP requested to have 0 skid + * 3 - SAMPLE_IP must have 0 skid + * + * See also PERF_RECORD_MISC_EXACT_IP + */ + precise_ip : 2, /* skid constraint */ + mmap_data : 1, /* non-exec mmap data */ + sample_id_all : 1, /* sample_type all events */ + + exclude_host : 1, /* don't count in host */ + exclude_guest : 1, /* don't count in guest */ + + exclude_callchain_kernel : 1, /* exclude kernel callchains */ + exclude_callchain_user : 1, /* exclude user callchains */ + mmap2 : 1, /* include mmap with inode data */ + comm_exec : 1, /* flag comm events that are due to an exec */ + use_clockid : 1, /* use @clockid for time fields */ + context_switch : 1, /* context switch data */ + write_backward : 1, /* Write ring buffer from end to beginning */ + __reserved_1 : 36; + + union { + uint32_t wakeup_events; /* wakeup every n events */ + uint32_t wakeup_watermark; /* bytes before wakeup */ + }; + + uint32_t bp_type; + union { + uint64_t bp_addr; + uint64_t config1; /* extension of config */ + }; + union { + uint64_t bp_len; + uint64_t config2; /* extension of config1 */ + }; + uint64_t branch_sample_type; /* enum 
perf_branch_sample_type */ + + /* + * Defines set of user regs to dump on samples. + * See asm/perf_regs.h for details. + */ + uint64_t sample_regs_user; + + /* + * Defines size of the user stack to dump on samples. + */ + uint32_t sample_stack_user; + + int32_t clockid; + /* + * Defines set of regs to dump for each sample + * state captured on: + * - precise = 0: PMU interrupt + * - precise > 0: sampled instruction + * + * See asm/perf_regs.h for details. + */ + uint64_t sample_regs_intr; + + /* + * Wakeup watermark for AUX area + */ + uint32_t aux_watermark; + uint16_t sample_max_stack; + uint16_t __reserved_2; /* align to uint64_t */ +}; + +#define perf_flags(attr) (*(&(attr)->read_format + 1)) + +/* + * Ioctls that can be done on a perf event fd: + */ +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, uint64_t) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR('$', 7, uint64_t *) +#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, uint32_t) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, uint32_t) + +enum perf_event_ioc_flags { + PERF_IOC_FLAG_GROUP = 1U << 0, +}; + +/* + * Structure of the page that can be mapped via mmap + */ +struct perf_event_mmap_page { + uint32_t version; /* version number of this structure */ + uint32_t compat_version; /* lowest version this is compat with */ + + /* + * Bits needed to read the hw events in user-space. 
+ * + * u32 seq, time_mult, time_shift, index, width; + * u64 count, enabled, running; + * u64 cyc, time_offset; + * s64 pmc = 0; + * + * do { + * seq = pc->lock; + * barrier() + * + * enabled = pc->time_enabled; + * running = pc->time_running; + * + * if (pc->cap_usr_time && enabled != running) { + * cyc = rdtsc(); + * time_offset = pc->time_offset; + * time_mult = pc->time_mult; + * time_shift = pc->time_shift; + * } + * + * index = pc->index; + * count = pc->offset; + * if (pc->cap_user_rdpmc && index) { + * width = pc->pmc_width; + * pmc = rdpmc(index - 1); + * } + * + * barrier(); + * } while (pc->lock != seq); + * + * NOTE: for obvious reason this only works on self-monitoring + * processes. + */ + uint32_t lock; /* seqlock for synchronization */ + uint32_t index; /* hardware event identifier */ + int64_t offset; /* add to hardware event value */ + uint64_t time_enabled; /* time event active */ + uint64_t time_running; /* time event on cpu */ + union { + uint64_t capabilities; + struct { + uint64_t cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */ + cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */ + + cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ + cap_user_time : 1, /* The time_* fields are used */ + cap_user_time_zero : 1, /* The time_zero field is used */ + cap_____res : 59; + }; + }; + + /* + * If cap_user_rdpmc this field provides the bit-width of the value + * read using the rdpmc() or equivalent instruction. This can be used + * to sign extend the result like: + * + * pmc <<= 64 - width; + * pmc >>= 64 - width; // signed shift right + * count += pmc; + */ + uint16_t pmc_width; + + /* + * If cap_usr_time the below fields can be used to compute the time + * delta since time_enabled (in ns) using rdtsc or similar. 
+ * + * u64 quot, rem; + * u64 delta; + * + * quot = (cyc >> time_shift); + * rem = cyc & (((u64)1 << time_shift) - 1); + * delta = time_offset + quot * time_mult + + * ((rem * time_mult) >> time_shift); + * + * Where time_offset,time_mult,time_shift and cyc are read in the + * seqcount loop described above. This delta can then be added to + * enabled and possible running (if index), improving the scaling: + * + * enabled += delta; + * if (index) + * running += delta; + * + * quot = count / running; + * rem = count % running; + * count = quot * enabled + (rem * enabled) / running; + */ + uint16_t time_shift; + uint32_t time_mult; + uint64_t time_offset; + /* + * If cap_usr_time_zero, the hardware clock (e.g. TSC) can be calculated + * from sample timestamps. + * + * time = timestamp - time_zero; + * quot = time / time_mult; + * rem = time % time_mult; + * cyc = (quot << time_shift) + (rem << time_shift) / time_mult; + * + * And vice versa: + * + * quot = cyc >> time_shift; + * rem = cyc & (((u64)1 << time_shift) - 1); + * timestamp = time_zero + quot * time_mult + + * ((rem * time_mult) >> time_shift); + */ + uint64_t time_zero; + uint32_t size; /* Header size up to __reserved[] fields. */ + + /* + * Hole for extension of the self monitor capabilities + */ + + uint8_t __reserved[118*8+4]; /* align to 1k. */ + + /* + * Control data for the mmap() data buffer. + * + * User-space reading the @data_head value should issue an smp_rmb(), + * after reading this value. + * + * When the mapping is PROT_WRITE the @data_tail value should be + * written by userspace to reflect the last read data, after issueing + * an smp_mb() to separate the data read from the ->data_tail store. + * In this case the kernel will not over-write unread data. + * + * See perf_output_put_handle() for the data ordering. + * + * data_{offset,size} indicate the location and size of the perf record + * buffer within the mmapped area. 
+ */ + uint64_t data_head; /* head in the data section */ + uint64_t data_tail; /* user-space written tail */ + uint64_t data_offset; /* where the buffer starts */ + uint64_t data_size; /* data buffer size */ + + /* + * AUX area is defined by aux_{offset,size} fields that should be set + * by the userspace, so that + * + * aux_offset >= data_offset + data_size + * + * prior to mmap()ing it. Size of the mmap()ed area should be aux_size. + * + * Ring buffer pointers aux_{head,tail} have the same semantics as + * data_{head,tail} and same ordering rules apply. + */ + uint64_t aux_head; + uint64_t aux_tail; + uint64_t aux_offset; + uint64_t aux_size; +}; + +#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) +#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_RECORD_MISC_KERNEL (1 << 0) +#define PERF_RECORD_MISC_USER (2 << 0) +#define PERF_RECORD_MISC_HYPERVISOR (3 << 0) +#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) +#define PERF_RECORD_MISC_GUEST_USER (5 << 0) + +/* + * Indicates that /proc/PID/maps parsing are truncated by time out. + */ +#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) +/* + * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on + * different events so can reuse the same bit position. + * Ditto PERF_RECORD_MISC_SWITCH_OUT. + */ +#define PERF_RECORD_MISC_MMAP_DATA (1 << 13) +#define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) +/* + * Indicates that the content of PERF_SAMPLE_IP points to + * the actual instruction that triggered the event. See also + * perf_event_attr::precise_ip. 
+ */ +#define PERF_RECORD_MISC_EXACT_IP (1 << 14) +/* + * Reserve the last bit to indicate some extended misc field + */ +#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) + +struct perf_event_header { + uint32_t type; + uint16_t misc; + uint16_t size; +}; + +enum perf_event_type { + + /* + * If perf_event_attr.sample_id_all is set then all event types will + * have the sample_type selected fields related to where/when + * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU, + * IDENTIFIER) described in PERF_RECORD_SAMPLE below, it will be stashed + * just after the perf_event_header and the fields already present for + * the existing fields, i.e. at the end of the payload. That way a newer + * perf.data file will be supported by older perf tools, with these new + * optional fields being ignored. + * + * struct sample_id { + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 id; } && PERF_SAMPLE_IDENTIFIER + * } && perf_event_attr::sample_id_all + * + * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. The + * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed + * relative to header.size. + */ + + /* + * The MMAP events record the PROT_EXEC mappings so that we can + * correlate userspace IPs to code. 
They have the following structure: + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_MMAP = 1, + + /* + * struct { + * struct perf_event_header header; + * u64 id; + * u64 lost; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_LOST = 2, + + /* + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * char comm[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_COMM = 3, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * u64 time; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_EXIT = 4, + + /* + * struct { + * struct perf_event_header header; + * u64 time; + * u64 id; + * u64 stream_id; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_THROTTLE = 5, + PERF_RECORD_UNTHROTTLE = 6, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * u64 time; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_FORK = 7, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, tid; + * + * struct read_format values; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_READ = 8, + + /* + * struct { + * struct perf_event_header header; + * + * # + * # Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. + * # The advantage of PERF_SAMPLE_IDENTIFIER is that its position + * # is fixed relative to header. 
+ * # + * + * { u64 id; } && PERF_SAMPLE_IDENTIFIER + * { u64 ip; } && PERF_SAMPLE_IP + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 addr; } && PERF_SAMPLE_ADDR + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 period; } && PERF_SAMPLE_PERIOD + * + * { struct read_format values; } && PERF_SAMPLE_READ + * + * { u64 nr, + * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN + * + * # + * # The RAW record below is opaque data wrt the ABI + * # + * # That is, the ABI doesn't make any promises wrt to + * # the stability of its content, it may vary depending + * # on event, hardware, kernel version and phase of + * # the moon. + * # + * # In other words, PERF_SAMPLE_RAW contents are not an ABI. + * # + * + * { u32 size; + * char data[size];}&& PERF_SAMPLE_RAW + * + * { u64 nr; + * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * + * { u64 weight; } && PERF_SAMPLE_WEIGHT + * { u64 data_src; } && PERF_SAMPLE_DATA_SRC + * { u64 transaction; } && PERF_SAMPLE_TRANSACTION + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR + * }; + */ + PERF_RECORD_SAMPLE = 9, + + /* + * The MMAP2 records are an augmented version of MMAP, they add + * maj, min, ino numbers to be used to uniquely identify each mapping + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * u32 maj; + * u32 min; + * u64 ino; + * u64 ino_generation; + * u32 prot, flags; + * char filename[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_MMAP2 = 10, + + /* + * Records that new data landed in the AUX buffer part. 
+ * + * struct { + * struct perf_event_header header; + * + * u64 aux_offset; + * u64 aux_size; + * u64 flags; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_AUX = 11, + + /* + * Indicates that instruction trace has started + * + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * }; + */ + PERF_RECORD_ITRACE_START = 12, + + /* + * Records the dropped/lost sample number. + * + * struct { + * struct perf_event_header header; + * + * u64 lost; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_LOST_SAMPLES = 13, + + /* + * Records a context switch in or out (flagged by + * PERF_RECORD_MISC_SWITCH_OUT). See also + * PERF_RECORD_SWITCH_CPU_WIDE. + * + * struct { + * struct perf_event_header header; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_SWITCH = 14, + + /* + * CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and + * next_prev_tid that are the next (switching out) or previous + * (switching in) pid/tid. + * + * struct { + * struct perf_event_header header; + * u32 next_prev_pid; + * u32 next_prev_tid; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_SWITCH_CPU_WIDE = 15, + + PERF_RECORD_MAX, /* non-ABI */ +}; + +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 + +enum perf_callchain_context { + PERF_CONTEXT_HV = (uint64_t)-32, + PERF_CONTEXT_KERNEL = (uint64_t)-128, + PERF_CONTEXT_USER = (uint64_t)-512, + + PERF_CONTEXT_GUEST = (uint64_t)-2048, + PERF_CONTEXT_GUEST_KERNEL = (uint64_t)-2176, + PERF_CONTEXT_GUEST_USER = (uint64_t)-2560, + + PERF_CONTEXT_MAX = (uint64_t)-4095, +}; + +/** + * PERF_RECORD_AUX::flags bits + */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ + +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* 
O_CLOEXEC */ + +union perf_mem_data_src { + uint64_t val; + struct { + uint64_t mem_op:5, /* type of opcode */ + mem_lvl:14, /* memory hierarchy level */ + mem_snoop:5, /* snoop mode */ + mem_lock:2, /* lock instr */ + mem_dtlb:7, /* tlb access */ + mem_rsvd:31; + }; +}; + +/* type of opcode (load/store/prefetch,code) */ +#define PERF_MEM_OP_NA 0x01 /* not available */ +#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ +#define PERF_MEM_OP_STORE 0x04 /* store instruction */ +#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ +#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ +#define PERF_MEM_OP_SHIFT 0 + +/* memory hierarchy (memory level, hit or miss) */ +#define PERF_MEM_LVL_NA 0x01 /* not available */ +#define PERF_MEM_LVL_HIT 0x02 /* hit level */ +#define PERF_MEM_LVL_MISS 0x04 /* miss level */ +#define PERF_MEM_LVL_L1 0x08 /* L1 */ +#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x20 /* L2 */ +#define PERF_MEM_LVL_L3 0x40 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +/* snoop mode */ +#define PERF_MEM_SNOOP_NA 0x01 /* not available */ +#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ +#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +/* locked instruction */ +#define PERF_MEM_LOCK_NA 0x01 /* not available */ +#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 + +/* TLB access */ +#define PERF_MEM_TLB_NA 0x01 /* not available */ +#define 
PERF_MEM_TLB_HIT 0x02 /* hit level */ +#define PERF_MEM_TLB_MISS 0x04 /* miss level */ +#define PERF_MEM_TLB_L1 0x08 /* L1 */ +#define PERF_MEM_TLB_L2 0x10 /* L2 */ +#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 + +#define PERF_MEM_S(a, s) \ + (((uint64_t)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) + +/* + * single taken branch record layout: + * + * from: source instruction (may not always be a branch insn) + * to: branch target + * mispred: branch target was mispredicted + * predicted: branch target was predicted + * + * support for mispred, predicted is optional. In case it + * is not supported mispred = predicted = 0. + * + * in_tx: running in a hardware transaction + * abort: aborting a hardware transaction + * cycles: cycles from last branch (or 0 if not supported) + */ +struct perf_branch_entry { + uint64_t from; + uint64_t to; + uint64_t mispred:1, /* target mispredicted */ + predicted:1,/* target predicted */ + in_tx:1, /* in transaction */ + abort:1, /* transaction abort */ + cycles:16, /* cycle count to last branch */ + reserved:44; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/src/llvm/include/pmu/pmu-events.h b/src/llvm/include/pmu/pmu-events.h new file mode 100644 index 0000000..2c31ae9 --- /dev/null +++ b/src/llvm/include/pmu/pmu-events.h @@ -0,0 +1,131 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __PMU_EVENTS_H +#define __PMU_EVENTS_H + +#include <list> +#include <vector> +#include <signal.h> +#include "pmu-global.h" +#include "pmu.h" + +namespace pmu { + +#define PMU_MAX_EVENTS (1024) + +class Timer; + +/* Mode of the event. 
*/ +enum { + MODE_NONE = 0, + MODE_COUNTER = ((uint32_t)1U << 1), + MODE_SAMPLE = ((uint32_t)1U << 2), + MODE_SAMPLE_IP = ((uint32_t)1U << 3), + MODE_SAMPLE_READ = ((uint32_t)1U << 4), +}; + +/* State of the event. */ +enum { + STATE_STOP = 0, + STATE_START = ((uint32_t)1U << 1), + STATE_GOTO_STOP = ((uint32_t)1U << 2), + STATE_GOTO_START = ((uint32_t)1U << 3), +}; + +/* Sampling mmap buffer information. */ +struct MMap { + void *Base; + uint64_t Size; + uint64_t Prev; +}; + +/* Event. */ +struct PMUEvent { + PMUEvent() : Hndl(0), Mode(MODE_NONE), State(STATE_STOP) {} + + Handle Hndl; /* Unique handle value */ + int Mode; /* Event mode */ + int State; /* Current event state */ + std::vector<int> FD; /* Opened fd(s) of this event */ + MMap Data; /* mmap data info */ + MMap Aux; /* mmap aux info */ + uint64_t Watermark; /* The bytes before wakeup */ + /* Overflow handling function pointer */ + union { + void *OverflowHandler; + SampleHandlerTy SampleHandler; + }; + void *Opaque; /* Opaque pointer passed to the overflow handler. */ + + int getFD() { return FD[0]; } /* Group leader fd */ +}; + +/* + * Event Manager. + */ +class EventManager { + typedef std::list<PMUEvent *> EventList; + + PMUEvent Events[PMU_MAX_EVENTS]; /* Pre-allocated events */ + EventList FreeEvents; /* Free events */ + EventList SampleEvents; /* Sampling events */ + Timer *EventTimer; /* Timer for sampling events. */ + std::vector<PMUEvent *> ChangedEvents; + +public: + EventManager(); + ~EventManager(); + + /* Return the event of the input handle. */ + PMUEvent *GetEvent(Handle Hndl); + + /* Add a counting event and return its handle. */ + Handle AddEvent(int fd); + + /* Add a sampling event and return its handle. */ + Handle AddSampleEvent(unsigned NumFDs, int *FD, uint64_t DataSize, void *Data, + uint32_t Mode, SampleConfig &Config); + + /* Notify that an event is started. */ + void StartEvent(PMUEvent *Event, bool ShouldLock = true); + + /* Notify that an event is stopped. 
*/ + void StopEvent(PMUEvent *Event, bool ShouldLock = true); + + /* Notify that an event is deleted. */ + void DeleteEvent(PMUEvent *Event); + + /* Stop the event manager. */ + void Pause(); + + /* Restart the event manager. */ + void Resume(); + + friend void DefaultHandler(int signum, siginfo_t *info, void *data); +}; + +/* Interval timer. */ +class Timer { + timer_t T; + +public: + Timer(int Signum, int TID); + ~Timer(); + + /* Start a timer that expires just once. */ + void Start(); + + /* Stop a timer.*/ + void Stop(); +}; + +} /* namespace pmu */ + +#endif /* __PMU_EVENTS_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/pmu/pmu-global.h b/src/llvm/include/pmu/pmu-global.h new file mode 100644 index 0000000..ed059a4 --- /dev/null +++ b/src/llvm/include/pmu/pmu-global.h @@ -0,0 +1,52 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __PMU_GLOBAL_H +#define __PMU_GLOBAL_H + +#if defined(__i386__) || defined(__x86_64__) +#include "pmu/x86/x86-events.h" +#elif defined(__arm__) || defined(__aarch64__) +#include "pmu/arm/arm-events.h" +#elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) +#include "pmu/ppc/ppc-events.h" +#endif + +#include "pmu/pmu-utils.h" +#include "pmu/pmu.h" + +namespace pmu { + +#define PMU_SIGNAL_NUM SIGIO +#define PMU_SAMPLE_PERIOD 1e6 +#define PMU_SAMPLE_PAGES 4 + +class EventManager; + +/* Pre-defined event identity. */ +struct EventID { + int Type; /* Perf major type: hardware/software/etc */ + int Config; /* Perf type specific configuration information */ +}; + +/* System-wide configuration. 
*/ +struct GlobalConfig { + int PageSize; /* Host page size */ + int SignalReceiver; /* TID of the signal receiver */ + uint32_t Timeout; /* Timer period in nanosecond */ + int PerfVersion; /* Perf version used in this PMU tool */ + int OSPerfVersion; /* Perf version used in the OS kernel */ +}; + +extern EventManager *EventMgr; +extern GlobalConfig SysConfig; + +} /* namespace pmu */ + +#endif /* __PMU_GLOBAL_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/pmu/pmu-utils.h b/src/llvm/include/pmu/pmu-utils.h new file mode 100644 index 0000000..5e3e014 --- /dev/null +++ b/src/llvm/include/pmu/pmu-utils.h @@ -0,0 +1,106 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __PMU_UTILS_H +#define __PMU_UTILS_H + +#include <unistd.h> +#include <string.h> +#include <pthread.h> +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include "perf_event.h" + +#ifndef ACCESS_ONCE +#define ACCESS_ONCE(x) (*(volatile decltype(x) *)&(x)) +#endif + +namespace pmu { + +static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, + int cpu, int group_fd, + unsigned long flags) { + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); +} + +static inline void perf_attr_init(struct perf_event_attr *attr, int type, + int config) { + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = type; + attr->config = config; + attr->size = sizeof(struct perf_event_attr); + attr->disabled = 1; + attr->exclude_kernel = 1; + attr->exclude_guest = 1; + attr->exclude_hv = 1; +} + +static inline int perf_event_start(int fd) { + return ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); +} + +static inline int perf_event_stop(int fd) { + return ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); +} + +static inline int perf_event_reset(int fd) { + return ioctl(fd, PERF_EVENT_IOC_RESET, 0); +} + +static inline int perf_event_set_filter(int fd, 
const char *arg) { + return ioctl(fd, PERF_EVENT_IOC_SET_FILTER, (void *)arg); +} + +static inline uint64_t perf_read_data_head(void *header) { + struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header; + uint64_t head = ACCESS_ONCE(pc->data_head); + pmu_rmb(); + return head; +} + +static inline void perf_write_data_tail(void *header, uint64_t val) { + struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header; + pmu_mb(); + pc->data_tail = val; +} + +static inline uint64_t perf_read_aux_head(void *header) { + struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header; + uint64_t head = ACCESS_ONCE(pc->aux_head); + pmu_rmb(); + return head; +} + +static inline void perf_write_aux_tail(void *header, uint64_t val) { + struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header; + pmu_mb(); + pc->aux_tail = val; +} + +static inline int isPowerOf2(uint64_t value) { + if (!value) + return 0; + return !(value & (value - 1)); +} + +/* Convert system errno to PMU error code. */ +static inline int ErrorCode(int err) +{ + switch (err) { + case EPERM: + case EACCES: return PMU_EPERM; + case ENOMEM: return PMU_ENOMEM; + default: return PMU_EEVENT; + } +} + +} /* namespace pmu */ + +#endif /* __PMU_UTILS_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/pmu/pmu.h b/src/llvm/include/pmu/pmu.h new file mode 100644 index 0000000..89a7c98 --- /dev/null +++ b/src/llvm/include/pmu/pmu.h @@ -0,0 +1,170 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + * + * Hardware Performance Monitoring Unit (PMU), C++ interfaces. + */ + +#ifndef __PMU_H +#define __PMU_H + +#include <vector> +#include <memory> +#include <stdint.h> + +namespace pmu { + +#define PMU_GROUP_EVENTS (8) +#define PMU_TIMER_PERIOD (400) /* micro-second */ +#define PMU_INVALID_HNDL ((Handle)-1) + +typedef unsigned Handle; +/* Sampling event overflow handling. 
*/ +typedef std::vector<uint64_t> SampleList; +typedef std::unique_ptr<SampleList> SampleDataPtr; +typedef void (*SampleHandlerTy)(Handle Hndl, SampleDataPtr Data, void *Opaque); + +/* Error code. */ +enum { + PMU_OK = 0, /* No error */ + PMU_EINVAL = -1, /* Invalid argument */ + PMU_ENOMEM = -2, /* Insufficient memory */ + PMU_ENOEVENT = -3, /* Pre-defined event not available */ + PMU_EEVENT = -4, /* Hardware event error */ + PMU_EPERM = -5, /* Permission denied */ + PMU_EINTER = -6, /* Internal error */ + PMU_EDECODER = -7, /* Instruction trace decoder error */ +}; + +/* Pre-defined event code. */ +enum { + /* Basic events */ + PMU_CPU_CYCLES = 0, + PMU_REF_CPU_CYCLES, + PMU_INSTRUCTIONS, + PMU_LLC_REFERENCES, + PMU_LLC_MISSES, + PMU_BRANCH_INSTRUCTIONS, + PMU_BRANCH_MISSES, + /* Instruction cache events */ + PMU_ICACHE_HITS, + PMU_ICACHE_MISSES, + /* Memory instruction events */ + PMU_MEM_LOADS, + PMU_MEM_STORES, + + PMU_EVENT_MAX, +}; + +/* PMU initial configuration. */ +struct PMUConfig { + /* Input */ + int SignalReceiver; /* TID of the signal receiver. 0 for auto-select. */ + uint32_t Timeout; /* Timer period in micro-second. 0 for auto-select. */ + + /* Output */ + int PerfVersion; /* Perf version used in this PMU tool */ + int OSPerfVersion; /* Perf version used in the OS kernel */ +}; + +/* Config for sampling with one or multiple event(s).*/ +struct SampleConfig { + unsigned NumEvents; /* Number of events in the event group */ + unsigned EventCode[PMU_GROUP_EVENTS]; /* Event group. The 1st event is the leader. */ + unsigned NumPages; /* Number of pages as the sample buffer size. (must be 2^n) */ + uint64_t Period; /* Sampling period of the group leader. */ + uint64_t Watermark; /* Bytes before wakeup. 0 for every timer period. */ + SampleHandlerTy SampleHandler; /* User handler routine */ + void *Opaque; /* An opaque pointer passed to the overflow handler. */ +}; + +/* Config for sampling with only one event. 
*/ +struct Sample1Config { + unsigned EventCode; /* Pre-defined event to trigger counter overflow */ + unsigned NumPages; /* Number of pages as the sample buffer size. (must be 2^n) */ + uint64_t Period; /* Sampling period */ + uint64_t Watermark; /* Bytes before wakeup. 0 for every timer period. */ + SampleHandlerTy SampleHandler; /* User handler routine */ + void *Opaque; /* An opaque pointer passed to the overflow handler. */ +}; + +/* + * PMU main tools. + */ +class PMU { + PMU() = delete; + ~PMU() = delete; + +public: + /* Initialize the PMU module. */ + static int Init(PMUConfig &Config); + + /* Finalize the PMU module. */ + static int Finalize(void); + + /* Stop the PMU module. When the PMU module is paused, the user can continue + * to use counting events, but the overflow handler will not be invoked. */ + static int Pause(void); + + /* Restart the PMU module. After the PMU module is resumed, the overflow + * handler will be invoked. */ + static int Resume(void); + + /* Start a counting/sampling/tracing event. */ + static int Start(Handle Hndl); + + /* Stop a counting/sampling/tracing event. */ + static int Stop(Handle Hndl); + + /* Reset the hardware counter. */ + static int Reset(Handle Hndl); + + /* Remove an event. */ + static int Cleanup(Handle Hndl); + + /* Start/stop a sampling/tracing event without acquiring a lock. + * Note that these two functions should only be used within the overflow + * handler. Since the overflow handling is already in a locked section, + * acquiring a lock is not required. */ + static int StartUnlocked(Handle Hndl); + static int StopUnlocked(Handle Hndl); + + /* Open an event using the pre-defined event code. */ + static int CreateEvent(unsigned EventCode, Handle &Hndl); + + /* Open an event using the raw event number and umask value. + * The raw event code is computed as (RawEvent | (Umask << 8)). 
*/ + static int CreateRawEvent(unsigned RawEvent, unsigned Umask, Handle &Hndl); + + /* Open a sampling event, with the 1st EventCode as the interrupt event. + * The sample data will be recorded in a vector of type 'uint64_t'. + * The following vector shows the data format of sampling with N events: + * { pc, val1, val2, ..., valN, # 1st sample + * ... + * pc, val1, val2, ..., valN }; # nth sample + * + * Note that ownership of the output vector is transferred to the user. + * It is the user's responsibility to free the resource of the vector. */ + static int CreateSampleEvent(SampleConfig &Config, Handle &Hndl); + + /* Generate an IP histogram, using EventCode as the interrupt event. + * The IP histogram will be recorded in a vector of type 'uint64_t' with + * the format: { pc1, pc2, pc3, ..., pcN }. + * Note that ownership of the output vector is transferred to the user. + * It is the user's responsibility to free the resource of the vector. */ + static int CreateSampleIP(Sample1Config &Config, Handle &Hndl); + + /* Read value from the hardware counter. */ + static int ReadEvent(Handle Hndl, uint64_t &Value); + + /* Convert error code to string. */ + static const char *strerror(int ErrCode); +}; + +} /* namespace pmu */ + +#endif /* __PMU_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/pmu/ppc/ppc-events.h b/src/llvm/include/pmu/ppc/ppc-events.h new file mode 100644 index 0000000..f48e10d --- /dev/null +++ b/src/llvm/include/pmu/ppc/ppc-events.h @@ -0,0 +1,30 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __PPC_EVENTS_H +#define __PPC_EVENTS_H + +#include <vector> +#include "pmu/pmu.h" + +namespace pmu { + +class PMUEvent; + +#if defined(_ARCH_PPC) || defined(_ARCH_PPC64) +#define pmu_mb() __asm__ __volatile__ ("sync" : : : "memory") +#define pmu_rmb() __asm__ __volatile__ ("sync" : : : "memory") +#define pmu_wmb() __asm__ __volatile__ ("sync" : : : "memory") +#endif + +int PPCInit(void); + +} /* namespace pmu */ + +#endif /* __PPC_EVENTS_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/pmu/x86/x86-events.h b/src/llvm/include/pmu/x86/x86-events.h new file mode 100644 index 0000000..c6fdb95 --- /dev/null +++ b/src/llvm/include/pmu/x86/x86-events.h @@ -0,0 +1,38 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __X86_EVENTS_H +#define __X86_EVENTS_H + +#include <vector> +#include "pmu/pmu.h" + +namespace pmu { + +class PMUEvent; + +#if defined(__i386__) +/* + * Some non-Intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ +#define pmu_mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#define pmu_rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#define pmu_wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#elif defined(__x86_64__) +#define pmu_mb() asm volatile("mfence" ::: "memory") +#define pmu_rmb() asm volatile("lfence" ::: "memory") +#define pmu_wmb() asm volatile("sfence" ::: "memory") +#endif + +int X86Init(void); + +} /* namespace pmu */ + +#endif /* __X86_EVENTS_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/src/llvm/include/qemu-types.h b/src/llvm/include/qemu-types.h new file mode 100644 index 0000000..f2430e0 --- /dev/null +++ b/src/llvm/include/qemu-types.h @@ -0,0 +1,33 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __QEMU_TYPES_H +#define __QEMU_TYPES_H + +extern "C" { +#include "cpu.h" +#include "exec/tb-hash.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" +#include "tcg/tcg.h" +#include "qemu/atomic.h" +#include "hqemu.h" + +extern uint8_t *tb_ret_addr; +extern uint8_t *ibtc_ret_addr; + +} + +#ifdef inline +#undef inline +#endif + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/llvm/include/tcg-opc-vector.h b/src/llvm/include/tcg-opc-vector.h new file mode 100644 index 0000000..bc03ea1 --- /dev/null +++ b/src/llvm/include/tcg-opc-vector.h @@ -0,0 +1,80 @@ +DEF(vector_start, 0, 0, 0, 0) + +DEF(vmov_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vload_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vstore_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vsitofp_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vuitofp_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfptosi_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfptoui_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vadd_i8_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i16_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i8_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i16_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vsub_i8_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i16_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i8_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i16_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vadd_f32_128, 0, 0, 0, 0) +DEF(vadd_f64_128, 0, 0, 0, 0) +DEF(vadd_f32_64, 0, 0, 0, 0) +DEF(vpadd_f32_128, 0, 0, 0, 0) +DEF(vpadd_f64_128, 0, 0, 0, 0) +DEF(vpadd_f32_64, 0, 0, 0, 0) +DEF(vsub_f32_128, 0, 0, 0, 0) +DEF(vsub_f64_128, 0, 0, 0,0) +DEF(vsub_f32_64, 0, 0, 0, 0) 
+DEF(vabd_f32_128, 0, 0, 0 ,0) +DEF(vabd_f64_128, 0, 0, 0 ,0) +DEF(vabd_f32_64, 0, 0, 0, 0) + +DEF(vfma_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfma_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfma_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfms_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfms_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfms_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vmul_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmul_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmul_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmla_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmla_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmla_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmls_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmls_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmls_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vdiv_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vdiv_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vdiv_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vand_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vand_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbic_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbic_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vorr_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vorr_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vorn_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vorn_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(veor_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(veor_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vbif_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbif_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbit_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbit_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbsl_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbsl_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vector_end, 0, 0, 0, 0) diff --git a/src/llvm/include/tracer.h b/src/llvm/include/tracer.h new file mode 100644 index 0000000..2813e0e --- /dev/null +++ b/src/llvm/include/tracer.h @@ -0,0 +1,109 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, 
Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __TRACE_H +#define __TRACE_H + +#include <vector> +#include <iostream> +#include "qemu-types.h" +#include "optimization.h" +#include "utils.h" + + +/* + * Base processor tracer + */ +class BaseTracer { +public: + CPUArchState *Env; + void *Perf; + + BaseTracer(CPUArchState *env) : Env(env), Perf(nullptr) {} + virtual ~BaseTracer() {} + virtual void Reset() {} + virtual void Record(uintptr_t next_tb, TranslationBlock *tb) {} + + /* Create and return the tracer object based on LLVM_MODE. */ + static BaseTracer *CreateTracer(CPUArchState *env); + + /* Release the trace resources. */ + static void DeleteTracer(CPUArchState *env); +}; + + +/* + * Trace of a single basic block + */ +class SingleBlockTracer : public BaseTracer { + TranslationBlock *TB; + +public: + SingleBlockTracer(CPUArchState *env); + + void Record(uintptr_t next_tb, TranslationBlock *tb) override; +}; + + +/* + * Trace with NET trace formation algorithm + */ +#define NET_PROFILE_THRESHOLD 50 +#if defined(CONFIG_SOFTMMU) +# define NET_PREDICT_THRESHOLD 16 +#else +# define NET_PREDICT_THRESHOLD 64 +#endif +class NETTracer : public BaseTracer { + bool isTraceHead(uintptr_t next_tb, TranslationBlock *tb, bool NewTB); + +public: + typedef std::vector<TranslationBlock *> TBVec; + TBVec TBs; + + NETTracer(CPUArchState *env, int Mode); + ~NETTracer(); + + void Reset() override; + void Record(uintptr_t next_tb, TranslationBlock *tb) override; + inline void Profile(TranslationBlock *tb); + inline void Predict(TranslationBlock *tb); +}; + +/* Return the address of the patch point to the trace code. */ +static inline uintptr_t tb_get_jmp_entry(TranslationBlock *tb) { + return (uintptr_t)tb->tc_ptr + tb->patch_jmp; +} +/* Return the initial jump target address of the patch point. 
*/ +static inline uintptr_t tb_get_jmp_next(TranslationBlock *tb) { + return (uintptr_t)tb->tc_ptr + tb->patch_next; +} +static inline SingleBlockTracer &getSingleBlockTracer(CPUArchState *env) { + return *static_cast<SingleBlockTracer *>(cpu_get_tracer(env)); +} +static inline NETTracer &getNETTracer(CPUArchState *env) { + return *static_cast<NETTracer *>(cpu_get_tracer(env)); +} + +static inline void delete_image(TranslationBlock *tb) +{ +#if defined(CONFIG_LLVM) && defined(CONFIG_SOFTMMU) + delete (char *)tb->image; + tb->image = nullptr; +#endif +} + +static inline bool update_tb_mode(TranslationBlock *tb, int from, int to) { + if (tb->mode != from) + return false; + return Atomic<int>::testandset(&tb->mode, from, to); +} + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/src/llvm/include/utils.h b/src/llvm/include/utils.h new file mode 100644 index 0000000..90b36d9 --- /dev/null +++ b/src/llvm/include/utils.h @@ -0,0 +1,260 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __UTILS_H +#define __UTILS_H + +#include <cstdint> +#include <cstdlib> +#include <sstream> +#include <iomanip> +#include <set> +#include <map> +#include <vector> +#include "qemu-types.h" + + +#ifndef timersub +# define timersub(a, b, result) \ + do { \ + (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ + (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ + if ((result)->tv_usec < 0) { \ + --(result)->tv_sec; \ + (result)->tv_usec += 1000000; \ + } \ + } while (0) +#endif + +#if !defined(__i386__) && !defined(__x86_64__) +#define USE_PTHREAD_MUTEX +#endif + +#if defined(USE_PTHREAD_MUTEX) +# define hqemu_lock_t pthread_mutex_t +# define hqemu_lock_init(lock) pthread_mutex_init(lock, nullptr) +# define hqemu_lock(lock) pthread_mutex_lock(lock) +# define hqemu_unlock(lock) pthread_mutex_unlock(lock) +#else +# define hqemu_lock_t volatile int +# define hqemu_lock_init(lock) do { *lock = 0; } while(0) +# define hqemu_lock(lock) \ + do { \ + while (!Atomic<int>::testandset(lock,0,1)) { \ + while(*(lock)) _mm_pause(); \ + } \ + } while(0) +# define hqemu_unlock(lock) \ + do { \ + barrier(); \ + *(lock) = 0; \ + } while(0) +#endif /* USE_PTHREAD_MUTEX */ + + +/* + * Atomic Utilities + */ +template<class T> +class Atomic { +public: + static T inc_return(volatile T *p) { + return __sync_fetch_and_add(p, 1) + 1; + } + static bool testandset(volatile T *p, T _old, T _new) { + return __sync_bool_compare_and_swap(p, _old, _new); + } +}; + + +/* + * Mutex + */ +namespace hqemu { +class Mutex { + hqemu_lock_t M; +public: + Mutex() { hqemu_lock_init(&M); } + inline void acquire() { hqemu_lock(&M); } + inline void release() { hqemu_unlock(&M); } +}; + +class MutexGuard { + Mutex &M; +public: + MutexGuard(Mutex &M) : M(M) { M.acquire(); } + ~MutexGuard() { M.release(); } +}; +}; + + +/* + * GraphNode is used to describe the information of one node in a CFG. 
+ */ +class GraphNode; +typedef std::vector<GraphNode *> NodeVec; +typedef std::set<GraphNode *> NodeSet; + +class GraphNode { + TranslationBlock *TB; + NodeVec Children; + +public: + GraphNode(TranslationBlock *tb) : TB(tb) {} + + TranslationBlock *getTB() { return TB; } + target_ulong getGuestPC() { return TB->pc; } + NodeVec &getChildren() { return Children; } + void insertChild(GraphNode *Node) { + Children.push_back(Node); + } + + static void DeleteCFG(GraphNode *Root); +}; + +/* + * ControlFlowGraph is used to build the whole program control flow graph (CFG). + * GlobalCFG uses this structure to maintain a whole program CFG connected by + * direct branches. + */ +class ControlFlowGraph { + hqemu::Mutex lock; + +public: + typedef std::vector<TranslationBlock *> TBVec; + typedef std::map<TranslationBlock*, TBVec> SuccMap; + SuccMap SuccCFG; + + ControlFlowGraph() {} + + hqemu::Mutex &getLock() { return lock; } + TBVec &getSuccessor(TranslationBlock *tb) { + return SuccCFG[tb]; + } + + void reset() { + hqemu::MutexGuard locked(lock); + SuccCFG.clear(); + } + void insertLink(TranslationBlock *src, TranslationBlock *dst) { + hqemu::MutexGuard locked(lock); + SuccCFG[src].push_back(dst); + } +}; + + +/* + * Queue + */ +#if defined(__x86_64__) +#define LOCK_FREE +#endif + +#ifdef LOCK_FREE +struct pointer_t { + struct node_t *ptr; + unsigned long int count; +}; + +struct node_t { + struct pointer_t next; + void *value; +}; + +/* Lock-free MS-queue */ +class Queue { + struct queue_t { + struct pointer_t head; + struct pointer_t tail; + }; + + node_t *new_node(void *value) { + node_t *node = new node_t; + node->next.ptr = nullptr; + node->value = value; + return node; + } + void delete_node(node_t *node) { + delete node; + } + + queue_t Q; + +public: + Queue(); + void enqueue(void *data); + void *dequeue(); +}; +#else +class Queue { + struct node_t { + struct node_t *next; + void *value; + node_t(void *v) : next(nullptr), value(v) {} + }; + struct queue_t { + struct 
node_t *head; + struct node_t *tail; + }; + + pthread_mutex_t lock; + queue_t Q; + +public: + Queue(); + void enqueue(void *data); + void *dequeue(); +}; +#endif + + +class UUID { + static uint64_t uuid; + +public: +#if defined(__x86_64__) + static uint64_t gen() { + uint64_t i = 1; + asm volatile("lock; xaddq %0, %1" + : "+r" (i), "+m" (uuid) :: "memory"); + return i + 1; + } +#else + static uint64_t gen() { + static pthread_mutex_t uuid_lock = PTHREAD_MUTEX_INITIALIZER; + pthread_mutex_lock(&uuid_lock); + uint64_t id = uuid++; + pthread_mutex_unlock(&uuid_lock); + return id; + } +#endif +}; + +/* Return the string of a hexadecimal number. */ +template <class T> +static inline std::string toHexString(T Num) { + std::stringstream ss; + ss << "0x" << std::hex << Num; + return ss.str(); +} + +/* Return the string of a zero extended number. */ +template <class T> +static inline std::string toZextStr(T Num) { + std::stringstream ss; + ss << std::setfill('0') << std::setw(16) << Num; + return ss.str(); +} + +/* Misc utilities */ +pid_t gettid(); +void patch_jmp(volatile uintptr_t patch_addr, volatile uintptr_t addr); +void patch_jmp(volatile uintptr_t patch_addr, volatile void *addr); + +#endif +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + |