author     Timothy Pearson <tpearson@raptorengineering.com>  2019-11-29 19:00:14 -0600
committer  Timothy Pearson <tpearson@raptorengineering.com>  2019-11-29 19:02:28 -0600
commit     4b3250c5073149c59c5c11e06c2c0d93b6a9f5ff (patch)
tree       dce73321255f834f7b2d4c16fa49760edb534f27 /llvm/llvm-translator.cpp
parent     a58047f7fbb055677e45c9a7d65ba40fbfad4b92 (diff)
download   hqemu-2.5.1_overlay.zip
           hqemu-2.5.1_overlay.tar.gz
Initial overlay of HQEMU 2.5.2 changes onto underlying 2.5.1 QEMU GIT tree (2.5.1_overlay)
Diffstat (limited to 'llvm/llvm-translator.cpp')
-rw-r--r--  llvm/llvm-translator.cpp  924
1 file changed, 924 insertions, 0 deletions
diff --git a/llvm/llvm-translator.cpp b/llvm/llvm-translator.cpp
new file mode 100644
index 0000000..e435b1f
--- /dev/null
+++ b/llvm/llvm-translator.cpp
@@ -0,0 +1,924 @@
/*
 * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
 * See COPYRIGHT in top-level directory.
 */

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Analysis/InlineCost.h"
#include "fpu/softfloat-native-def.h"
#include "utils.h"
#include "tracer.h"
#include "llvm.h"
#include "llvm-debug.h"
#include "llvm-soft-perfmon.h"
#include "llvm-hard-perfmon.h"
#include "llvm-target.h"
#include "llvm-pass.h"
#include "llvm-opc.h"
#include "llvm-state.h"
#include "llvm-translator.h"


static cl::opt<bool> DisableFastMath("disable-fast-math", cl::init(false),
    cl::cat(CategoryHQEMU), cl::desc("Disable fast-math optimizations"));


static char include_helper[][64] = {
#include "llvm-helper.h"
};

extern LLVMEnv *LLEnv;
extern hqemu::Mutex llvm_global_lock;
extern hqemu::Mutex llvm_debug_lock;

extern bool TraceCacheFull;


#if defined(TCG_TARGET_I386)
# if defined(__i386__)
#  define AREG0 "ebp"
# elif defined(__x86_64__)
#  define AREG0 "r14"
# endif
#elif defined(TCG_TARGET_PPC64)
# define AREG0 "r27"
#elif defined(TCG_TARGET_ARM)
# define AREG0 "r7"
#elif defined(TCG_TARGET_AARCH64)
# define AREG0 "x19"
# define AREG1 "x28"
#else
# error "unsupported processor type"
#endif
const char *BaseRegStr = AREG0; /* The base register name */


/*
 * LLVM Translator
 */
LLVMTranslator::LLVMTranslator(unsigned id, CPUArchState *env)
    : MyID(id), Env(env)
{
    dbg() << DEBUG_LLVM << "Starting LLVM Translator " << MyID << ".\n";

    if (!Env)
        hqemu_error("internal error. LLVMEnv is not initialized.\n");

    /* Create LLVM module and basic types. */
    InitializeModule();
    InitializeType();
    InitializeTarget();
    InitializeHelpers();
    InitializeDisasm();

    /* Create the TCG IR to LLVM IR conversion module. */
    IF = new IRFactory(this);

#if defined(ENABLE_MCJIT)
    if (MyID == 0)
        LLEnv->getMemoryManager()->AddSymbols(Symbols);
#endif

    dbg() << DEBUG_LLVM << "LLVM Translator " << MyID << " initialized.\n";
}

LLVMTranslator::~LLVMTranslator()
{
    if (GuestDisAsm) delete GuestDisAsm;
    if (HostDisAsm)  delete HostDisAsm;
    delete IF;
    delete Mod;
}
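The AREG0 table above names the callee-saved host register that TCG permanently reserves to hold the CPUArchState pointer, so every guest-state access compiles down to base-plus-offset addressing off that register. A minimal sketch of what such a reservation looks like with the GCC/Clang global-register-variable extension (the variable name is illustrative and not part of this file):

    /* Sketch only: pin the CPU state pointer to the AREG0 register chosen
     * above for an x86-64 host. Requires the QEMU headers that define
     * CPUArchState; the compiler then never allocates r14 for anything else. */
    #if defined(__x86_64__)
    register CPUArchState *pinned_env asm("r14");
    #endif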
/* Perform the initialization of the LLVM module. */
void LLVMTranslator::InitializeModule()
{
    const char *p = strrchr(CONFIG_LLVM_BITCODE, '/');
    if (!p || *++p == '\0')
        hqemu_error("unknown bitcode file.\n");

    std::string Bitcode(p);
    std::vector<std::string> Path;

    Path.push_back(std::string("/etc/hqemu/").append(Bitcode));
    p = getenv("HOME");
    if (p)
        Path.push_back(std::string(p).append("/.hqemu/").append(Bitcode));
    Path.push_back(CONFIG_LLVM_BITCODE);

    unsigned i = 0, e = Path.size();
    for (; i != e; ++i) {
        struct stat buf;
        if (stat(Path[i].c_str(), &buf) != 0)
            continue;

        SMDiagnostic Err;
#if defined(LLVM_V35)
        Mod = ParseIRFile(Path[i], Err, Context);
#else
        std::unique_ptr<Module> Owner = parseIRFile(Path[i], Err, Context);
        Mod = Owner.release();
#endif
        if (Mod)
            break;
    }

    if (i == e)
        hqemu_error("cannot find bitcode file %s.\n", Bitcode.c_str());

    DL = getDataLayout(Mod);

    dbg() << DEBUG_LLVM << "Use bitcode file " << Path[i] << ".\n";
    dbg() << DEBUG_LLVM << "LLVM module initialized ("
          << Mod->getTargetTriple() << ").\n";
}

void LLVMTranslator::InitializeType()
{
    VoidTy   = Type::getVoidTy(Context);
    Int8Ty   = IntegerType::get(Context, 8);
    Int16Ty  = IntegerType::get(Context, 16);
    Int32Ty  = IntegerType::get(Context, 32);
    Int64Ty  = IntegerType::get(Context, 64);
    Int128Ty = IntegerType::get(Context, 128);

    IntPtrTy   = DL->getIntPtrType(Context);
    Int8PtrTy  = Type::getInt8PtrTy(Context, 0);
    Int16PtrTy = Type::getInt16PtrTy(Context, 0);
    Int32PtrTy = Type::getInt32PtrTy(Context, 0);
    Int64PtrTy = Type::getInt64PtrTy(Context, 0);

    FloatTy  = Type::getFloatTy(Context);
    DoubleTy = Type::getDoubleTy(Context);

    FloatPtrTy  = Type::getFloatPtrTy(Context, 0);
    DoublePtrTy = Type::getDoublePtrTy(Context, 0);
}

/* Set up guest-dependent data structures. */
void LLVMTranslator::InitializeTarget()
{
    /* TODO: any smart way to hack into the CPUArchState type? */
    Value *Base = Mod->getNamedValue("basereg");
    if (!Base)
        hqemu_error("cannot resolve cpu_proto.\n");

    BaseReg.resize(TCG_TARGET_NB_REGS);
    BaseReg[TCG_AREG0].RegNo = TCG_AREG0;
    BaseReg[TCG_AREG0].Name  = BaseRegStr;
    BaseReg[TCG_AREG0].Ty    = Base->getType();
    BaseReg[TCG_AREG0].Base  = nullptr;

#if defined(CONFIG_USER_ONLY) && defined(AREG1)
    if (guest_base != 0 || TARGET_LONG_BITS == 32) {
        GuestBaseReg.Name = AREG1;
        GuestBaseReg.Base = nullptr;
    }
#endif

    /* Define the new types of special registers. */
    std::map<Type *, Type *> SpecialReg;
    DefineSpecialReg(SpecialReg);

    /* Convert the CPUArchState of aggregate type to a list of single
     * elements of primitive type. */
    intptr_t Off = 0;
    FlattenCPUState(Base->getType()->getContainedType(0), Off, SpecialReg);
}

/* This function defines the special registers and the new types they are
 * remapped to. */
void LLVMTranslator::DefineSpecialReg(std::map<Type *, Type *> &SpecialReg)
{
#if defined(TARGET_I386)
    Value *SIMDReg = Mod->getNamedValue("xmm_reg");
    if (SIMDReg) {
        /* Remap XMMReg --> <16 x i8>. */
        Type *Int8Ty = IntegerType::get(Context, 8);
        Type *OldTy  = SIMDReg->getType()->getContainedType(0);
        Type *NewTy  = VectorType::get(Int8Ty, 16);
        SpecialReg[OldTy] = NewTy;
    }
#endif
}
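InitializeModule() above probes /etc/hqemu/, $HOME/.hqemu/, and the build-time CONFIG_LLVM_BITCODE path in that order and takes the first bitcode file that parses. The loading pattern it relies on, reduced to a standalone sketch (post-LLVM-3.5 API; the function name is illustrative):

    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IRReader/IRReader.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/raw_ostream.h"
    #include <memory>
    #include <string>

    static llvm::Module *loadBitcode(const std::string &Path,
                                     llvm::LLVMContext &Ctx)
    {
        llvm::SMDiagnostic Err;
        /* parseIRFile accepts both .bc bitcode and textual .ll files. */
        std::unique_ptr<llvm::Module> M = llvm::parseIRFile(Path, Err, Ctx);
        if (!M)
            Err.print("hqemu", llvm::errs()); /* report parse/IO failure */
        return M.release();
    }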
/* Convert the CPUArchState of aggregate type to a list of single elements of
 * primitive type. Each element is a pair of an offset into CPUArchState and
 * its type. This list of flattened types is used by the state mapping pass. */
void LLVMTranslator::FlattenCPUState(Type *Ty, intptr_t &Off,
                                     std::map<Type *, Type *> &SpecialReg)
{
    switch (Ty->getTypeID()) {
    default:
    {
        StateType[Off] = Ty;
        Off += DL->getTypeSizeInBits(Ty) / 8;
        break;
    }
    case Type::StructTyID:
    {
        /* Map a special register to another type with the same size as the
         * original type, e.g., mapping a <16 x i8> type to <2 x i64>. */
        if (SpecialReg.find(Ty) != SpecialReg.end()) {
            Type *NewTy = SpecialReg[Ty];
            StateType[Off] = NewTy;
            Off += DL->getTypeSizeInBits(Ty) / 8;
            break;
        }

        StructType *STy = cast<StructType>(Ty);
        intptr_t Size = DL->getTypeSizeInBits(STy) / 8;
        intptr_t SubOff;

        const StructLayout *SL = DL->getStructLayout(STy);
        for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
            SubOff = Off + SL->getElementOffset(i);
            FlattenCPUState(STy->getElementType(i), SubOff, SpecialReg);
        }

        Off += Size;

        /* A structure may have padding at its end. Expand the size of the
         * last struct member by the padding size. */
        if (Off != SubOff) {
            intptr_t LastOff = StateType.rbegin()->first;
            intptr_t NewSize = (Off - LastOff) * 8;
            Type *NewTy = IntegerType::get(Context, NewSize);
            StateType[LastOff] = NewTy;
        }
        break;
    }
    case Type::ArrayTyID:
    {
#if defined(CONFIG_SOFTMMU)
        /* Do not flatten the SoftTLB because it could create a huge number
         * of flattened states. */
        if (Off == offsetof(CPUArchState, tlb_table[0][0])) {
            StateType[Off] = Ty;
            Off += DL->getTypeSizeInBits(Ty) / 8;
            break;
        }
#endif
        ArrayType *ATy = cast<ArrayType>(Ty);
        intptr_t ElemSize = DL->getTypeSizeInBits(ATy->getElementType()) / 8;
        for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
            intptr_t SubOff = Off + i * ElemSize;
            FlattenCPUState(ATy->getElementType(), SubOff, SpecialReg);
        }
        Off += DL->getTypeSizeInBits(ATy) / 8;
        break;
    }
    }
}

static inline void Materialize(Function &F)
{
#if defined(LLVM_V35)
    std::string ErrInfo;
    F.Materialize(&ErrInfo);
#else
    F.materialize();
#endif
}
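The recursive walk in FlattenCPUState() above is the heart of the state mapping support: it turns the aggregate CPUArchState into a flat (offset, type) table. The same walk reduced to a standalone sketch, with the special-register remap and SoftTLB cutoff omitted (flatten and FlatMap are illustrative names, not HQEMU's; getTypeAllocSize stands in for the bit-size/8 computation used above):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include <cstdint>
    #include <map>

    /* Recursively record a (byte offset, type) pair for every primitive leaf. */
    static void flatten(const llvm::DataLayout &DL, llvm::Type *Ty,
                        intptr_t Off,
                        std::map<intptr_t, llvm::Type *> &FlatMap)
    {
        if (auto *STy = llvm::dyn_cast<llvm::StructType>(Ty)) {
            const llvm::StructLayout *SL = DL.getStructLayout(STy);
            for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
                flatten(DL, STy->getElementType(i),
                        Off + SL->getElementOffset(i), FlatMap);
        } else if (auto *ATy = llvm::dyn_cast<llvm::ArrayType>(Ty)) {
            intptr_t ElemSize = DL.getTypeAllocSize(ATy->getElementType());
            for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
                flatten(DL, ATy->getElementType(), Off + i * ElemSize, FlatMap);
        } else {
            FlatMap[Off] = Ty; /* primitive leaf */
        }
    }

Running this over a CPUArchState-like struct yields entries of the form {0 -> i32, 4 -> i32, ...}, which is exactly the shape StateType holds.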
/* Materialize helper functions and compute inline costs. */
void LLVMTranslator::InitializeHelpers()
{
    /* Set target-specific symbols. */
    AddDependentSymbols(this);

    /* Set const helpers, i.e., helpers that have no side effect. */
    InitializeConstHelpers();

    /* Materialize FPU helper functions. */
    TCGHelperInfo *FPUHelper = (TCGHelperInfo *)get_native_fpu_helpers();
    for (int i = 0, e = num_native_fpu_helpers(); i != e; ++i) {
        Function *Func = Mod->getFunction(FPUHelper[i].name);
        if (Func && Func->isMaterializable())
            Materialize(*Func);
    }

    /* Materialize defined helper functions that are allowed for inlining. */
    for (int i = 0, e = ARRAY_SIZE(include_helper); i < e; ++i) {
        Helpers[include_helper[i]] = new HelperInfo;
        Function *Func = Mod->getFunction(include_helper[i]);
        if (Func && Func->isMaterializable())
            Materialize(*Func);
    }

    /* Initialize all TCG helper functions. */
    const TCGHelperInfo *all_helpers = get_tcg_helpers();
    for (int i = 0, e = tcg_num_helpers(); i != e; ++i) {
        uintptr_t func = (uintptr_t)all_helpers[i].func;
        const char *name = all_helpers[i].name;
        if (!name)
            hqemu_error("invalid helper name.\n");

        TCGHelpers[func] = std::string("helper_") + std::string(name);
    }

    for (int i = 0, e = tcg_num_helpers(); i != e; ++i) {
        std::string FName = std::string("helper_") +
                            std::string(all_helpers[i].name);
        std::string FNameNoInline = FName + std::string("_noinline");
        if (Helpers.find(FName) != Helpers.end()) {
            HelperInfo *Helper = Helpers[FName];
            Function *F = Mod->getFunction(FName);
            if (!F)
                hqemu_error("fatal error - %s\n", FName.c_str());
            Helper->Func = F;
            Mod->getOrInsertFunction(FNameNoInline, F->getFunctionType());
            Helper->FuncNoInline = Mod->getFunction(FNameNoInline);
            Helpers[FNameNoInline] = Helper;

            AddSymbol(FNameNoInline, all_helpers[i].func);
        }
    }

    /* Analyze the inline cost of each helper function and create a
     * non-inlined counterpart in the LLVM module. For a non-inlined helper,
     * we simply remap the function address in the LLVM module, which causes
     * the JIT to emit a call instruction to that address. */
    for (int i = 0, e = tcg_num_helpers(); i != e; ++i) {
        const TCGHelperInfo *th = &all_helpers[i];
        std::string FName = std::string("helper_") + std::string(th->name);
        if (Helpers.find(FName) != Helpers.end()) {
            HelperInfo *Helper = Helpers[FName];
            bool ret = OptimizeHelper(*Helper);
            if (!ret) {
                /* If the helper function contains loops, it is not suitable
                 * for inlining because it conflicts with the state mapping
                 * pass. */
                Helpers.erase(FName);
                goto skip;
            }

            Helper->CalculateMetrics(Helper->Func);
            continue;
        }
skip:
        AddSymbol(FName, th->func);
    }

    /* Add all states of the nested helpers to the calling helper. Then,
     * compute the state boundary and determine whether all states (including
     * those in the nested functions) are known to this helper function.
     *
     * Note that we only allow one level of helper inlining. */
    for (auto &I : Helpers) {
        HelperInfo *Helper = I.second;
        bool hasNestNestedCall = false;
        for (CallInst *CI : Helper->NestedCalls) {
            std::string FName = CI->getCalledFunction()->getName();
            HelperInfo *NestedHelper = Helpers[FName];
            Helper->States.insert(Helper->States.begin(),
                                  NestedHelper->States.begin(),
                                  NestedHelper->States.end());

            CI->setCalledFunction(NestedHelper->FuncNoInline);
            if (I.first != FName && NestedHelper->hasNestedCall)
                hasNestNestedCall = true;
        }
        /* Clear hasNestedCall if the nesting is only one level deep; in that
         * case, all states of this helper are known. */
        Helper->hasNestedCall = hasNestNestedCall;

        /* Compute state boundaries. */
        StateAnalyzer Analyzer(DL);
        for (auto J : Helper->States)
            Analyzer.addStateRef(J.first, J.second);

        StateRange Reads, Writes;
        Analyzer.computeStateRange(Reads, Writes);

        Helper->insertState(Reads, false);
        Helper->insertState(Writes, true);
    }

    for (auto &I : Helpers) {
        HelperInfo *Helper = I.second;
        Helper->States.clear();
        Helper->NestedCalls.clear();
    }
}
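A detail worth noting in InitializeHelpers() above is the "_noinline" twin: each inlinable helper keeps a second, body-less declaration whose symbol is bound to the helper's native address, so nested calls can be redirected to real call instructions instead of being inlined. A minimal sketch of that pattern against a generic ExecutionEngine (HQEMU instead routes the binding through its memory manager's AddSymbol; names below are illustrative):

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/IR/Module.h"
    #include <string>

    llvm::Function *makeNoInlineTwin(llvm::Module &M, llvm::Function *F,
                                     llvm::ExecutionEngine &EE,
                                     void *NativeAddr)
    {
        std::string Twin = F->getName().str() + "_noinline";
        M.getOrInsertFunction(Twin, F->getFunctionType());
        llvm::Function *Decl = M.getFunction(Twin);
        EE.addGlobalMapping(Decl, NativeAddr); /* calls lower to NativeAddr */
        return Decl;
    }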
void LLVMTranslator::InitializeDisasm()
{
    std::string TargetTriple = "UnknownArch";

#if defined(TARGET_I386)
# if defined(TARGET_X86_64)
    TargetTriple = "x86_64";
# else
    TargetTriple = "i386";
# endif
#elif defined(TARGET_ARM)
# if defined(TARGET_AARCH64)
    TargetTriple = "aarch64";
# else
    TargetTriple = "arm";
# endif
#elif defined(TARGET_PPC)
    TargetTriple = "ppc";
#endif

    GuestDisAsm = MCDisasm::CreateMCDisasm(TargetTriple, false);
    HostDisAsm  = MCDisasm::CreateMCDisasm(Mod->getTargetTriple(), true);

    if (GuestDisAsm)
        dbg() << DEBUG_INASM << __func__
              << ": use LLVM disassembler for guest (" << TargetTriple << ").\n";
    else
        dbg() << DEBUG_INASM << __func__
              << ": can't find LLVM disassembler for guest ("
              << TargetTriple << "). Use QEMU disas.\n";

    if (HostDisAsm)
        dbg() << DEBUG_OUTASM << __func__
              << ": use LLVM disassembler for host ("
              << Mod->getTargetTriple() << ").\n";
    else
        dbg() << DEBUG_OUTASM << __func__
              << ": can't find LLVM disassembler for host ("
              << Mod->getTargetTriple() << "). Use QEMU disas.\n";
}

static bool isLegalIntrinsic(IntrinsicInst *II)
{
    switch (II->getIntrinsicID()) {
    case Intrinsic::memset:
    case Intrinsic::memcpy:
    case Intrinsic::memmove:
    case Intrinsic::dbg_declare:
        return false;
    default:
        break;
    }
    return true;
}

/* Determine whether the function argument and Ptr are aliases. */
static Value *isFromFuncArgument(Function &F, Value *Ptr)
{
    Ptr = StripPointer(Ptr);
    for (auto I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
        if (Ptr == &*I)
            return Ptr;
    }
    return nullptr;
}

/* Create a function pass manager to optimize the helper function. */
static void Optimize(Function &F)
{
    auto FPM = new legacy::FunctionPassManager(F.getParent());

    FPM->add(createReplaceIntrinsic());
    if (!DisableFastMath)
        FPM->add(createFastMathPass());
    FPM->run(F);

    delete FPM;
}
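Optimize() above follows the stock legacy pass-manager recipe, just with HQEMU's custom passes (createReplaceIntrinsic, createFastMathPass). The same shape with in-tree 3.x-era passes standing in, as a sketch:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/LegacyPassManager.h"
    #include "llvm/Transforms/Scalar.h"

    void runFunctionPasses(llvm::Function &F)
    {
        llvm::legacy::FunctionPassManager FPM(F.getParent());
        FPM.add(llvm::createCFGSimplificationPass());    /* stand-in pass */
        FPM.add(llvm::createInstructionCombiningPass()); /* stand-in pass */
        FPM.doInitialization();
        FPM.run(F);
        FPM.doFinalization();
    }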
/* Analyze and optimize a helper function. */
bool LLVMTranslator::OptimizeHelper(HelperInfo &Helper)
{
    Function &F = *Helper.Func;

    /* We don't want to inline helper functions that contain loops. */
    SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> BackEdges;
    FindFunctionBackedges(F, BackEdges);
    if (BackEdges.size())
        return false;

    Optimize(F);

    /* Collect and analyze memory and call instructions. */
    SmallVector<CallInst *, 16> Calls;
    for (auto II = inst_begin(F), EE = inst_end(F); II != EE; ++II) {
        Instruction *I = &*II;

        if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
            intptr_t Off = 0;
            Value *Base = getBaseWithConstantOffset(DL, getPointerOperand(I), Off);

            if (auto GV = dyn_cast<GlobalValue>(StripPointer(Base))) {
                if (!GV->hasPrivateLinkage())
                    continue;
            }

            /* XXX: We assume the pointer is derived from the function
             * argument. Skip it if it is not. */
            Value *Arg = isFromFuncArgument(F, Base);
            if (!Arg)
                return false;

            if (Base->getType() == BaseReg[TCG_AREG0].Ty) {
                /* This is a load/store of CPU state plus a constant offset.
                 * Track the state. */
                Helper.States.push_back(std::make_pair(I, Off));
            } else {
                /* This is a load/store through an unknown pointer.
                 * Track the maximum access size. */
                Type *Ty = cast<PointerType>(Arg->getType())->getElementType();
                intptr_t Size = DL->getTypeSizeInBits(Ty) / 8;
                Helper.mayConflictArg = true;
                Helper.ConflictSize = std::max(Helper.ConflictSize, Size);
            }
        } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
            Calls.push_back(CI);
        }
    }

    /* Analyze calls. */
    for (CallInst *CI : Calls) {
        if (CI->isInlineAsm())
            continue;

        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
            if (!isLegalIntrinsic(II))
                return false;
            continue;
        }

        if (!CI->getCalledFunction())
            return false;

        std::string FName = CI->getCalledFunction()->getName();
        if (isLibcall(FName) || isSoftFPcall(FName)) {
            /* Libcalls/SoftFP calls are always const functions; mark them. */
            ConstantInt *Meta[] = { CONST32(0) };
            MDFactory::setConstStatic(Context, CI, Meta);
            continue;
        }

        if (Helpers.find(FName) == Helpers.end())
            return false;

        Helper.hasNestedCall = true;
        Helper.NestedCalls.push_back(CI);
    }

    return true;
}

/* Figure out an approximation of how many instructions will be constant
 * folded if the specified value is constant. */
static unsigned CountCodeReductionForConstant(Value *V, CodeMetrics &Metrics)
{
    unsigned IndirectCallBonus = -InlineConstants::IndirectCallThreshold;

    unsigned Reduction = 0;
    for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) {
        User *U = UI->getUser();
        if (isa<BranchInst>(U) || isa<SwitchInst>(U)) {
            /* We will be able to eliminate all but one of the successors. */
            const TerminatorInst &TI = cast<TerminatorInst>(*U);
            const unsigned NumSucc = TI.getNumSuccessors();
            unsigned Instrs = 0;
            for (unsigned I = 0; I != NumSucc; ++I)
                Instrs += Metrics.NumBBInsts[TI.getSuccessor(I)];
            /* We don't know which blocks will be eliminated, so use the
             * average size. */
            Reduction += InlineConstants::InstrCost * Instrs * (NumSucc - 1) / NumSucc * 2;
        } else if (CallInst *CI = dyn_cast<CallInst>(U)) {
            /* Turning an indirect call into a direct call is a BIG win. */
            if (CI->getCalledValue() == V)
                Reduction += IndirectCallBonus;
        } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) {
            /* Turning an indirect call into a direct call is a BIG win. */
            if (II->getCalledValue() == V)
                Reduction += IndirectCallBonus;
        } else {
            Instruction &Inst = cast<Instruction>(*U);

            if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() ||
                isa<AllocaInst>(Inst))
                continue;

            bool AllOperandsConstant = true;
            for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i)
                if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) {
                    AllOperandsConstant = false;
                    break;
                }

            if (AllOperandsConstant) {
                /* We will get to remove this instruction... */
                Reduction += InlineConstants::InstrCost;
                Reduction += CountCodeReductionForConstant(&Inst, Metrics);
            }
        }
    }
    return Reduction;
}
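OptimizeHelper() classifies each load/store by peeling the pointer down to a base plus a constant byte offset; accesses whose base has the CPU-state type become tracked States entries. getBaseWithConstantOffset() is HQEMU's own utility, but stock LLVM exposes the same decomposition; a sketch of the classification step using it (modern signature; function and parameter names are illustrative):

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"

    /* Returns true if LI reads CPU state, leaving the byte offset into
     * CPUArchState in ByteOff. CPUStateArg is the helper's env argument. */
    bool classifyStateAccess(llvm::LoadInst *LI, const llvm::DataLayout &DL,
                             llvm::Value *CPUStateArg, int64_t &ByteOff)
    {
        ByteOff = 0;
        llvm::Value *Base = llvm::GetPointerBaseWithConstantOffset(
            LI->getPointerOperand(), ByteOff, DL);
        return Base == CPUStateArg;
    }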
/* Figure out an approximation of how much smaller the function will be if
 * it is inlined into a context where an argument becomes an alloca. */
static unsigned CountCodeReductionForAlloca(Value *V)
{
    if (!V->getType()->isPointerTy())
        return 0;

    unsigned Reduction = 0;
    for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) {
        Instruction *I = cast<Instruction>(UI->getUser());

        if (isa<LoadInst>(I) || isa<StoreInst>(I))
            Reduction += InlineConstants::InstrCost;
        else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
            /* If the GEP has variable indices, we won't be able to do much
             * with it. */
            if (GEP->hasAllConstantIndices())
                Reduction += CountCodeReductionForAlloca(GEP);
        } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
            /* Track pointer through bitcasts. */
            Reduction += CountCodeReductionForAlloca(BCI);
        } else
            return 0;
    }

    return Reduction;
}

void HelperInfo::CalculateMetrics(Function *F)
{
    Metrics.NumInsts = 0;

    for (auto FI = F->begin(); FI != F->end(); FI++) {
        unsigned NumInsts = 0;
        BasicBlock *BB = &*FI;
        for (auto BI = FI->begin(); BI != FI->end(); BI++) {
            if (isa<PHINode>(BI)) /* PHI nodes don't count. */
                continue;
            NumInsts++;
        }
        Metrics.NumBlocks++;
        Metrics.NumInsts += NumInsts;
        Metrics.NumBBInsts[BB] = NumInsts;
    }

    ArgumentWeights.reserve(F->arg_size());
    for (auto I = F->arg_begin(), E = F->arg_end(); I != E; ++I) {
        Value *V = &*I;
        ArgumentWeights.push_back(ArgInfo(
                CountCodeReductionForConstant(V, Metrics),
                CountCodeReductionForAlloca(V)));
    }
}

void LLVMTranslator::InitializeConstHelpers()
{
#if defined(TARGET_I386)
    ConstHelpers.insert("helper_outb");
    ConstHelpers.insert("helper_inb");
    ConstHelpers.insert("helper_outw");
    ConstHelpers.insert("helper_inw");
    ConstHelpers.insert("helper_outl");
    ConstHelpers.insert("helper_inl");
#elif defined(TARGET_ARM)
    ConstHelpers.insert("helper_vfp_tosis");
    ConstHelpers.insert("helper_vfp_tosid");
    ConstHelpers.insert("helper_vfp_tosizs");
    ConstHelpers.insert("helper_vfp_tosizd");
    ConstHelpers.insert("helper_vfp_touis");
    ConstHelpers.insert("helper_vfp_touid");
    ConstHelpers.insert("helper_vfp_touizs");
    ConstHelpers.insert("helper_vfp_touizd");

    ConstHelpers.insert("helper_vfp_sitos");
    ConstHelpers.insert("helper_vfp_sitod");
    ConstHelpers.insert("helper_vfp_uitos");
    ConstHelpers.insert("helper_vfp_uitod");

    ConstHelpers.insert("helper_vfp_fcvtds");
    ConstHelpers.insert("helper_vfp_fcvtsd");

    ConstHelpers.insert("helper_vfp_cmps");
    ConstHelpers.insert("helper_vfp_cmpd");
    ConstHelpers.insert("helper_vfp_cmpes");
    ConstHelpers.insert("helper_vfp_cmped");

#if defined(TARGET_AARCH64)
    ConstHelpers.insert("helper_vfp_tosls");
    ConstHelpers.insert("helper_vfp_tosld");
    ConstHelpers.insert("helper_vfp_sqtos");
    ConstHelpers.insert("helper_vfp_sqtod");
    ConstHelpers.insert("helper_vfp_uqtos");
    ConstHelpers.insert("helper_vfp_uqtod");

    ConstHelpers.insert("helper_vfp_cmps_a64");
    ConstHelpers.insert("helper_vfp_cmpd_a64");
    ConstHelpers.insert("helper_vfp_cmpes_a64");
    ConstHelpers.insert("helper_vfp_cmped_a64");
    ConstHelpers.insert("helper_vfp_minnums");
    ConstHelpers.insert("helper_vfp_maxnums");
    ConstHelpers.insert("helper_vfp_minnumd");
    ConstHelpers.insert("helper_vfp_maxnumd");

    ConstHelpers.insert("helper_get_cp_reg64");
    ConstHelpers.insert("helper_dc_zva");
#endif
#endif
}

void LLVMTranslator::Abort(TraceBuilder &Builder)
{
    target_ulong pc = Builder.getEntryNode()->getGuestPC();
    dbg() << DEBUG_LLVM << __func__
          << ": abort trace pc " << format("0x%" PRIx "", pc) << "\n";
}
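The per-argument weights built by CalculateMetrics() above only become meaningful at a call site, where the actual arguments are known. A sketch of how such weights are typically consumed in an inline decision (the struct, threshold logic, and all names here are illustrative, not HQEMU's):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    #include <vector>

    struct ArgWeight { unsigned ConstantWeight, AllocaWeight; };

    static bool worthInlining(const std::vector<ArgWeight> &Weights,
                              llvm::CallInst *CI, unsigned Cost,
                              unsigned Threshold)
    {
        unsigned Benefit = 0;
        for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
            if (i >= Weights.size())
                break;
            if (llvm::isa<llvm::Constant>(CI->getArgOperand(i)))
                Benefit += Weights[i].ConstantWeight; /* folds away code */
            else if (llvm::isa<llvm::AllocaInst>(CI->getArgOperand(i)))
                Benefit += Weights[i].AllocaWeight;   /* loads/stores vanish */
        }
        return Cost <= Threshold + Benefit;
    }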
/* Make a jump from the head block in the block code cache to the translated
 * host code of this region in the optimized code cache. Also patch previously
 * built regions that have a direct branch to this region. */
void LLVMTranslator::Commit(TraceBuilder &Builder)
{
    bool Invalid = false;
    OptimizationInfo *Opt = Builder.getOpt();
    TraceInfo *Trace = Builder.getTrace();
    TBVec &TBs = Trace->TBs;

    for (unsigned i = 0, e = TBs.size(); i != e; ++i) {
        if (TBs[i]->mode == BLOCK_INVALID) {
            Invalid = true;
            break;
        }
    }

    if (Invalid || llvm_check_cache() == 1) {
        delete Trace;
        delete Opt;
        return;
    }

    TranslatedCode *TC = new TranslatedCode;
    TC->Active  = true;
    TC->Size    = NI.Size;
    TC->Code    = NI.Code;
    TC->EntryTB = Trace->getEntryTB();
    TC->Restore = NI.Restore;
    TC->Trace   = Trace;

    /* If we get here, this is a legal trace. */
    LLVMEnv::ChainSlot &ChainPoint = LLEnv->getChainPoint();
    TranslationBlock *EntryTB = TC->EntryTB;

    hqemu::MutexGuard locked(llvm_global_lock);

    for (unsigned i = 0; i != NI.NumChainSlot; ++i)
        ChainPoint[NI.ChainSlot[i].Key] = NI.ChainSlot[i].Addr;

    TraceID tid = LLEnv->insertTransCode(TC);
    EntryTB->tid = tid;
    EntryTB->mode = BLOCK_OPTIMIZED;
    EntryTB->opt_ptr = TC->Code;

    /* Set the jump from the block to the trace. */
    patch_jmp(tb_get_jmp_entry(EntryTB), TC->Code);

    if (!SP->isEnabled()) {
        delete Trace;
        TC->Trace = nullptr;
    }

    delete Opt;
}

void LLVMTranslator::dump(CPUArchState *env, TranslationBlock *tb)
{
    auto &DebugMode = DM.getDebugMode();
    if (DebugMode & (DEBUG_INASM | DEBUG_OP)) {
        hqemu::MutexGuard locked(llvm_debug_lock);
        dbg() << DEBUG_LLVM << "Translator " << MyID << " dumps asm...\n";
        if (DebugMode & DEBUG_INASM)
            printAsm(Env, tb);
        if (DebugMode & DEBUG_OP)
            printOp(Env, tb);
    }
}

void LLVMTranslator::GenBlock(CPUArchState *env, OptimizationInfo *Opt)
{
    struct timeval start, end;
    if (SP->isEnabled())
        gettimeofday(&start, nullptr);

    TraceBuilder Builder(IF, Opt);
    GraphNode *Node = Builder.getNextNode();
    if (!Node)
        hqemu_error("fatal error.\n");

    Builder.ConvertToTCGIR(env);

    if (DM.getDebugMode() & (DEBUG_INASM | DEBUG_OP))
        dump(env, Opt->getCFG()->getTB());

    Builder.ConvertToLLVMIR();
    Builder.Finalize();

    if (SP->isEnabled()) {
        gettimeofday(&end, nullptr);
        Builder.getTrace()->setTransTime(&start, &end);
    }

    Commit(Builder);
}

void LLVMTranslator::GenTrace(CPUArchState *env, OptimizationInfo *Opt)
{
    struct timeval start, end;
    if (SP->isEnabled())
        gettimeofday(&start, nullptr);

    TraceBuilder Builder(IF, Opt);
    for (;;) {
        GraphNode *Node = Builder.getNextNode();
        if (!Node)
            break;

        Builder.ConvertToTCGIR(Env);

        if (DM.getDebugMode() & (DEBUG_INASM | DEBUG_OP))
            dump(Env, Node->getTB());

        Builder.ConvertToLLVMIR();

        if (Node->getTB()->mode == BLOCK_INVALID || Builder.isAborted()) {
            Abort(Builder);
            return;
        }
    }
    Builder.Finalize();

    if (SP->isEnabled()) {
        gettimeofday(&end, nullptr);
        Builder.getTrace()->setTransTime(&start, &end);
    }

    Commit(Builder);
}
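Commit() above publishes a trace by patching the jump slot of the entry block (patch_jmp) so execution flows from the block code cache straight into the optimized code cache. On an x86-64 host the core of such patching is a rel32 rewrite; a minimal sketch under that assumption (the real implementation must also make the store atomic with respect to concurrently executing code):

    #include <cstdint>
    #include <cstring>

    /* Rewrite an existing 5-byte "jmp rel32" (opcode 0xE9) at jmp_addr so
     * it branches to target; rel32 is relative to the end of the jump. */
    static void patch_direct_jump(uintptr_t jmp_addr, uintptr_t target)
    {
        int32_t rel32 = (int32_t)(target - (jmp_addr + 5));
        uint8_t insn[5] = { 0xE9 };
        std::memcpy(insn + 1, &rel32, 4);
        std::memcpy((void *)jmp_addr, insn, 5);
    }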
/* Display the guest assembly code of the given basic block. */
void LLVMTranslator::printAsm(CPUArchState *env, TranslationBlock *tb)
{
    auto &OS = DM.debug();
    if (GuestDisAsm) {
        OS << "----------------\n"
           << "IN: [size=" << tb->size << "]\n";
#if defined(CONFIG_USER_ONLY)
        GuestDisAsm->PrintInAsm((uint64_t)g2h(tb->pc), tb->size, tb->pc);
#else
        GuestDisAsm->PrintInAsm((uint64_t)tb->image, tb->size, tb->pc);
#endif
        OS << "\n";
        return;
    }

#if defined(CONFIG_USER_ONLY)
    /* The guest is not supported by the LLVM MCDisassembler. Use QEMU disas. */
    int disas_flags = 0;

#if defined(TARGET_I386)
# if defined(TARGET_X86_64)
    if ((tb->flags >> HF_CS64_SHIFT) & 1)
        disas_flags = 2;
    else
# endif
        disas_flags = !((tb->flags >> HF_CS32_SHIFT) & 1);
#elif defined(TARGET_ARM)
# if defined(TARGET_AARCH64)
    disas_flags = 4 | (0 << 1);
# else
    disas_flags = env->thumb;
# endif
#elif defined(TARGET_PPC)
    int le_mode = env->hflags & (1 << MSR_LE) ? 1 : 0;
    disas_flags = env->bfd_mach;
    disas_flags |= le_mode << 16;
#endif

    OS << "----------------\n";
    OS << "IN: [size=" << tb->size << "]\n";
    target_disas(stderr, ENV_GET_CPU(env), tb->pc, tb->size, disas_flags);
    OS << "\n";
#endif
}

extern "C" void printops(const char *outbuf)
{
    DM.debug() << outbuf;
}

/* Display the TCG IR of the given basic block. */
void LLVMTranslator::printOp(CPUArchState *env, TranslationBlock *tb)
{
    auto &OS = DM.debug();
    OS << "OP:\n";
    tcg_dump_ops_fn(&tcg_ctx, printops);
    OS << "\n";
}

/*
 * vim: ts=8 sts=4 sw=4 expandtab
 */