diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Driver/Driver.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/Driver/Driver.cpp | 374 |
1 files changed, 272 insertions, 102 deletions
diff --git a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp index b9dc35d..180c412 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp @@ -174,8 +174,10 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, } else if ((PhaseArg = DAL.getLastArg(options::OPT_S))) { FinalPhase = phases::Backend; - // -c only runs up to the assembler. - } else if ((PhaseArg = DAL.getLastArg(options::OPT_c))) { + // -c and partial CUDA compilations only run up to the assembler. + } else if ((PhaseArg = DAL.getLastArg(options::OPT_c)) || + (PhaseArg = DAL.getLastArg(options::OPT_cuda_device_only)) || + (PhaseArg = DAL.getLastArg(options::OPT_cuda_host_only))) { FinalPhase = phases::Assemble; // Otherwise do everything. @@ -281,6 +283,83 @@ DerivedArgList *Driver::TranslateInputArgs(const InputArgList &Args) const { return DAL; } +/// \brief Compute target triple from args. +/// +/// This routine provides the logic to compute a target triple from various +/// args passed to the driver and the default triple string. +static llvm::Triple computeTargetTriple(StringRef DefaultTargetTriple, + const ArgList &Args, + StringRef DarwinArchName = "") { + // FIXME: Already done in Compilation *Driver::BuildCompilation + if (const Arg *A = Args.getLastArg(options::OPT_target)) + DefaultTargetTriple = A->getValue(); + + llvm::Triple Target(llvm::Triple::normalize(DefaultTargetTriple)); + + // Handle Apple-specific options available here. + if (Target.isOSBinFormatMachO()) { + // If an explict Darwin arch name is given, that trumps all. + if (!DarwinArchName.empty()) { + tools::darwin::setTripleTypeForMachOArchName(Target, DarwinArchName); + return Target; + } + + // Handle the Darwin '-arch' flag. + if (Arg *A = Args.getLastArg(options::OPT_arch)) { + StringRef ArchName = A->getValue(); + tools::darwin::setTripleTypeForMachOArchName(Target, ArchName); + } + } + + // Handle pseudo-target flags '-mlittle-endian'/'-EL' and + // '-mbig-endian'/'-EB'. + if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian, + options::OPT_mbig_endian)) { + if (A->getOption().matches(options::OPT_mlittle_endian)) { + llvm::Triple LE = Target.getLittleEndianArchVariant(); + if (LE.getArch() != llvm::Triple::UnknownArch) + Target = std::move(LE); + } else { + llvm::Triple BE = Target.getBigEndianArchVariant(); + if (BE.getArch() != llvm::Triple::UnknownArch) + Target = std::move(BE); + } + } + + // Skip further flag support on OSes which don't support '-m32' or '-m64'. + if (Target.getArchName() == "tce" || Target.getOS() == llvm::Triple::Minix) + return Target; + + // Handle pseudo-target flags '-m64', '-mx32', '-m32' and '-m16'. + if (Arg *A = Args.getLastArg(options::OPT_m64, options::OPT_mx32, + options::OPT_m32, options::OPT_m16)) { + llvm::Triple::ArchType AT = llvm::Triple::UnknownArch; + + if (A->getOption().matches(options::OPT_m64)) { + AT = Target.get64BitArchVariant().getArch(); + if (Target.getEnvironment() == llvm::Triple::GNUX32) + Target.setEnvironment(llvm::Triple::GNU); + } else if (A->getOption().matches(options::OPT_mx32) && + Target.get64BitArchVariant().getArch() == llvm::Triple::x86_64) { + AT = llvm::Triple::x86_64; + Target.setEnvironment(llvm::Triple::GNUX32); + } else if (A->getOption().matches(options::OPT_m32)) { + AT = Target.get32BitArchVariant().getArch(); + if (Target.getEnvironment() == llvm::Triple::GNUX32) + Target.setEnvironment(llvm::Triple::GNU); + } else if (A->getOption().matches(options::OPT_m16) && + Target.get32BitArchVariant().getArch() == llvm::Triple::x86) { + AT = llvm::Triple::x86; + Target.setEnvironment(llvm::Triple::CODE16); + } + + if (AT != llvm::Triple::UnknownArch && AT != Target.getArch()) + Target.setArch(AT); + } + + return Target; +} + Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) { llvm::PrettyStackTraceString CrashInfo("Compilation construction"); @@ -367,7 +446,8 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) { DerivedArgList *TranslatedArgs = TranslateInputArgs(*UArgs); // Owned by the host. - const ToolChain &TC = getToolChain(*UArgs); + const ToolChain &TC = + getToolChain(*UArgs, computeTargetTriple(DefaultTargetTriple, *UArgs)); // The compilation takes ownership of Args. Compilation *C = new Compilation(*this, TC, UArgs.release(), TranslatedArgs); @@ -398,6 +478,19 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) { return C; } +static void printArgList(raw_ostream &OS, const llvm::opt::ArgList &Args) { + llvm::opt::ArgStringList ASL; + for (const auto *A : Args) + A->render(Args, ASL); + + for (auto I = ASL.begin(), E = ASL.end(); I != E; ++I) { + if (I != ASL.begin()) + OS << ' '; + Command::printArg(OS, *I, true); + } + OS << '\n'; +} + // When clang crashes, produce diagnostic information including the fully // preprocessed source file(s). Request that the developer attach the // diagnostic information to a bug report. @@ -546,7 +639,9 @@ void Driver::generateCompilationDiagnostics(Compilation &C, << "Error generating run script: " + Script + " " + EC.message(); } else { ScriptOS << "# Crash reproducer for " << getClangFullVersion() << "\n" - << "# Original command: "; + << "# Driver args: "; + printArgList(ScriptOS, C.getInputArgs()); + ScriptOS << "# Original command: "; Cmd.Print(ScriptOS, "\n", /*Quote=*/true); Cmd.Print(ScriptOS, "\n", /*Quote=*/true, &CrashInfo); Diag(clang::diag::note_drv_command_failed_diag_msg) << Script; @@ -787,14 +882,6 @@ bool Driver::HandleImmediateArgs(const Compilation &C) { } return false; } - - if (C.getArgs().hasArg(options::OPT_print_multi_os_directory)) { - // FIXME: This should print out "lib/../lib", "lib/../lib64", or - // "lib/../lib32" as appropriate for the toolchain. For now, print - // nothing because it's not supported yet. - return false; - } - return true; } @@ -815,9 +902,20 @@ static unsigned PrintActions1(const Compilation &C, Action *A, } else if (BindArchAction *BIA = dyn_cast<BindArchAction>(A)) { os << '"' << BIA->getArchName() << '"' << ", {" << PrintActions1(C, *BIA->begin(), Ids) << "}"; + } else if (CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { + os << '"' << CDA->getGpuArchName() << '"' << ", {" + << PrintActions1(C, *CDA->begin(), Ids) << "}"; } else { + ActionList *AL; + if (CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) { + os << "{" << PrintActions1(C, *CHA->begin(), Ids) << "}" + << ", gpu binaries "; + AL = &CHA->getDeviceActions(); + } else + AL = &A->getInputs(); + const char *Prefix = "{"; - for (Action *PreRequisite : *A) { + for (Action *PreRequisite : *AL) { os << Prefix << PrintActions1(C, PreRequisite, Ids); Prefix = ", "; } @@ -1130,6 +1228,93 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, } } +// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE input +// action and then wraps each in CudaDeviceAction paired with appropriate GPU +// arch name. If we're only building device-side code, each action remains +// independent. Otherwise we pass device-side actions as inputs to a new +// CudaHostAction which combines both host and device side actions. +static std::unique_ptr<Action> +buildCudaActions(const Driver &D, const ToolChain &TC, DerivedArgList &Args, + const Arg *InputArg, const types::ID InputType, + std::unique_ptr<Action> Current, ActionList &Actions) { + + assert(InputType == types::TY_CUDA && + "CUDA Actions only apply to CUDA inputs."); + + // Collect all cuda_gpu_arch parameters, removing duplicates. + SmallVector<const char *, 4> GpuArchList; + llvm::StringSet<> GpuArchNames; + for (Arg *A : Args) { + if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) { + A->claim(); + if (GpuArchNames.insert(A->getValue()).second) + GpuArchList.push_back(A->getValue()); + } + } + + // Default to sm_20 which is the lowest common denominator for supported GPUs. + // sm_20 code should work correctly, if suboptimally, on all newer GPUs. + if (GpuArchList.empty()) + GpuArchList.push_back("sm_20"); + + // Replicate inputs for each GPU architecture. + Driver::InputList CudaDeviceInputs; + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) + CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg)); + + // Build actions for all device inputs. + ActionList CudaDeviceActions; + D.BuildActions(TC, Args, CudaDeviceInputs, CudaDeviceActions); + assert(GpuArchList.size() == CudaDeviceActions.size() && + "Failed to create actions for all devices"); + + // Check whether any of device actions stopped before they could generate PTX. + bool PartialCompilation = false; + bool DeviceOnlyCompilation = Args.hasArg(options::OPT_cuda_device_only); + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) { + if (CudaDeviceActions[i]->getKind() != Action::BackendJobClass) { + PartialCompilation = true; + break; + } + } + + // Figure out what to do with device actions -- pass them as inputs to the + // host action or run each of them independently. + if (PartialCompilation || DeviceOnlyCompilation) { + // In case of partial or device-only compilation results of device actions + // are not consumed by the host action device actions have to be added to + // top-level actions list with AtTopLevel=true and run independently. + + // -o is ambiguous if we have more than one top-level action. + if (Args.hasArg(options::OPT_o) && + (!DeviceOnlyCompilation || GpuArchList.size() > 1)) { + D.Diag(clang::diag::err_drv_output_argument_with_multiple_files); + return nullptr; + } + + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) + Actions.push_back( + new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]), + GpuArchList[i], /* AtTopLevel */ true)); + // Kill host action in case of device-only compilation. + if (DeviceOnlyCompilation) + Current.reset(nullptr); + return Current; + } else { + // Outputs of device actions during complete CUDA compilation get created + // with AtTopLevel=false and become inputs for the host action. + ActionList DeviceActions; + for (unsigned i = 0, e = GpuArchList.size(); i != e; ++i) + DeviceActions.push_back( + new CudaDeviceAction(std::unique_ptr<Action>(CudaDeviceActions[i]), + GpuArchList[i], /* AtTopLevel */ false)); + // Return a new host action that incorporates original host action and all + // device actions. + return std::unique_ptr<Action>( + new CudaHostAction(std::move(Current), DeviceActions)); + } +} + void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args, const InputList &Inputs, ActionList &Actions) const { llvm::PrettyStackTraceString CrashInfo("Building compilation actions"); @@ -1227,6 +1412,25 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args, continue; } + phases::ID CudaInjectionPhase; + if (isSaveTempsEnabled()) { + // All phases are done independently, inject GPU blobs during compilation + // phase as that's where we generate glue code to init them. + CudaInjectionPhase = phases::Compile; + } else { + // Assumes that clang does everything up until linking phase, so we inject + // cuda device actions at the last step before linking. Otherwise CUDA + // host action forces preprocessor into a separate invocation. + if (FinalPhase == phases::Link) { + for (auto i = PL.begin(), e = PL.end(); i != e; ++i) { + auto next = i + 1; + if (next != e && *next == phases::Link) + CudaInjectionPhase = *i; + } + } else + CudaInjectionPhase = FinalPhase; + } + // Build the pipeline for this file. std::unique_ptr<Action> Current(new InputAction(*InputArg, InputType)); for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end(); @@ -1252,6 +1456,15 @@ void Driver::BuildActions(const ToolChain &TC, DerivedArgList &Args, // Otherwise construct the appropriate action. Current = ConstructPhaseAction(TC, Args, Phase, std::move(Current)); + + if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase && + !Args.hasArg(options::OPT_cuda_host_only)) { + Current = buildCudaActions(*this, TC, Args, InputArg, InputType, + std::move(Current), Actions); + if (!Current) + break; + } + if (Current->getType() == types::TY_Nothing) break; } @@ -1491,7 +1704,13 @@ static const Tool *SelectToolForJob(Compilation &C, bool SaveTemps, if (isa<BackendJobAction>(JA)) { // Check if the compiler supports emitting LLVM IR. assert(Inputs->size() == 1); - JobAction *CompileJA = cast<CompileJobAction>(*Inputs->begin()); + JobAction *CompileJA; + // Extract real host action, if it's a CudaHostAction. + if (CudaHostAction *CudaHA = dyn_cast<CudaHostAction>(*Inputs->begin())) + CompileJA = cast<CompileJobAction>(*CudaHA->begin()); + else + CompileJA = cast<CompileJobAction>(*Inputs->begin()); + const Tool *Compiler = TC->SelectTool(*CompileJA); if (!Compiler) return nullptr; @@ -1525,6 +1744,20 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, InputInfo &Result) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); + InputInfoList CudaDeviceInputInfos; + if (const CudaHostAction *CHA = dyn_cast<CudaHostAction>(A)) { + InputInfo II; + // Append outputs of device jobs to the input list. + for (const Action *DA : CHA->getDeviceActions()) { + BuildJobsForAction(C, DA, TC, "", AtTopLevel, + /*MultipleArchs*/ false, LinkingOutput, II); + CudaDeviceInputInfos.push_back(II); + } + // Override current action with a real host compile action and continue + // processing it. + A = *CHA->begin(); + } + if (const InputAction *IA = dyn_cast<InputAction>(A)) { // FIXME: It would be nice to not claim this here; maybe the old scheme of // just using Args was better? @@ -1544,15 +1777,30 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, const char *ArchName = BAA->getArchName(); if (ArchName) - TC = &getToolChain(C.getArgs(), ArchName); + TC = &getToolChain( + C.getArgs(), + computeTargetTriple(DefaultTargetTriple, C.getArgs(), ArchName)); else TC = &C.getDefaultToolChain(); - BuildJobsForAction(C, *BAA->begin(), TC, BAA->getArchName(), AtTopLevel, + BuildJobsForAction(C, *BAA->begin(), TC, ArchName, AtTopLevel, MultipleArchs, LinkingOutput, Result); return; } + if (const CudaDeviceAction *CDA = dyn_cast<CudaDeviceAction>(A)) { + // Figure out which NVPTX triple to use for device-side compilation based on + // whether host is 64-bit. + llvm::Triple DeviceTriple(C.getDefaultToolChain().getTriple().isArch64Bit() + ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"); + BuildJobsForAction(C, *CDA->begin(), + &getToolChain(C.getArgs(), DeviceTriple), + CDA->getGpuArchName(), CDA->isAtTopLevel(), + /*MultipleArchs*/ true, LinkingOutput, Result); + return; + } + const ActionList *Inputs = &A->getInputs(); const JobAction *JA = cast<JobAction>(A); @@ -1584,6 +1832,10 @@ void Driver::BuildJobsForAction(Compilation &C, const Action *A, if (JA->getType() == types::TY_dSYM) BaseInput = InputInfos[0].getFilename(); + // Append outputs of cuda device jobs to the input list + if (CudaDeviceInputInfos.size()) + InputInfos.append(CudaDeviceInputInfos.begin(), CudaDeviceInputInfos.end()); + // Determine the place to write output to, if any. if (JA->getType() == types::TY_Nothing) Result = InputInfo(A->getType(), BaseInput); @@ -1899,93 +2151,8 @@ std::string Driver::GetTemporaryPath(StringRef Prefix, return Path.str(); } -/// \brief Compute target triple from args. -/// -/// This routine provides the logic to compute a target triple from various -/// args passed to the driver and the default triple string. -static llvm::Triple computeTargetTriple(StringRef DefaultTargetTriple, - const ArgList &Args, - StringRef DarwinArchName) { - // FIXME: Already done in Compilation *Driver::BuildCompilation - if (const Arg *A = Args.getLastArg(options::OPT_target)) - DefaultTargetTriple = A->getValue(); - - llvm::Triple Target(llvm::Triple::normalize(DefaultTargetTriple)); - - // Handle Apple-specific options available here. - if (Target.isOSBinFormatMachO()) { - // If an explict Darwin arch name is given, that trumps all. - if (!DarwinArchName.empty()) { - tools::darwin::setTripleTypeForMachOArchName(Target, DarwinArchName); - return Target; - } - - // Handle the Darwin '-arch' flag. - if (Arg *A = Args.getLastArg(options::OPT_arch)) { - StringRef ArchName = A->getValue(); - tools::darwin::setTripleTypeForMachOArchName(Target, ArchName); - } - } - - // Handle pseudo-target flags '-mlittle-endian'/'-EL' and - // '-mbig-endian'/'-EB'. - if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian, - options::OPT_mbig_endian)) { - if (A->getOption().matches(options::OPT_mlittle_endian)) { - if (Target.getArch() == llvm::Triple::mips) - Target.setArch(llvm::Triple::mipsel); - else if (Target.getArch() == llvm::Triple::mips64) - Target.setArch(llvm::Triple::mips64el); - else if (Target.getArch() == llvm::Triple::aarch64_be) - Target.setArch(llvm::Triple::aarch64); - } else { - if (Target.getArch() == llvm::Triple::mipsel) - Target.setArch(llvm::Triple::mips); - else if (Target.getArch() == llvm::Triple::mips64el) - Target.setArch(llvm::Triple::mips64); - else if (Target.getArch() == llvm::Triple::aarch64) - Target.setArch(llvm::Triple::aarch64_be); - } - } - - // Skip further flag support on OSes which don't support '-m32' or '-m64'. - if (Target.getArchName() == "tce" || Target.getOS() == llvm::Triple::Minix) - return Target; - - // Handle pseudo-target flags '-m64', '-mx32', '-m32' and '-m16'. - if (Arg *A = Args.getLastArg(options::OPT_m64, options::OPT_mx32, - options::OPT_m32, options::OPT_m16)) { - llvm::Triple::ArchType AT = llvm::Triple::UnknownArch; - - if (A->getOption().matches(options::OPT_m64)) { - AT = Target.get64BitArchVariant().getArch(); - if (Target.getEnvironment() == llvm::Triple::GNUX32) - Target.setEnvironment(llvm::Triple::GNU); - } else if (A->getOption().matches(options::OPT_mx32) && - Target.get64BitArchVariant().getArch() == llvm::Triple::x86_64) { - AT = llvm::Triple::x86_64; - Target.setEnvironment(llvm::Triple::GNUX32); - } else if (A->getOption().matches(options::OPT_m32)) { - AT = Target.get32BitArchVariant().getArch(); - if (Target.getEnvironment() == llvm::Triple::GNUX32) - Target.setEnvironment(llvm::Triple::GNU); - } else if (A->getOption().matches(options::OPT_m16) && - Target.get32BitArchVariant().getArch() == llvm::Triple::x86) { - AT = llvm::Triple::x86; - Target.setEnvironment(llvm::Triple::CODE16); - } - - if (AT != llvm::Triple::UnknownArch && AT != Target.getArch()) - Target.setArch(AT); - } - - return Target; -} - const ToolChain &Driver::getToolChain(const ArgList &Args, - StringRef DarwinArchName) const { - llvm::Triple Target = - computeTargetTriple(DefaultTargetTriple, Args, DarwinArchName); + const llvm::Triple &Target) const { ToolChain *&TC = ToolChains[Target.str()]; if (!TC) { @@ -2050,6 +2217,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, break; } break; + case llvm::Triple::CUDA: + TC = new toolchains::CudaToolChain(*this, Target, Args); + break; default: // Of these targets, Hexagon is the only one that might have // an OS of Linux, in which case it got handled above already. |