diff options
Diffstat (limited to 'contrib/llvm/tools/clang/lib/Driver/Driver.cpp')
-rw-r--r-- | contrib/llvm/tools/clang/lib/Driver/Driver.cpp | 1924 |
1 files changed, 1499 insertions, 425 deletions
diff --git a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp index 02f4a99..15f830d 100644 --- a/contrib/llvm/tools/clang/lib/Driver/Driver.cpp +++ b/contrib/llvm/tools/clang/lib/Driver/Driver.cpp @@ -32,7 +32,6 @@ #include "llvm/Option/OptSpecifier.h" #include "llvm/Option/OptTable.h" #include "llvm/Option/Option.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -43,6 +42,9 @@ #include <map> #include <memory> #include <utility> +#if LLVM_ON_UNIX +#include <unistd.h> // getpid +#endif using namespace clang::driver; using namespace clang; @@ -55,12 +57,12 @@ Driver::Driver(StringRef ClangExecutable, StringRef DefaultTargetTriple, Mode(GCCMode), SaveTemps(SaveTempsNone), BitcodeEmbed(EmbedNone), LTOMode(LTOK_None), ClangExecutable(ClangExecutable), SysRoot(DEFAULT_SYSROOT), UseStdLib(true), - DefaultTargetTriple(DefaultTargetTriple), DriverTitle("clang LLVM compiler"), CCPrintOptionsFilename(nullptr), CCPrintHeadersFilename(nullptr), CCLogDiagnosticsFilename(nullptr), CCCPrintBindings(false), CCPrintHeaders(false), CCLogDiagnostics(false), - CCGenDiagnostics(false), CCCGenericGCCName(""), CheckInputsExist(true), - CCCUsePCH(true), SuppressMissingInputWarning(false) { + CCGenDiagnostics(false), DefaultTargetTriple(DefaultTargetTriple), + CCCGenericGCCName(""), CheckInputsExist(true), CCCUsePCH(true), + SuppressMissingInputWarning(false) { // Provide a sane fallback if no VFS is specified. if (!this->VFS) @@ -89,31 +91,39 @@ Driver::~Driver() { llvm::DeleteContainerSeconds(ToolChains); } -void Driver::ParseDriverMode(ArrayRef<const char *> Args) { - const std::string OptName = - getOpts().getOption(options::OPT_driver_mode).getPrefixedName(); +void Driver::ParseDriverMode(StringRef ProgramName, + ArrayRef<const char *> Args) { + auto Default = ToolChain::getTargetAndModeFromProgramName(ProgramName); + StringRef DefaultMode(Default.second); + setDriverModeFromOption(DefaultMode); for (const char *ArgPtr : Args) { // Ingore nullptrs, they are response file's EOL markers if (ArgPtr == nullptr) continue; const StringRef Arg = ArgPtr; - if (!Arg.startswith(OptName)) - continue; + setDriverModeFromOption(Arg); + } +} - const StringRef Value = Arg.drop_front(OptName.size()); - const unsigned M = llvm::StringSwitch<unsigned>(Value) - .Case("gcc", GCCMode) - .Case("g++", GXXMode) - .Case("cpp", CPPMode) - .Case("cl", CLMode) - .Default(~0U); +void Driver::setDriverModeFromOption(StringRef Opt) { + const std::string OptName = + getOpts().getOption(options::OPT_driver_mode).getPrefixedName(); + if (!Opt.startswith(OptName)) + return; + StringRef Value = Opt.drop_front(OptName.size()); - if (M != ~0U) - Mode = static_cast<DriverMode>(M); - else - Diag(diag::err_drv_unsupported_option_argument) << OptName << Value; - } + const unsigned M = llvm::StringSwitch<unsigned>(Value) + .Case("gcc", GCCMode) + .Case("g++", GXXMode) + .Case("cpp", CPPMode) + .Case("cl", CLMode) + .Default(~0U); + + if (M != ~0U) + Mode = static_cast<DriverMode>(M); + else + Diag(diag::err_drv_unsupported_option_argument) << OptName << Value; } InputArgList Driver::ParseArgStrings(ArrayRef<const char *> ArgStrings) { @@ -170,6 +180,10 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, (PhaseArg = DAL.getLastArg(options::OPT__SLASH_P))) { FinalPhase = phases::Preprocess; + // --precompile only runs up to precompilation. + } else if ((PhaseArg = DAL.getLastArg(options::OPT__precompile))) { + FinalPhase = phases::Precompile; + // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. } else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) || (PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) || @@ -423,6 +437,32 @@ void Driver::setLTOMode(const llvm::opt::ArgList &Args) { } } +/// Compute the desired OpenMP runtime from the flags provided. +Driver::OpenMPRuntimeKind Driver::getOpenMPRuntime(const ArgList &Args) const { + StringRef RuntimeName(CLANG_DEFAULT_OPENMP_RUNTIME); + + const Arg *A = Args.getLastArg(options::OPT_fopenmp_EQ); + if (A) + RuntimeName = A->getValue(); + + auto RT = llvm::StringSwitch<OpenMPRuntimeKind>(RuntimeName) + .Case("libomp", OMPRT_OMP) + .Case("libgomp", OMPRT_GOMP) + .Case("libiomp5", OMPRT_IOMP5) + .Default(OMPRT_Unknown); + + if (RT == OMPRT_Unknown) { + if (A) + Diag(diag::err_drv_unsupported_option_argument) + << A->getOption().getName() << A->getValue(); + else + // FIXME: We could use a nicer diagnostic here. + Diag(diag::err_drv_unsupported_opt) << "-fopenmp"; + } + + return RT; +} + void Driver::CreateOffloadingDeviceToolChains(Compilation &C, InputList &Inputs) { @@ -433,14 +473,71 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, if (llvm::any_of(Inputs, [](std::pair<types::ID, const llvm::opt::Arg *> &I) { return types::isCuda(I.first); })) { - const ToolChain &TC = getToolChain( - C.getInputArgs(), - llvm::Triple(C.getSingleOffloadToolChain<Action::OFK_Host>() - ->getTriple() - .isArch64Bit() - ? "nvptx64-nvidia-cuda" - : "nvptx-nvidia-cuda")); - C.addOffloadDeviceToolChain(&TC, Action::OFK_Cuda); + const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); + const llvm::Triple &HostTriple = HostTC->getTriple(); + llvm::Triple CudaTriple(HostTriple.isArch64Bit() ? "nvptx64-nvidia-cuda" + : "nvptx-nvidia-cuda"); + // Use the CUDA and host triples as the key into the ToolChains map, because + // the device toolchain we create depends on both. + ToolChain *&CudaTC = ToolChains[CudaTriple.str() + "/" + HostTriple.str()]; + if (!CudaTC) { + CudaTC = new toolchains::CudaToolChain(*this, CudaTriple, *HostTC, + C.getInputArgs()); + } + C.addOffloadDeviceToolChain(CudaTC, Action::OFK_Cuda); + } + + // + // OpenMP + // + // We need to generate an OpenMP toolchain if the user specified targets with + // the -fopenmp-targets option. + if (Arg *OpenMPTargets = + C.getInputArgs().getLastArg(options::OPT_fopenmp_targets_EQ)) { + if (OpenMPTargets->getNumValues()) { + // We expect that -fopenmp-targets is always used in conjunction with the + // option -fopenmp specifying a valid runtime with offloading support, + // i.e. libomp or libiomp. + bool HasValidOpenMPRuntime = C.getInputArgs().hasFlag( + options::OPT_fopenmp, options::OPT_fopenmp_EQ, + options::OPT_fno_openmp, false); + if (HasValidOpenMPRuntime) { + OpenMPRuntimeKind OpenMPKind = getOpenMPRuntime(C.getInputArgs()); + HasValidOpenMPRuntime = + OpenMPKind == OMPRT_OMP || OpenMPKind == OMPRT_IOMP5; + } + + if (HasValidOpenMPRuntime) { + llvm::StringMap<const char *> FoundNormalizedTriples; + for (const char *Val : OpenMPTargets->getValues()) { + llvm::Triple TT(Val); + std::string NormalizedName = TT.normalize(); + + // Make sure we don't have a duplicate triple. + auto Duplicate = FoundNormalizedTriples.find(NormalizedName); + if (Duplicate != FoundNormalizedTriples.end()) { + Diag(clang::diag::warn_drv_omp_offload_target_duplicate) + << Val << Duplicate->second; + continue; + } + + // Store the current triple so that we can check for duplicates in the + // following iterations. + FoundNormalizedTriples[NormalizedName] = Val; + + // If the specified target is invalid, emit a diagnostic. + if (TT.getArch() == llvm::Triple::UnknownArch) + Diag(clang::diag::err_drv_invalid_omp_target) << Val; + else { + const ToolChain &TC = getToolChain(C.getInputArgs(), TT); + C.addOffloadDeviceToolChain(&TC, Action::OFK_OpenMP); + } + } + } else + Diag(clang::diag::err_drv_expecting_fopenmp_with_fopenmp_targets); + } else + Diag(clang::diag::warn_drv_empty_joined_argument) + << OpenMPTargets->getAsString(C.getInputArgs()); } // @@ -456,8 +553,9 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) { // FIXME: Handle environment options which affect driver behavior, somewhere // (client?). GCC_EXEC_PREFIX, LPATH, CC_PRINT_OPTIONS. - if (char *env = ::getenv("COMPILER_PATH")) { - StringRef CompilerPath = env; + if (Optional<std::string> CompilerPathValue = + llvm::sys::Process::GetEnv("COMPILER_PATH")) { + StringRef CompilerPath = *CompilerPathValue; while (!CompilerPath.empty()) { std::pair<StringRef, StringRef> Split = CompilerPath.split(llvm::sys::EnvPathSeparator); @@ -468,7 +566,7 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) { // We look for the driver mode option early, because the mode can affect // how other options are parsed. - ParseDriverMode(ArgList.slice(1)); + ParseDriverMode(ClangExecutable, ArgList.slice(1)); // FIXME: What are we going to do with -V and -b? @@ -535,26 +633,20 @@ Compilation *Driver::BuildCompilation(ArrayRef<const char *> ArgList) { setLTOMode(Args); - // Ignore -fembed-bitcode options with LTO - // since the output will be bitcode anyway. - if (getLTOMode() == LTOK_None) { - if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) { - StringRef Name = A->getValue(); - unsigned Model = llvm::StringSwitch<unsigned>(Name) - .Case("off", EmbedNone) - .Case("all", EmbedBitcode) - .Case("bitcode", EmbedBitcode) - .Case("marker", EmbedMarker) - .Default(~0U); - if (Model == ~0U) { - Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) - << Name; - } else - BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model); - } - } else { - // claim the bitcode option under LTO so no warning is issued. - Args.ClaimAllArgs(options::OPT_fembed_bitcode_EQ); + // Process -fembed-bitcode= flags. + if (Arg *A = Args.getLastArg(options::OPT_fembed_bitcode_EQ)) { + StringRef Name = A->getValue(); + unsigned Model = llvm::StringSwitch<unsigned>(Name) + .Case("off", EmbedNone) + .Case("all", EmbedBitcode) + .Case("bitcode", EmbedBitcode) + .Case("marker", EmbedMarker) + .Default(~0U); + if (Model == ~0U) { + Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) + << Name; + } else + BitcodeEmbed = static_cast<BitcodeEmbedMode>(Model); } std::unique_ptr<llvm::opt::InputArgList> UArgs = @@ -610,6 +702,95 @@ static void printArgList(raw_ostream &OS, const llvm::opt::ArgList &Args) { OS << '\n'; } +bool Driver::getCrashDiagnosticFile(StringRef ReproCrashFilename, + SmallString<128> &CrashDiagDir) { + using namespace llvm::sys; + assert(llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin() && + "Only knows about .crash files on Darwin"); + + // The .crash file can be found on at ~/Library/Logs/DiagnosticReports/ + // (or /Library/Logs/DiagnosticReports for root) and has the filename pattern + // clang-<VERSION>_<YYYY-MM-DD-HHMMSS>_<hostname>.crash. + path::home_directory(CrashDiagDir); + if (CrashDiagDir.startswith("/var/root")) + CrashDiagDir = "/"; + path::append(CrashDiagDir, "Library/Logs/DiagnosticReports"); + int PID = +#if LLVM_ON_UNIX + getpid(); +#else + 0; +#endif + std::error_code EC; + fs::file_status FileStatus; + TimePoint<> LastAccessTime; + SmallString<128> CrashFilePath; + // Lookup the .crash files and get the one generated by a subprocess spawned + // by this driver invocation. + for (fs::directory_iterator File(CrashDiagDir, EC), FileEnd; + File != FileEnd && !EC; File.increment(EC)) { + StringRef FileName = path::filename(File->path()); + if (!FileName.startswith(Name)) + continue; + if (fs::status(File->path(), FileStatus)) + continue; + llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> CrashFile = + llvm::MemoryBuffer::getFile(File->path()); + if (!CrashFile) + continue; + // The first line should start with "Process:", otherwise this isn't a real + // .crash file. + StringRef Data = CrashFile.get()->getBuffer(); + if (!Data.startswith("Process:")) + continue; + // Parse parent process pid line, e.g: "Parent Process: clang-4.0 [79141]" + size_t ParentProcPos = Data.find("Parent Process:"); + if (ParentProcPos == StringRef::npos) + continue; + size_t LineEnd = Data.find_first_of("\n", ParentProcPos); + if (LineEnd == StringRef::npos) + continue; + StringRef ParentProcess = Data.slice(ParentProcPos+15, LineEnd).trim(); + int OpenBracket = -1, CloseBracket = -1; + for (size_t i = 0, e = ParentProcess.size(); i < e; ++i) { + if (ParentProcess[i] == '[') + OpenBracket = i; + if (ParentProcess[i] == ']') + CloseBracket = i; + } + // Extract the parent process PID from the .crash file and check whether + // it matches this driver invocation pid. + int CrashPID; + if (OpenBracket < 0 || CloseBracket < 0 || + ParentProcess.slice(OpenBracket + 1, CloseBracket) + .getAsInteger(10, CrashPID) || CrashPID != PID) { + continue; + } + + // Found a .crash file matching the driver pid. To avoid getting an older + // and misleading crash file, continue looking for the most recent. + // FIXME: the driver can dispatch multiple cc1 invocations, leading to + // multiple crashes poiting to the same parent process. Since the driver + // does not collect pid information for the dispatched invocation there's + // currently no way to distinguish among them. + const auto FileAccessTime = FileStatus.getLastModificationTime(); + if (FileAccessTime > LastAccessTime) { + CrashFilePath.assign(File->path()); + LastAccessTime = FileAccessTime; + } + } + + // If found, copy it over to the location of other reproducer files. + if (!CrashFilePath.empty()) { + EC = fs::copy_file(CrashFilePath, ReproCrashFilename); + if (EC) + return false; + return true; + } + + return false; +} + // When clang crashes, produce diagnostic information including the fully // preprocessed source file(s). Request that the developer attach the // diagnostic information to a bug report. @@ -737,8 +918,13 @@ void Driver::generateCompilationDiagnostics(Compilation &C, "Preprocessed source(s) and associated run script(s) are located at:"; SmallString<128> VFS; + SmallString<128> ReproCrashFilename; for (const char *TempFile : TempFiles) { Diag(clang::diag::note_drv_command_failed_diag_msg) << TempFile; + if (ReproCrashFilename.empty()) { + ReproCrashFilename = TempFile; + llvm::sys::path::replace_extension(ReproCrashFilename, ".crash"); + } if (StringRef(TempFile).endswith(".cache")) { // In some cases (modules) we'll dump extra data to help with reproducing // the crash into a directory next to the output. @@ -766,6 +952,24 @@ void Driver::generateCompilationDiagnostics(Compilation &C, Diag(clang::diag::note_drv_command_failed_diag_msg) << Script; } + // On darwin, provide information about the .crash diagnostic report. + if (llvm::Triple(llvm::sys::getProcessTriple()).isOSDarwin()) { + SmallString<128> CrashDiagDir; + if (getCrashDiagnosticFile(ReproCrashFilename, CrashDiagDir)) { + Diag(clang::diag::note_drv_command_failed_diag_msg) + << ReproCrashFilename.str(); + } else { // Suggest a directory for the user to look for .crash files. + llvm::sys::path::append(CrashDiagDir, Name); + CrashDiagDir += "_<YYYY-MM-DD-HHMMSS>_<hostname>.crash"; + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "Crash backtrace is located in"; + Diag(clang::diag::note_drv_command_failed_diag_msg) + << CrashDiagDir.str(); + Diag(clang::diag::note_drv_command_failed_diag_msg) + << "(choose the .crash file that corresponds to your crash)"; + } + } + for (const auto &A : C.getArgs().filtered(options::OPT_frewrite_map_file, options::OPT_frewrite_map_file_EQ)) Diag(clang::diag::note_drv_command_failed_diag_msg) << A->getValue(); @@ -783,8 +987,7 @@ void Driver::setUpResponseFiles(Compilation &C, Command &Cmd) { return; std::string TmpName = GetTemporaryPath("response", "txt"); - Cmd.setResponseFile( - C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str()))); + Cmd.setResponseFile(C.addTempFile(C.getArgs().MakeArgString(TmpName))); } int Driver::ExecuteCompilation( @@ -982,7 +1185,15 @@ bool Driver::HandleImmediateArgs(const Compilation &C) { } if (C.getArgs().hasArg(options::OPT_print_libgcc_file_name)) { - llvm::outs() << GetFilePath("libgcc.a", TC) << "\n"; + ToolChain::RuntimeLibType RLT = TC.GetRuntimeLibType(C.getArgs()); + switch (RLT) { + case ToolChain::RLT_CompilerRT: + llvm::outs() << TC.getCompilerRT(C.getArgs(), "builtins") << "\n"; + break; + case ToolChain::RLT_Libgcc: + llvm::outs() << GetFilePath("libgcc.a", TC) << "\n"; + break; + } return false; } @@ -1388,132 +1599,745 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, } } -// For each unique --cuda-gpu-arch= argument creates a TY_CUDA_DEVICE -// input action and then wraps each in CudaDeviceAction paired with -// appropriate GPU arch name. In case of partial (i.e preprocessing -// only) or device-only compilation, each device action is added to /p -// Actions and /p Current is released. Otherwise the function creates -// and returns a new CudaHostAction which wraps /p Current and device -// side actions. -static Action *buildCudaActions(Compilation &C, DerivedArgList &Args, - const Arg *InputArg, Action *HostAction, - ActionList &Actions) { - Arg *PartialCompilationArg = Args.getLastArg( - options::OPT_cuda_host_only, options::OPT_cuda_device_only, - options::OPT_cuda_compile_host_device); - bool CompileHostOnly = - PartialCompilationArg && - PartialCompilationArg->getOption().matches(options::OPT_cuda_host_only); - bool CompileDeviceOnly = - PartialCompilationArg && - PartialCompilationArg->getOption().matches(options::OPT_cuda_device_only); - - if (CompileHostOnly) { +namespace { +/// Provides a convenient interface for different programming models to generate +/// the required device actions. +class OffloadingActionBuilder final { + /// Flag used to trace errors in the builder. + bool IsValid = false; + + /// The compilation that is using this builder. + Compilation &C; + + /// Map between an input argument and the offload kinds used to process it. + std::map<const Arg *, unsigned> InputArgToOffloadKindMap; + + /// Builder interface. It doesn't build anything or keep any state. + class DeviceActionBuilder { + public: + typedef llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PhasesTy; + + enum ActionBuilderReturnCode { + // The builder acted successfully on the current action. + ABRT_Success, + // The builder didn't have to act on the current action. + ABRT_Inactive, + // The builder was successful and requested the host action to not be + // generated. + ABRT_Ignore_Host, + }; + + protected: + /// Compilation associated with this builder. + Compilation &C; + + /// Tool chains associated with this builder. The same programming + /// model may have associated one or more tool chains. + SmallVector<const ToolChain *, 2> ToolChains; + + /// The derived arguments associated with this builder. + DerivedArgList &Args; + + /// The inputs associated with this builder. + const Driver::InputList &Inputs; + + /// The associated offload kind. + Action::OffloadKind AssociatedOffloadKind = Action::OFK_None; + + public: + DeviceActionBuilder(Compilation &C, DerivedArgList &Args, + const Driver::InputList &Inputs, + Action::OffloadKind AssociatedOffloadKind) + : C(C), Args(Args), Inputs(Inputs), + AssociatedOffloadKind(AssociatedOffloadKind) {} + virtual ~DeviceActionBuilder() {} + + /// Fill up the array \a DA with all the device dependences that should be + /// added to the provided host action \a HostAction. By default it is + /// inactive. + virtual ActionBuilderReturnCode + getDeviceDependences(OffloadAction::DeviceDependences &DA, + phases::ID CurPhase, phases::ID FinalPhase, + PhasesTy &Phases) { + return ABRT_Inactive; + } + + /// Update the state to include the provided host action \a HostAction as a + /// dependency of the current device action. By default it is inactive. + virtual ActionBuilderReturnCode addDeviceDepences(Action *HostAction) { + return ABRT_Inactive; + } + + /// Append top level actions generated by the builder. Return true if errors + /// were found. + virtual void appendTopLevelActions(ActionList &AL) {} + + /// Append linker actions generated by the builder. Return true if errors + /// were found. + virtual void appendLinkDependences(OffloadAction::DeviceDependences &DA) {} + + /// Initialize the builder. Return true if any initialization errors are + /// found. + virtual bool initialize() { return false; } + + /// Return true if the builder can use bundling/unbundling. + virtual bool canUseBundlerUnbundler() const { return false; } + + /// Return true if this builder is valid. We have a valid builder if we have + /// associated device tool chains. + bool isValid() { return !ToolChains.empty(); } + + /// Return the associated offload kind. + Action::OffloadKind getAssociatedOffloadKind() { + return AssociatedOffloadKind; + } + }; + + /// \brief CUDA action builder. It injects device code in the host backend + /// action. + class CudaActionBuilder final : public DeviceActionBuilder { + /// Flags to signal if the user requested host-only or device-only + /// compilation. + bool CompileHostOnly = false; + bool CompileDeviceOnly = false; + + /// List of GPU architectures to use in this compilation. + SmallVector<CudaArch, 4> GpuArchList; + + /// The CUDA actions for the current input. + ActionList CudaDeviceActions; + + /// The CUDA fat binary if it was generated for the current input. + Action *CudaFatBinary = nullptr; + + /// Flag that is set to true if this builder acted on the current input. + bool IsActive = false; + + public: + CudaActionBuilder(Compilation &C, DerivedArgList &Args, + const Driver::InputList &Inputs) + : DeviceActionBuilder(C, Args, Inputs, Action::OFK_Cuda) {} + + ActionBuilderReturnCode + getDeviceDependences(OffloadAction::DeviceDependences &DA, + phases::ID CurPhase, phases::ID FinalPhase, + PhasesTy &Phases) override { + if (!IsActive) + return ABRT_Inactive; + + // If we don't have more CUDA actions, we don't have any dependences to + // create for the host. + if (CudaDeviceActions.empty()) + return ABRT_Success; + + assert(CudaDeviceActions.size() == GpuArchList.size() && + "Expecting one action per GPU architecture."); + assert(!CompileHostOnly && + "Not expecting CUDA actions in host-only compilation."); + + // If we are generating code for the device or we are in a backend phase, + // we attempt to generate the fat binary. We compile each arch to ptx and + // assemble to cubin, then feed the cubin *and* the ptx into a device + // "link" action, which uses fatbinary to combine these cubins into one + // fatbin. The fatbin is then an input to the host action if not in + // device-only mode. + if (CompileDeviceOnly || CurPhase == phases::Backend) { + ActionList DeviceActions; + for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { + // Produce the device action from the current phase up to the assemble + // phase. + for (auto Ph : Phases) { + // Skip the phases that were already dealt with. + if (Ph < CurPhase) + continue; + // We have to be consistent with the host final phase. + if (Ph > FinalPhase) + break; + + CudaDeviceActions[I] = C.getDriver().ConstructPhaseAction( + C, Args, Ph, CudaDeviceActions[I]); + + if (Ph == phases::Assemble) + break; + } + + // If we didn't reach the assemble phase, we can't generate the fat + // binary. We don't need to generate the fat binary if we are not in + // device-only mode. + if (!isa<AssembleJobAction>(CudaDeviceActions[I]) || + CompileDeviceOnly) + continue; + + Action *AssembleAction = CudaDeviceActions[I]; + assert(AssembleAction->getType() == types::TY_Object); + assert(AssembleAction->getInputs().size() == 1); + + Action *BackendAction = AssembleAction->getInputs()[0]; + assert(BackendAction->getType() == types::TY_PP_Asm); + + for (auto &A : {AssembleAction, BackendAction}) { + OffloadAction::DeviceDependences DDep; + DDep.add(*A, *ToolChains.front(), CudaArchToString(GpuArchList[I]), + Action::OFK_Cuda); + DeviceActions.push_back( + C.MakeAction<OffloadAction>(DDep, A->getType())); + } + } + + // We generate the fat binary if we have device input actions. + if (!DeviceActions.empty()) { + CudaFatBinary = + C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN); + + if (!CompileDeviceOnly) { + DA.add(*CudaFatBinary, *ToolChains.front(), /*BoundArch=*/nullptr, + Action::OFK_Cuda); + // Clear the fat binary, it is already a dependence to an host + // action. + CudaFatBinary = nullptr; + } + + // Remove the CUDA actions as they are already connected to an host + // action or fat binary. + CudaDeviceActions.clear(); + } + + // We avoid creating host action in device-only mode. + return CompileDeviceOnly ? ABRT_Ignore_Host : ABRT_Success; + } else if (CurPhase > phases::Backend) { + // If we are past the backend phase and still have a device action, we + // don't have to do anything as this action is already a device + // top-level action. + return ABRT_Success; + } + + assert(CurPhase < phases::Backend && "Generating single CUDA " + "instructions should only occur " + "before the backend phase!"); + + // By default, we produce an action for each device arch. + for (Action *&A : CudaDeviceActions) + A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A); + + return ABRT_Success; + } + + ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override { + // While generating code for CUDA, we only depend on the host input action + // to trigger the creation of all the CUDA device actions. + + // If we are dealing with an input action, replicate it for each GPU + // architecture. If we are in host-only mode we return 'success' so that + // the host uses the CUDA offload kind. + if (auto *IA = dyn_cast<InputAction>(HostAction)) { + assert(!GpuArchList.empty() && + "We should have at least one GPU architecture."); + + // If the host input is not CUDA, we don't need to bother about this + // input. + if (IA->getType() != types::TY_CUDA) { + // The builder will ignore this input. + IsActive = false; + return ABRT_Inactive; + } + + // Set the flag to true, so that the builder acts on the current input. + IsActive = true; + + if (CompileHostOnly) + return ABRT_Success; + + // Replicate inputs for each GPU architecture. + for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) + CudaDeviceActions.push_back(C.MakeAction<InputAction>( + IA->getInputArg(), types::TY_CUDA_DEVICE)); + + return ABRT_Success; + } + + return IsActive ? ABRT_Success : ABRT_Inactive; + } + + void appendTopLevelActions(ActionList &AL) override { + // Utility to append actions to the top level list. + auto AddTopLevel = [&](Action *A, CudaArch BoundArch) { + OffloadAction::DeviceDependences Dep; + Dep.add(*A, *ToolChains.front(), CudaArchToString(BoundArch), + Action::OFK_Cuda); + AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType())); + }; + + // If we have a fat binary, add it to the list. + if (CudaFatBinary) { + AddTopLevel(CudaFatBinary, CudaArch::UNKNOWN); + CudaDeviceActions.clear(); + CudaFatBinary = nullptr; + return; + } + + if (CudaDeviceActions.empty()) + return; + + // If we have CUDA actions at this point, that's because we have a have + // partial compilation, so we should have an action for each GPU + // architecture. + assert(CudaDeviceActions.size() == GpuArchList.size() && + "Expecting one action per GPU architecture."); + assert(ToolChains.size() == 1 && + "Expecting to have a sing CUDA toolchain."); + for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) + AddTopLevel(CudaDeviceActions[I], GpuArchList[I]); + + CudaDeviceActions.clear(); + } + + bool initialize() override { + // We don't need to support CUDA. + if (!C.hasOffloadToolChain<Action::OFK_Cuda>()) + return false; + + const ToolChain *HostTC = C.getSingleOffloadToolChain<Action::OFK_Host>(); + assert(HostTC && "No toolchain for host compilation."); + if (HostTC->getTriple().isNVPTX()) { + // We do not support targeting NVPTX for host compilation. Throw + // an error and abort pipeline construction early so we don't trip + // asserts that assume device-side compilation. + C.getDriver().Diag(diag::err_drv_cuda_nvptx_host); + return true; + } + + ToolChains.push_back(C.getSingleOffloadToolChain<Action::OFK_Cuda>()); + + Arg *PartialCompilationArg = Args.getLastArg( + options::OPT_cuda_host_only, options::OPT_cuda_device_only, + options::OPT_cuda_compile_host_device); + CompileHostOnly = PartialCompilationArg && + PartialCompilationArg->getOption().matches( + options::OPT_cuda_host_only); + CompileDeviceOnly = PartialCompilationArg && + PartialCompilationArg->getOption().matches( + options::OPT_cuda_device_only); + + // Collect all cuda_gpu_arch parameters, removing duplicates. + std::set<CudaArch> GpuArchs; + bool Error = false; + for (Arg *A : Args) { + if (!(A->getOption().matches(options::OPT_cuda_gpu_arch_EQ) || + A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ))) + continue; + A->claim(); + + const StringRef ArchStr = A->getValue(); + if (A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ) && + ArchStr == "all") { + GpuArchs.clear(); + continue; + } + CudaArch Arch = StringToCudaArch(ArchStr); + if (Arch == CudaArch::UNKNOWN) { + C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; + Error = true; + } else if (A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) + GpuArchs.insert(Arch); + else if (A->getOption().matches(options::OPT_no_cuda_gpu_arch_EQ)) + GpuArchs.erase(Arch); + else + llvm_unreachable("Unexpected option."); + } + + // Collect list of GPUs remaining in the set. + for (CudaArch Arch : GpuArchs) + GpuArchList.push_back(Arch); + + // Default to sm_20 which is the lowest common denominator for + // supported GPUs. sm_20 code should work correctly, if + // suboptimally, on all newer GPUs. + if (GpuArchList.empty()) + GpuArchList.push_back(CudaArch::SM_20); + + return Error; + } + }; + + /// OpenMP action builder. The host bitcode is passed to the device frontend + /// and all the device linked images are passed to the host link phase. + class OpenMPActionBuilder final : public DeviceActionBuilder { + /// The OpenMP actions for the current input. + ActionList OpenMPDeviceActions; + + /// The linker inputs obtained for each toolchain. + SmallVector<ActionList, 8> DeviceLinkerInputs; + + public: + OpenMPActionBuilder(Compilation &C, DerivedArgList &Args, + const Driver::InputList &Inputs) + : DeviceActionBuilder(C, Args, Inputs, Action::OFK_OpenMP) {} + + ActionBuilderReturnCode + getDeviceDependences(OffloadAction::DeviceDependences &DA, + phases::ID CurPhase, phases::ID FinalPhase, + PhasesTy &Phases) override { + + // We should always have an action for each input. + assert(OpenMPDeviceActions.size() == ToolChains.size() && + "Number of OpenMP actions and toolchains do not match."); + + // The host only depends on device action in the linking phase, when all + // the device images have to be embedded in the host image. + if (CurPhase == phases::Link) { + assert(ToolChains.size() == DeviceLinkerInputs.size() && + "Toolchains and linker inputs sizes do not match."); + auto LI = DeviceLinkerInputs.begin(); + for (auto *A : OpenMPDeviceActions) { + LI->push_back(A); + ++LI; + } + + // We passed the device action as a host dependence, so we don't need to + // do anything else with them. + OpenMPDeviceActions.clear(); + return ABRT_Success; + } + + // By default, we produce an action for each device arch. + for (Action *&A : OpenMPDeviceActions) + A = C.getDriver().ConstructPhaseAction(C, Args, CurPhase, A); + + return ABRT_Success; + } + + ActionBuilderReturnCode addDeviceDepences(Action *HostAction) override { + + // If this is an input action replicate it for each OpenMP toolchain. + if (auto *IA = dyn_cast<InputAction>(HostAction)) { + OpenMPDeviceActions.clear(); + for (unsigned I = 0; I < ToolChains.size(); ++I) + OpenMPDeviceActions.push_back( + C.MakeAction<InputAction>(IA->getInputArg(), IA->getType())); + return ABRT_Success; + } + + // If this is an unbundling action use it as is for each OpenMP toolchain. + if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(HostAction)) { + OpenMPDeviceActions.clear(); + for (unsigned I = 0; I < ToolChains.size(); ++I) { + OpenMPDeviceActions.push_back(UA); + UA->registerDependentActionInfo( + ToolChains[I], /*BoundArch=*/StringRef(), Action::OFK_OpenMP); + } + return ABRT_Success; + } + + // When generating code for OpenMP we use the host compile phase result as + // a dependence to the device compile phase so that it can learn what + // declarations should be emitted. However, this is not the only use for + // the host action, so we prevent it from being collapsed. + if (isa<CompileJobAction>(HostAction)) { + HostAction->setCannotBeCollapsedWithNextDependentAction(); + assert(ToolChains.size() == OpenMPDeviceActions.size() && + "Toolchains and device action sizes do not match."); + OffloadAction::HostDependence HDep( + *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), + /*BoundArch=*/nullptr, Action::OFK_OpenMP); + auto TC = ToolChains.begin(); + for (Action *&A : OpenMPDeviceActions) { + assert(isa<CompileJobAction>(A)); + OffloadAction::DeviceDependences DDep; + DDep.add(*A, **TC, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + A = C.MakeAction<OffloadAction>(HDep, DDep); + ++TC; + } + } + return ABRT_Success; + } + + void appendTopLevelActions(ActionList &AL) override { + if (OpenMPDeviceActions.empty()) + return; + + // We should always have an action for each input. + assert(OpenMPDeviceActions.size() == ToolChains.size() && + "Number of OpenMP actions and toolchains do not match."); + + // Append all device actions followed by the proper offload action. + auto TI = ToolChains.begin(); + for (auto *A : OpenMPDeviceActions) { + OffloadAction::DeviceDependences Dep; + Dep.add(*A, **TI, /*BoundArch=*/nullptr, Action::OFK_OpenMP); + AL.push_back(C.MakeAction<OffloadAction>(Dep, A->getType())); + ++TI; + } + // We no longer need the action stored in this builder. + OpenMPDeviceActions.clear(); + } + + void appendLinkDependences(OffloadAction::DeviceDependences &DA) override { + assert(ToolChains.size() == DeviceLinkerInputs.size() && + "Toolchains and linker inputs sizes do not match."); + + // Append a new link action for each device. + auto TC = ToolChains.begin(); + for (auto &LI : DeviceLinkerInputs) { + auto *DeviceLinkAction = + C.MakeAction<LinkJobAction>(LI, types::TY_Image); + DA.add(*DeviceLinkAction, **TC, /*BoundArch=*/nullptr, + Action::OFK_OpenMP); + ++TC; + } + } + + bool initialize() override { + // Get the OpenMP toolchains. If we don't get any, the action builder will + // know there is nothing to do related to OpenMP offloading. + auto OpenMPTCRange = C.getOffloadToolChains<Action::OFK_OpenMP>(); + for (auto TI = OpenMPTCRange.first, TE = OpenMPTCRange.second; TI != TE; + ++TI) + ToolChains.push_back(TI->second); + + DeviceLinkerInputs.resize(ToolChains.size()); + return false; + } + + bool canUseBundlerUnbundler() const override { + // OpenMP should use bundled files whenever possible. + return true; + } + }; + + /// + /// TODO: Add the implementation for other specialized builders here. + /// + + /// Specialized builders being used by this offloading action builder. + SmallVector<DeviceActionBuilder *, 4> SpecializedBuilders; + + /// Flag set to true if all valid builders allow file bundling/unbundling. + bool CanUseBundler; + +public: + OffloadingActionBuilder(Compilation &C, DerivedArgList &Args, + const Driver::InputList &Inputs) + : C(C) { + // Create a specialized builder for each device toolchain. + + IsValid = true; + + // Create a specialized builder for CUDA. + SpecializedBuilders.push_back(new CudaActionBuilder(C, Args, Inputs)); + + // Create a specialized builder for OpenMP. + SpecializedBuilders.push_back(new OpenMPActionBuilder(C, Args, Inputs)); + + // + // TODO: Build other specialized builders here. + // + + // Initialize all the builders, keeping track of errors. If all valid + // builders agree that we can use bundling, set the flag to true. + unsigned ValidBuilders = 0u; + unsigned ValidBuildersSupportingBundling = 0u; + for (auto *SB : SpecializedBuilders) { + IsValid = IsValid && !SB->initialize(); + + // Update the counters if the builder is valid. + if (SB->isValid()) { + ++ValidBuilders; + if (SB->canUseBundlerUnbundler()) + ++ValidBuildersSupportingBundling; + } + } + CanUseBundler = + ValidBuilders && ValidBuilders == ValidBuildersSupportingBundling; + } + + ~OffloadingActionBuilder() { + for (auto *SB : SpecializedBuilders) + delete SB; + } + + /// Generate an action that adds device dependences (if any) to a host action. + /// If no device dependence actions exist, just return the host action \a + /// HostAction. If an error is found or if no builder requires the host action + /// to be generated, return nullptr. + Action * + addDeviceDependencesToHostAction(Action *HostAction, const Arg *InputArg, + phases::ID CurPhase, phases::ID FinalPhase, + DeviceActionBuilder::PhasesTy &Phases) { + if (!IsValid) + return nullptr; + + if (SpecializedBuilders.empty()) + return HostAction; + + assert(HostAction && "Invalid host action!"); + + OffloadAction::DeviceDependences DDeps; + // Check if all the programming models agree we should not emit the host + // action. Also, keep track of the offloading kinds employed. + auto &OffloadKind = InputArgToOffloadKindMap[InputArg]; + unsigned InactiveBuilders = 0u; + unsigned IgnoringBuilders = 0u; + for (auto *SB : SpecializedBuilders) { + if (!SB->isValid()) { + ++InactiveBuilders; + continue; + } + + auto RetCode = + SB->getDeviceDependences(DDeps, CurPhase, FinalPhase, Phases); + + // If the builder explicitly says the host action should be ignored, + // we need to increment the variable that tracks the builders that request + // the host object to be ignored. + if (RetCode == DeviceActionBuilder::ABRT_Ignore_Host) + ++IgnoringBuilders; + + // Unless the builder was inactive for this action, we have to record the + // offload kind because the host will have to use it. + if (RetCode != DeviceActionBuilder::ABRT_Inactive) + OffloadKind |= SB->getAssociatedOffloadKind(); + } + + // If all builders agree that the host object should be ignored, just return + // nullptr. + if (IgnoringBuilders && + SpecializedBuilders.size() == (InactiveBuilders + IgnoringBuilders)) + return nullptr; + + if (DDeps.getActions().empty()) + return HostAction; + + // We have dependences we need to bundle together. We use an offload action + // for that. OffloadAction::HostDependence HDep( *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), - /*BoundArch=*/nullptr, Action::OFK_Cuda); - return C.MakeAction<OffloadAction>(HDep); + /*BoundArch=*/nullptr, DDeps); + return C.MakeAction<OffloadAction>(HDep, DDeps); } - // Collect all cuda_gpu_arch parameters, removing duplicates. - SmallVector<CudaArch, 4> GpuArchList; - llvm::SmallSet<CudaArch, 4> GpuArchs; - for (Arg *A : Args) { - if (!A->getOption().matches(options::OPT_cuda_gpu_arch_EQ)) - continue; - A->claim(); + /// Generate an action that adds a host dependence to a device action. The + /// results will be kept in this action builder. Return true if an error was + /// found. + bool addHostDependenceToDeviceActions(Action *&HostAction, + const Arg *InputArg) { + if (!IsValid) + return true; - const auto &ArchStr = A->getValue(); - CudaArch Arch = StringToCudaArch(ArchStr); - if (Arch == CudaArch::UNKNOWN) - C.getDriver().Diag(clang::diag::err_drv_cuda_bad_gpu_arch) << ArchStr; - else if (GpuArchs.insert(Arch).second) - GpuArchList.push_back(Arch); - } - - // Default to sm_20 which is the lowest common denominator for supported GPUs. - // sm_20 code should work correctly, if suboptimally, on all newer GPUs. - if (GpuArchList.empty()) - GpuArchList.push_back(CudaArch::SM_20); - - // Replicate inputs for each GPU architecture. - Driver::InputList CudaDeviceInputs; - for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) - CudaDeviceInputs.push_back(std::make_pair(types::TY_CUDA_DEVICE, InputArg)); - - // Build actions for all device inputs. - ActionList CudaDeviceActions; - C.getDriver().BuildActions(C, Args, CudaDeviceInputs, CudaDeviceActions); - assert(GpuArchList.size() == CudaDeviceActions.size() && - "Failed to create actions for all devices"); - - // Check whether any of device actions stopped before they could generate PTX. - bool PartialCompilation = - llvm::any_of(CudaDeviceActions, [](const Action *a) { - return a->getKind() != Action::AssembleJobClass; - }); + // If we are supporting bundling/unbundling and the current action is an + // input action of non-source file, we replace the host action by the + // unbundling action. The bundler tool has the logic to detect if an input + // is a bundle or not and if the input is not a bundle it assumes it is a + // host file. Therefore it is safe to create an unbundling action even if + // the input is not a bundle. + if (CanUseBundler && isa<InputAction>(HostAction) && + InputArg->getOption().getKind() == llvm::opt::Option::InputClass && + !types::isSrcFile(HostAction->getType())) { + auto UnbundlingHostAction = + C.MakeAction<OffloadUnbundlingJobAction>(HostAction); + UnbundlingHostAction->registerDependentActionInfo( + C.getSingleOffloadToolChain<Action::OFK_Host>(), + /*BoundArch=*/StringRef(), Action::OFK_Host); + HostAction = UnbundlingHostAction; + } - const ToolChain *CudaTC = C.getSingleOffloadToolChain<Action::OFK_Cuda>(); + assert(HostAction && "Invalid host action!"); - // Figure out what to do with device actions -- pass them as inputs to the - // host action or run each of them independently. - if (PartialCompilation || CompileDeviceOnly) { - // In case of partial or device-only compilation results of device actions - // are not consumed by the host action device actions have to be added to - // top-level actions list with AtTopLevel=true and run independently. + // Register the offload kinds that are used. + auto &OffloadKind = InputArgToOffloadKindMap[InputArg]; + for (auto *SB : SpecializedBuilders) { + if (!SB->isValid()) + continue; - // -o is ambiguous if we have more than one top-level action. - if (Args.hasArg(options::OPT_o) && - (!CompileDeviceOnly || GpuArchList.size() > 1)) { - C.getDriver().Diag( - clang::diag::err_drv_output_argument_with_multiple_files); - return nullptr; + auto RetCode = SB->addDeviceDepences(HostAction); + + // Host dependences for device actions are not compatible with that same + // action being ignored. + assert(RetCode != DeviceActionBuilder::ABRT_Ignore_Host && + "Host dependence not expected to be ignored.!"); + + // Unless the builder was inactive for this action, we have to record the + // offload kind because the host will have to use it. + if (RetCode != DeviceActionBuilder::ABRT_Inactive) + OffloadKind |= SB->getAssociatedOffloadKind(); } - for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { - OffloadAction::DeviceDependences DDep; - DDep.add(*CudaDeviceActions[I], *CudaTC, CudaArchToString(GpuArchList[I]), - Action::OFK_Cuda); - Actions.push_back( - C.MakeAction<OffloadAction>(DDep, CudaDeviceActions[I]->getType())); + return false; + } + + /// Add the offloading top level actions to the provided action list. This + /// function can replace the host action by a bundling action if the + /// programming models allow it. + bool appendTopLevelActions(ActionList &AL, Action *HostAction, + const Arg *InputArg) { + // Get the device actions to be appended. + ActionList OffloadAL; + for (auto *SB : SpecializedBuilders) { + if (!SB->isValid()) + continue; + SB->appendTopLevelActions(OffloadAL); } - // Kill host action in case of device-only compilation. - if (CompileDeviceOnly) - return nullptr; - return HostAction; - } - - // If we're not a partial or device-only compilation, we compile each arch to - // ptx and assemble to cubin, then feed the cubin *and* the ptx into a device - // "link" action, which uses fatbinary to combine these cubins into one - // fatbin. The fatbin is then an input to the host compilation. - ActionList DeviceActions; - for (unsigned I = 0, E = GpuArchList.size(); I != E; ++I) { - Action* AssembleAction = CudaDeviceActions[I]; - assert(AssembleAction->getType() == types::TY_Object); - assert(AssembleAction->getInputs().size() == 1); - - Action* BackendAction = AssembleAction->getInputs()[0]; - assert(BackendAction->getType() == types::TY_PP_Asm); - - for (auto &A : {AssembleAction, BackendAction}) { - OffloadAction::DeviceDependences DDep; - DDep.add(*A, *CudaTC, CudaArchToString(GpuArchList[I]), Action::OFK_Cuda); - DeviceActions.push_back(C.MakeAction<OffloadAction>(DDep, A->getType())); - } - } - auto FatbinAction = - C.MakeAction<LinkJobAction>(DeviceActions, types::TY_CUDA_FATBIN); - - // Return a new host action that incorporates original host action and all - // device actions. - OffloadAction::HostDependence HDep( - *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), - /*BoundArch=*/nullptr, Action::OFK_Cuda); - OffloadAction::DeviceDependences DDep; - DDep.add(*FatbinAction, *CudaTC, /*BoundArch=*/nullptr, Action::OFK_Cuda); - return C.MakeAction<OffloadAction>(HDep, DDep); -} + + // If we can use the bundler, replace the host action by the bundling one in + // the resulting list. Otherwise, just append the device actions. + if (CanUseBundler && !OffloadAL.empty()) { + // Add the host action to the list in order to create the bundling action. + OffloadAL.push_back(HostAction); + + // We expect that the host action was just appended to the action list + // before this method was called. + assert(HostAction == AL.back() && "Host action not in the list??"); + HostAction = C.MakeAction<OffloadBundlingJobAction>(OffloadAL); + AL.back() = HostAction; + } else + AL.append(OffloadAL.begin(), OffloadAL.end()); + + // Propagate to the current host action (if any) the offload information + // associated with the current input. + if (HostAction) + HostAction->propagateHostOffloadInfo(InputArgToOffloadKindMap[InputArg], + /*BoundArch=*/nullptr); + return false; + } + + /// Processes the host linker action. This currently consists of replacing it + /// with an offload action if there are device link objects and propagate to + /// the host action all the offload kinds used in the current compilation. The + /// resulting action is returned. + Action *processHostLinkAction(Action *HostAction) { + // Add all the dependences from the device linking actions. + OffloadAction::DeviceDependences DDeps; + for (auto *SB : SpecializedBuilders) { + if (!SB->isValid()) + continue; + + SB->appendLinkDependences(DDeps); + } + + // Calculate all the offload kinds used in the current compilation. + unsigned ActiveOffloadKinds = 0u; + for (auto &I : InputArgToOffloadKindMap) + ActiveOffloadKinds |= I.second; + + // If we don't have device dependencies, we don't have to create an offload + // action. + if (DDeps.getActions().empty()) { + // Propagate all the active kinds to host action. Given that it is a link + // action it is assumed to depend on all actions generated so far. + HostAction->propagateHostOffloadInfo(ActiveOffloadKinds, + /*BoundArch=*/nullptr); + return HostAction; + } + + // Create the offload action with all dependences. When an offload action + // is created the kinds are propagated to the host action, so we don't have + // to do that explicitly here. + OffloadAction::HostDependence HDep( + *HostAction, *C.getSingleOffloadToolChain<Action::OFK_Host>(), + /*BoundArch*/ nullptr, ActiveOffloadKinds); + return C.MakeAction<OffloadAction>(HDep, DDeps); + } +}; +} // anonymous namespace. void Driver::BuildActions(Compilation &C, DerivedArgList &Args, const InputList &Inputs, ActionList &Actions) const { @@ -1621,8 +2445,8 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, YcArg = YuArg = nullptr; } - // Track the host offload kinds used on this compilation. - unsigned CompilationActiveOffloadHostKinds = 0u; + // Builder to be used to build offloading actions. + OffloadingActionBuilder OffloadBuilder(C, Args, Inputs); // Construct the actions to perform. ActionList LinkerInputs; @@ -1670,12 +2494,14 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, if (YcArg) { // Add a separate precompile phase for the compile phase. if (FinalPhase >= phases::Compile) { + const types::ID HeaderType = lookupHeaderTypeForSourceType(InputType); llvm::SmallVector<phases::ID, phases::MaxNumberOfPhases> PCHPL; - types::getCompilationPhases(types::TY_CXXHeader, PCHPL); + types::getCompilationPhases(HeaderType, PCHPL); Arg *PchInputArg = MakeInputArg(Args, Opts, YcArg->getValue()); // Build the pipeline for the pch file. - Action *ClangClPch = C.MakeAction<InputAction>(*PchInputArg, InputType); + Action *ClangClPch = + C.MakeAction<InputAction>(*PchInputArg, HeaderType); for (phases::ID Phase : PCHPL) ClangClPch = ConstructPhaseAction(C, Args, Phase, ClangClPch); assert(ClangClPch); @@ -1686,17 +2512,14 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, } } - phases::ID CudaInjectionPhase = - (phases::Compile < FinalPhase && - llvm::find(PL, phases::Compile) != PL.end()) - ? phases::Compile - : FinalPhase; - - // Track the host offload kinds used on this input. - unsigned InputActiveOffloadHostKinds = 0u; - // Build the pipeline for this file. Action *Current = C.MakeAction<InputAction>(*InputArg, InputType); + + // Use the current host action in any of the offloading actions, if + // required. + if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) + break; + for (SmallVectorImpl<phases::ID>::iterator i = PL.begin(), e = PL.end(); i != e; ++i) { phases::ID Phase = *i; @@ -1705,6 +2528,12 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, if (Phase > FinalPhase) break; + // Add any offload action the host action depends on. + Current = OffloadBuilder.addDeviceDependencesToHostAction( + Current, InputArg, Phase, FinalPhase, PL); + if (!Current) + break; + // Queue linker inputs. if (Phase == phases::Link) { assert((i + 1) == e && "linking must be final compilation step."); @@ -1713,48 +2542,37 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, break; } - // Some types skip the assembler phase (e.g., llvm-bc), but we can't - // encode this in the steps because the intermediate type depends on - // arguments. Just special case here. - if (Phase == phases::Assemble && Current->getType() != types::TY_PP_Asm) - continue; - // Otherwise construct the appropriate action. - Current = ConstructPhaseAction(C, Args, Phase, Current); + auto *NewCurrent = ConstructPhaseAction(C, Args, Phase, Current); - if (InputType == types::TY_CUDA && Phase == CudaInjectionPhase) { - Current = buildCudaActions(C, Args, InputArg, Current, Actions); - if (!Current) - break; + // We didn't create a new action, so we will just move to the next phase. + if (NewCurrent == Current) + continue; - // We produced a CUDA action for this input, so the host has to support - // CUDA. - InputActiveOffloadHostKinds |= Action::OFK_Cuda; - CompilationActiveOffloadHostKinds |= Action::OFK_Cuda; - } + Current = NewCurrent; + + // Use the current host action in any of the offloading actions, if + // required. + if (OffloadBuilder.addHostDependenceToDeviceActions(Current, InputArg)) + break; if (Current->getType() == types::TY_Nothing) break; } - // If we ended with something, add to the output list. Also, propagate the - // offload information to the top-level host action related with the current - // input. - if (Current) { - if (InputActiveOffloadHostKinds) - Current->propagateHostOffloadInfo(InputActiveOffloadHostKinds, - /*BoundArch=*/nullptr); + // If we ended with something, add to the output list. + if (Current) Actions.push_back(Current); - } + + // Add any top level actions generated for offloading. + OffloadBuilder.appendTopLevelActions(Actions, Current, InputArg); } - // Add a link action if necessary and propagate the offload information for - // the current compilation. + // Add a link action if necessary. if (!LinkerInputs.empty()) { - Actions.push_back( - C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image)); - Actions.back()->propagateHostOffloadInfo(CompilationActiveOffloadHostKinds, - /*BoundArch=*/nullptr); + Action *LA = C.MakeAction<LinkJobAction>(LinkerInputs, types::TY_Image); + LA = OffloadBuilder.processHostLinkAction(LA); + Actions.push_back(LA); } // If we are linking, claim any options which are obviously only used for @@ -1776,6 +2594,13 @@ void Driver::BuildActions(Compilation &C, DerivedArgList &Args, Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args, phases::ID Phase, Action *Input) const { llvm::PrettyStackTraceString CrashInfo("Constructing phase actions"); + + // Some types skip the assembler phase (e.g., llvm-bc), but we can't + // encode this in the steps because the intermediate type depends on + // arguments. Just special case here. + if (Phase == phases::Assemble && Input->getType() != types::TY_PP_Asm) + return Input; + // Build the appropriate action. switch (Phase) { case phases::Link: @@ -1797,7 +2622,9 @@ Action *Driver::ConstructPhaseAction(Compilation &C, const ArgList &Args, return C.MakeAction<PreprocessJobAction>(Input, OutputTy); } case phases::Precompile: { - types::ID OutputTy = types::TY_PCH; + types::ID OutputTy = getPrecompiledType(Input->getType()); + assert(OutputTy != types::TY_INVALID && + "Cannot precompile this input type!"); if (Args.hasArg(options::OPT_fsyntax_only)) { // Syntax checks should not emit a PCH file OutputTy = types::TY_Nothing; @@ -1888,11 +2715,11 @@ void Driver::BuildJobs(Compilation &C) const { } BuildJobsForAction(C, A, &C.getDefaultToolChain(), - /*BoundArch*/ nullptr, + /*BoundArch*/ StringRef(), /*AtTopLevel*/ true, /*MultipleArchs*/ ArchNames.size() > 1, /*LinkingOutput*/ LinkingOutput, CachedResults, - /*BuildForOffloadDevice*/ false); + /*TargetDeviceOffloadKind*/ Action::OFK_None); } // If the user passed -Qunused-arguments or there were errors, don't warn @@ -1941,177 +2768,335 @@ void Driver::BuildJobs(Compilation &C) const { } } } -/// Collapse an offloading action looking for a job of the given type. The input -/// action is changed to the input of the collapsed sequence. If we effectively -/// had a collapse return the corresponding offloading action, otherwise return -/// null. -template <typename T> -static OffloadAction *collapseOffloadingAction(Action *&CurAction) { - if (!CurAction) - return nullptr; - if (auto *OA = dyn_cast<OffloadAction>(CurAction)) { - if (OA->hasHostDependence()) - if (auto *HDep = dyn_cast<T>(OA->getHostDependence())) { - CurAction = HDep; - return OA; - } - if (OA->hasSingleDeviceDependence()) - if (auto *DDep = dyn_cast<T>(OA->getSingleDeviceDependence())) { - CurAction = DDep; - return OA; + +namespace { +/// Utility class to control the collapse of dependent actions and select the +/// tools accordingly. +class ToolSelector final { + /// The tool chain this selector refers to. + const ToolChain &TC; + + /// The compilation this selector refers to. + const Compilation &C; + + /// The base action this selector refers to. + const JobAction *BaseAction; + + /// Set to true if the current toolchain refers to host actions. + bool IsHostSelector; + + /// Set to true if save-temps and embed-bitcode functionalities are active. + bool SaveTemps; + bool EmbedBitcode; + + /// Get previous dependent action or null if that does not exist. If + /// \a CanBeCollapsed is false, that action must be legal to collapse or + /// null will be returned. + const JobAction *getPrevDependentAction(const ActionList &Inputs, + ActionList &SavedOffloadAction, + bool CanBeCollapsed = true) { + // An option can be collapsed only if it has a single input. + if (Inputs.size() != 1) + return nullptr; + + Action *CurAction = *Inputs.begin(); + if (CanBeCollapsed && + !CurAction->isCollapsingWithNextDependentActionLegal()) + return nullptr; + + // If the input action is an offload action. Look through it and save any + // offload action that can be dropped in the event of a collapse. + if (auto *OA = dyn_cast<OffloadAction>(CurAction)) { + // If the dependent action is a device action, we will attempt to collapse + // only with other device actions. Otherwise, we would do the same but + // with host actions only. + if (!IsHostSelector) { + if (OA->hasSingleDeviceDependence(/*DoNotConsiderHostActions=*/true)) { + CurAction = + OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true); + if (CanBeCollapsed && + !CurAction->isCollapsingWithNextDependentActionLegal()) + return nullptr; + SavedOffloadAction.push_back(OA); + return dyn_cast<JobAction>(CurAction); + } + } else if (OA->hasHostDependence()) { + CurAction = OA->getHostDependence(); + if (CanBeCollapsed && + !CurAction->isCollapsingWithNextDependentActionLegal()) + return nullptr; + SavedOffloadAction.push_back(OA); + return dyn_cast<JobAction>(CurAction); } - } - return nullptr; -} -// Returns a Tool for a given JobAction. In case the action and its -// predecessors can be combined, updates Inputs with the inputs of the -// first combined action. If one of the collapsed actions is a -// CudaHostAction, updates CollapsedCHA with the pointer to it so the -// caller can deal with extra handling such action requires. -static const Tool *selectToolForJob(Compilation &C, bool SaveTemps, - bool EmbedBitcode, const ToolChain *TC, - const JobAction *JA, - const ActionList *&Inputs, - ActionList &CollapsedOffloadAction) { - const Tool *ToolForJob = nullptr; - CollapsedOffloadAction.clear(); - - // See if we should look for a compiler with an integrated assembler. We match - // bottom up, so what we are actually looking for is an assembler job with a - // compiler input. - - // Look through offload actions between assembler and backend actions. - Action *BackendJA = (isa<AssembleJobAction>(JA) && Inputs->size() == 1) - ? *Inputs->begin() - : nullptr; - auto *BackendOA = collapseOffloadingAction<BackendJobAction>(BackendJA); - - if (TC->useIntegratedAs() && !SaveTemps && - !C.getArgs().hasArg(options::OPT_via_file_asm) && - !C.getArgs().hasArg(options::OPT__SLASH_FA) && - !C.getArgs().hasArg(options::OPT__SLASH_Fa) && BackendJA && - isa<BackendJobAction>(BackendJA)) { - // A BackendJob is always preceded by a CompileJob, and without -save-temps - // or -fembed-bitcode, they will always get combined together, so instead of - // checking the backend tool, check if the tool for the CompileJob has an - // integrated assembler. For -fembed-bitcode, CompileJob is still used to - // look up tools for BackendJob, but they need to match before we can split - // them. - - // Look through offload actions between backend and compile actions. - Action *CompileJA = *BackendJA->getInputs().begin(); - auto *CompileOA = collapseOffloadingAction<CompileJobAction>(CompileJA); - - assert(CompileJA && isa<CompileJobAction>(CompileJA) && - "Backend job is not preceeded by compile job."); - const Tool *Compiler = TC->SelectTool(*cast<CompileJobAction>(CompileJA)); - if (!Compiler) return nullptr; + } + + return dyn_cast<JobAction>(CurAction); + } + + /// Return true if an assemble action can be collapsed. + bool canCollapseAssembleAction() const { + return TC.useIntegratedAs() && !SaveTemps && + !C.getArgs().hasArg(options::OPT_via_file_asm) && + !C.getArgs().hasArg(options::OPT__SLASH_FA) && + !C.getArgs().hasArg(options::OPT__SLASH_Fa); + } + + /// Return true if a preprocessor action can be collapsed. + bool canCollapsePreprocessorAction() const { + return !C.getArgs().hasArg(options::OPT_no_integrated_cpp) && + !C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps && + !C.getArgs().hasArg(options::OPT_rewrite_objc); + } + + /// Struct that relates an action with the offload actions that would be + /// collapsed with it. + struct JobActionInfo final { + /// The action this info refers to. + const JobAction *JA = nullptr; + /// The offload actions we need to take care off if this action is + /// collapsed. + ActionList SavedOffloadAction; + }; + + /// Append collapsed offload actions from the give nnumber of elements in the + /// action info array. + static void AppendCollapsedOffloadAction(ActionList &CollapsedOffloadAction, + ArrayRef<JobActionInfo> &ActionInfo, + unsigned ElementNum) { + assert(ElementNum <= ActionInfo.size() && "Invalid number of elements."); + for (unsigned I = 0; I < ElementNum; ++I) + CollapsedOffloadAction.append(ActionInfo[I].SavedOffloadAction.begin(), + ActionInfo[I].SavedOffloadAction.end()); + } + + /// Functions that attempt to perform the combining. They detect if that is + /// legal, and if so they update the inputs \a Inputs and the offload action + /// that were collapsed in \a CollapsedOffloadAction. A tool that deals with + /// the combined action is returned. If the combining is not legal or if the + /// tool does not exist, null is returned. + /// Currently three kinds of collapsing are supported: + /// - Assemble + Backend + Compile; + /// - Assemble + Backend ; + /// - Backend + Compile. + const Tool * + combineAssembleBackendCompile(ArrayRef<JobActionInfo> ActionInfo, + const ActionList *&Inputs, + ActionList &CollapsedOffloadAction) { + if (ActionInfo.size() < 3 || !canCollapseAssembleAction()) + return nullptr; + auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA); + auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA); + auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[2].JA); + if (!AJ || !BJ || !CJ) + return nullptr; + + // Get compiler tool. + const Tool *T = TC.SelectTool(*CJ); + if (!T) + return nullptr; + // When using -fembed-bitcode, it is required to have the same tool (clang) // for both CompilerJA and BackendJA. Otherwise, combine two stages. if (EmbedBitcode) { - JobAction *InputJA = cast<JobAction>(*Inputs->begin()); - const Tool *BackendTool = TC->SelectTool(*InputJA); - if (BackendTool == Compiler) - CompileJA = InputJA; - } - if (Compiler->hasIntegratedAssembler()) { - Inputs = &CompileJA->getInputs(); - ToolForJob = Compiler; - // Save the collapsed offload actions because they may still contain - // device actions. - if (CompileOA) - CollapsedOffloadAction.push_back(CompileOA); - if (BackendOA) - CollapsedOffloadAction.push_back(BackendOA); - } - } - - // A backend job should always be combined with the preceding compile job - // unless OPT_save_temps or OPT_fembed_bitcode is enabled and the compiler is - // capable of emitting LLVM IR as an intermediate output. - if (isa<BackendJobAction>(JA)) { - // Check if the compiler supports emitting LLVM IR. - assert(Inputs->size() == 1); - - // Look through offload actions between backend and compile actions. - Action *CompileJA = *JA->getInputs().begin(); - auto *CompileOA = collapseOffloadingAction<CompileJobAction>(CompileJA); - - assert(CompileJA && isa<CompileJobAction>(CompileJA) && - "Backend job is not preceeded by compile job."); - const Tool *Compiler = TC->SelectTool(*cast<CompileJobAction>(CompileJA)); - if (!Compiler) + const Tool *BT = TC.SelectTool(*BJ); + if (BT == T) + return nullptr; + } + + if (!T->hasIntegratedAssembler()) return nullptr; - if (!Compiler->canEmitIR() || - (!SaveTemps && !EmbedBitcode)) { - Inputs = &CompileJA->getInputs(); - ToolForJob = Compiler; - if (CompileOA) - CollapsedOffloadAction.push_back(CompileOA); - } + Inputs = &CJ->getInputs(); + AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo, + /*NumElements=*/3); + return T; } + const Tool *combineAssembleBackend(ArrayRef<JobActionInfo> ActionInfo, + const ActionList *&Inputs, + ActionList &CollapsedOffloadAction) { + if (ActionInfo.size() < 2 || !canCollapseAssembleAction()) + return nullptr; + auto *AJ = dyn_cast<AssembleJobAction>(ActionInfo[0].JA); + auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[1].JA); + if (!AJ || !BJ) + return nullptr; - // Otherwise use the tool for the current job. - if (!ToolForJob) - ToolForJob = TC->SelectTool(*JA); + // Retrieve the compile job, backend action must always be preceded by one. + ActionList CompileJobOffloadActions; + auto *CJ = getPrevDependentAction(BJ->getInputs(), CompileJobOffloadActions, + /*CanBeCollapsed=*/false); + if (!AJ || !BJ || !CJ) + return nullptr; - // See if we should use an integrated preprocessor. We do so when we have - // exactly one input, since this is the only use case we care about - // (irrelevant since we don't support combine yet). + assert(isa<CompileJobAction>(CJ) && + "Expecting compile job preceding backend job."); - // Look through offload actions after preprocessing. - Action *PreprocessJA = (Inputs->size() == 1) ? *Inputs->begin() : nullptr; - auto *PreprocessOA = - collapseOffloadingAction<PreprocessJobAction>(PreprocessJA); + // Get compiler tool. + const Tool *T = TC.SelectTool(*CJ); + if (!T) + return nullptr; - if (PreprocessJA && isa<PreprocessJobAction>(PreprocessJA) && - !C.getArgs().hasArg(options::OPT_no_integrated_cpp) && - !C.getArgs().hasArg(options::OPT_traditional_cpp) && !SaveTemps && - !C.getArgs().hasArg(options::OPT_rewrite_objc) && - ToolForJob->hasIntegratedCPP()) { - Inputs = &PreprocessJA->getInputs(); - if (PreprocessOA) - CollapsedOffloadAction.push_back(PreprocessOA); + if (!T->hasIntegratedAssembler()) + return nullptr; + + Inputs = &BJ->getInputs(); + AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo, + /*NumElements=*/2); + return T; } + const Tool *combineBackendCompile(ArrayRef<JobActionInfo> ActionInfo, + const ActionList *&Inputs, + ActionList &CollapsedOffloadAction) { + if (ActionInfo.size() < 2 || !canCollapsePreprocessorAction()) + return nullptr; + auto *BJ = dyn_cast<BackendJobAction>(ActionInfo[0].JA); + auto *CJ = dyn_cast<CompileJobAction>(ActionInfo[1].JA); + if (!BJ || !CJ) + return nullptr; + + // Get compiler tool. + const Tool *T = TC.SelectTool(*CJ); + if (!T) + return nullptr; + + if (T->canEmitIR() && (SaveTemps || EmbedBitcode)) + return nullptr; + + Inputs = &CJ->getInputs(); + AppendCollapsedOffloadAction(CollapsedOffloadAction, ActionInfo, + /*NumElements=*/2); + return T; + } + + /// Updates the inputs if the obtained tool supports combining with + /// preprocessor action, and the current input is indeed a preprocessor + /// action. If combining results in the collapse of offloading actions, those + /// are appended to \a CollapsedOffloadAction. + void combineWithPreprocessor(const Tool *T, const ActionList *&Inputs, + ActionList &CollapsedOffloadAction) { + if (!T || !canCollapsePreprocessorAction() || !T->hasIntegratedCPP()) + return; + + // Attempt to get a preprocessor action dependence. + ActionList PreprocessJobOffloadActions; + auto *PJ = getPrevDependentAction(*Inputs, PreprocessJobOffloadActions); + if (!PJ || !isa<PreprocessJobAction>(PJ)) + return; + + // This is legal to combine. Append any offload action we found and set the + // current inputs to preprocessor inputs. + CollapsedOffloadAction.append(PreprocessJobOffloadActions.begin(), + PreprocessJobOffloadActions.end()); + Inputs = &PJ->getInputs(); + } + +public: + ToolSelector(const JobAction *BaseAction, const ToolChain &TC, + const Compilation &C, bool SaveTemps, bool EmbedBitcode) + : TC(TC), C(C), BaseAction(BaseAction), SaveTemps(SaveTemps), + EmbedBitcode(EmbedBitcode) { + assert(BaseAction && "Invalid base action."); + IsHostSelector = BaseAction->getOffloadingDeviceKind() == Action::OFK_None; + } + + /// Check if a chain of actions can be combined and return the tool that can + /// handle the combination of actions. The pointer to the current inputs \a + /// Inputs and the list of offload actions \a CollapsedOffloadActions + /// connected to collapsed actions are updated accordingly. The latter enables + /// the caller of the selector to process them afterwards instead of just + /// dropping them. If no suitable tool is found, null will be returned. + const Tool *getTool(const ActionList *&Inputs, + ActionList &CollapsedOffloadAction) { + // + // Get the largest chain of actions that we could combine. + // + + SmallVector<JobActionInfo, 5> ActionChain(1); + ActionChain.back().JA = BaseAction; + while (ActionChain.back().JA) { + const Action *CurAction = ActionChain.back().JA; + + // Grow the chain by one element. + ActionChain.resize(ActionChain.size() + 1); + JobActionInfo &AI = ActionChain.back(); + + // Attempt to fill it with the + AI.JA = + getPrevDependentAction(CurAction->getInputs(), AI.SavedOffloadAction); + } + + // Pop the last action info as it could not be filled. + ActionChain.pop_back(); + + // + // Attempt to combine actions. If all combining attempts failed, just return + // the tool of the provided action. At the end we attempt to combine the + // action with any preprocessor action it may depend on. + // - return ToolForJob; + const Tool *T = combineAssembleBackendCompile(ActionChain, Inputs, + CollapsedOffloadAction); + if (!T) + T = combineAssembleBackend(ActionChain, Inputs, CollapsedOffloadAction); + if (!T) + T = combineBackendCompile(ActionChain, Inputs, CollapsedOffloadAction); + if (!T) { + Inputs = &BaseAction->getInputs(); + T = TC.SelectTool(*BaseAction); + } + + combineWithPreprocessor(T, Inputs, CollapsedOffloadAction); + return T; + } +}; } -InputInfo Driver::BuildJobsForAction( - Compilation &C, const Action *A, const ToolChain *TC, const char *BoundArch, - bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, - std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, - bool BuildForOffloadDevice) const { - // The bound arch is not necessarily represented in the toolchain's triple -- - // for example, armv7 and armv7s both map to the same triple -- so we need - // both in our map. +/// Return a string that uniquely identifies the result of a job. The bound arch +/// is not necessarily represented in the toolchain's triple -- for example, +/// armv7 and armv7s both map to the same triple -- so we need both in our map. +/// Also, we need to add the offloading device kind, as the same tool chain can +/// be used for host and device for some programming models, e.g. OpenMP. +static std::string GetTriplePlusArchString(const ToolChain *TC, + StringRef BoundArch, + Action::OffloadKind OffloadKind) { std::string TriplePlusArch = TC->getTriple().normalize(); - if (BoundArch) { + if (!BoundArch.empty()) { TriplePlusArch += "-"; TriplePlusArch += BoundArch; } - std::pair<const Action *, std::string> ActionTC = {A, TriplePlusArch}; + TriplePlusArch += "-"; + TriplePlusArch += Action::GetOffloadKindName(OffloadKind); + return TriplePlusArch; +} + +InputInfo Driver::BuildJobsForAction( + Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, + bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, + std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, + Action::OffloadKind TargetDeviceOffloadKind) const { + std::pair<const Action *, std::string> ActionTC = { + A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; auto CachedResult = CachedResults.find(ActionTC); if (CachedResult != CachedResults.end()) { return CachedResult->second; } InputInfo Result = BuildJobsForActionNoCache( C, A, TC, BoundArch, AtTopLevel, MultipleArchs, LinkingOutput, - CachedResults, BuildForOffloadDevice); + CachedResults, TargetDeviceOffloadKind); CachedResults[ActionTC] = Result; return Result; } InputInfo Driver::BuildJobsForActionNoCache( - Compilation &C, const Action *A, const ToolChain *TC, const char *BoundArch, + Compilation &C, const Action *A, const ToolChain *TC, StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, const char *LinkingOutput, std::map<std::pair<const Action *, std::string>, InputInfo> &CachedResults, - bool BuildForOffloadDevice) const { + Action::OffloadKind TargetDeviceOffloadKind) const { llvm::PrettyStackTraceString CrashInfo("Building compilation jobs"); InputInfoList OffloadDependencesInputInfo; + bool BuildingForOffloadDevice = TargetDeviceOffloadKind != Action::OFK_None; if (const OffloadAction *OA = dyn_cast<OffloadAction>(A)) { // The offload action is expected to be used in four different situations. // @@ -2121,7 +3106,7 @@ InputInfo Driver::BuildJobsForActionNoCache( // b) Set a toolchain/architecture/kind for a device action; // Device Action 1 -> OffloadAction -> Device Action 2 // - // c) Specify a device dependences to a host action; + // c) Specify a device dependence to a host action; // Device Action 1 _ // \ // Host Action 1 ---> OffloadAction -> Host Action 2 @@ -2144,7 +3129,7 @@ InputInfo Driver::BuildJobsForActionNoCache( DevA = BuildJobsForAction(C, DepA, DepTC, DepBoundArch, AtTopLevel, /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, - CachedResults, /*BuildForOffloadDevice=*/true); + CachedResults, DepA->getOffloadingDeviceKind()); }); return DevA; } @@ -2154,16 +3139,15 @@ InputInfo Driver::BuildJobsForActionNoCache( // generate the host dependences and override the action with the device // dependence. The dependences can't therefore be a top-level action. OA->doOnEachDependence( - /*IsHostDependence=*/BuildForOffloadDevice, + /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { OffloadDependencesInputInfo.push_back(BuildJobsForAction( C, DepA, DepTC, DepBoundArch, /*AtTopLevel=*/false, /*MultipleArchs*/ !!DepBoundArch, LinkingOutput, CachedResults, - /*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() != - Action::OFK_None)); + DepA->getOffloadingDeviceKind())); }); - A = BuildForOffloadDevice + A = BuildingForOffloadDevice ? OA->getSingleDeviceDependence(/*DoNotConsiderHostActions=*/true) : OA->getHostDependence(); } @@ -2182,9 +3166,9 @@ InputInfo Driver::BuildJobsForActionNoCache( if (const BindArchAction *BAA = dyn_cast<BindArchAction>(A)) { const ToolChain *TC; - const char *ArchName = BAA->getArchName(); + StringRef ArchName = BAA->getArchName(); - if (ArchName) + if (!ArchName.empty()) TC = &getToolChain(C.getArgs(), computeTargetTriple(*this, DefaultTargetTriple, C.getArgs(), ArchName)); @@ -2193,7 +3177,7 @@ InputInfo Driver::BuildJobsForActionNoCache( return BuildJobsForAction(C, *BAA->input_begin(), TC, ArchName, AtTopLevel, MultipleArchs, LinkingOutput, CachedResults, - BuildForOffloadDevice); + TargetDeviceOffloadKind); } @@ -2202,9 +3186,9 @@ InputInfo Driver::BuildJobsForActionNoCache( const JobAction *JA = cast<JobAction>(A); ActionList CollapsedOffloadActions; - const Tool *T = - selectToolForJob(C, isSaveTempsEnabled(), embedBitcodeEnabled(), TC, JA, - Inputs, CollapsedOffloadActions); + ToolSelector TS(JA, *TC, C, isSaveTempsEnabled(), embedBitcodeInObject()); + const Tool *T = TS.getTool(Inputs, CollapsedOffloadActions); + if (!T) return InputInfo(); @@ -2212,13 +3196,12 @@ InputInfo Driver::BuildJobsForActionNoCache( // need to build jobs for host/device-side inputs it may have held. for (const auto *OA : CollapsedOffloadActions) cast<OffloadAction>(OA)->doOnEachDependence( - /*IsHostDependence=*/BuildForOffloadDevice, + /*IsHostDependence=*/BuildingForOffloadDevice, [&](Action *DepA, const ToolChain *DepTC, const char *DepBoundArch) { OffloadDependencesInputInfo.push_back(BuildJobsForAction( - C, DepA, DepTC, DepBoundArch, AtTopLevel, + C, DepA, DepTC, DepBoundArch, /* AtTopLevel */ false, /*MultipleArchs=*/!!DepBoundArch, LinkingOutput, CachedResults, - /*BuildForOffloadDevice=*/DepA->getOffloadingDeviceKind() != - Action::OFK_None)); + DepA->getOffloadingDeviceKind())); }); // Only use pipes when there is exactly one input. @@ -2231,7 +3214,7 @@ InputInfo Driver::BuildJobsForActionNoCache( AtTopLevel && (isa<DsymutilJobAction>(A) || isa<VerifyJobAction>(A)); InputInfos.push_back(BuildJobsForAction( C, Input, TC, BoundArch, SubJobAtTopLevel, MultipleArchs, LinkingOutput, - CachedResults, BuildForOffloadDevice)); + CachedResults, A->getOffloadingDeviceKind())); } // Always use the first input as the base input. @@ -2247,15 +3230,75 @@ InputInfo Driver::BuildJobsForActionNoCache( InputInfos.append(OffloadDependencesInputInfo.begin(), OffloadDependencesInputInfo.end()); + // Set the effective triple of the toolchain for the duration of this job. + llvm::Triple EffectiveTriple; + const ToolChain &ToolTC = T->getToolChain(); + const ArgList &Args = + C.getArgsForToolChain(TC, BoundArch, A->getOffloadingDeviceKind()); + if (InputInfos.size() != 1) { + EffectiveTriple = llvm::Triple(ToolTC.ComputeEffectiveClangTriple(Args)); + } else { + // Pass along the input type if it can be unambiguously determined. + EffectiveTriple = llvm::Triple( + ToolTC.ComputeEffectiveClangTriple(Args, InputInfos[0].getType())); + } + RegisterEffectiveTriple TripleRAII(ToolTC, EffectiveTriple); + // Determine the place to write output to, if any. InputInfo Result; - if (JA->getType() == types::TY_Nothing) + InputInfoList UnbundlingResults; + if (auto *UA = dyn_cast<OffloadUnbundlingJobAction>(JA)) { + // If we have an unbundling job, we need to create results for all the + // outputs. We also update the results cache so that other actions using + // this unbundling action can get the right results. + for (auto &UI : UA->getDependentActionsInfo()) { + assert(UI.DependentOffloadKind != Action::OFK_None && + "Unbundling with no offloading??"); + + // Unbundling actions are never at the top level. When we generate the + // offloading prefix, we also do that for the host file because the + // unbundling action does not change the type of the output which can + // cause a overwrite. + std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix( + UI.DependentOffloadKind, + UI.DependentToolChain->getTriple().normalize(), + /*CreatePrefixForHost=*/true); + auto CurI = InputInfo( + UA, GetNamedOutputPath(C, *UA, BaseInput, UI.DependentBoundArch, + /*AtTopLevel=*/false, MultipleArchs, + OffloadingPrefix), + BaseInput); + // Save the unbundling result. + UnbundlingResults.push_back(CurI); + + // Get the unique string identifier for this dependence and cache the + // result. + CachedResults[{A, GetTriplePlusArchString( + UI.DependentToolChain, UI.DependentBoundArch, + UI.DependentOffloadKind)}] = CurI; + } + + // Now that we have all the results generated, select the one that should be + // returned for the current depending action. + std::pair<const Action *, std::string> ActionTC = { + A, GetTriplePlusArchString(TC, BoundArch, TargetDeviceOffloadKind)}; + assert(CachedResults.find(ActionTC) != CachedResults.end() && + "Result does not exist??"); + Result = CachedResults[ActionTC]; + } else if (JA->getType() == types::TY_Nothing) Result = InputInfo(A, BaseInput); - else + else { + // We only have to generate a prefix for the host if this is not a top-level + // action. + std::string OffloadingPrefix = Action::GetOffloadingFileNamePrefix( + A->getOffloadingDeviceKind(), TC->getTriple().normalize(), + /*CreatePrefixForHost=*/!!A->getOffloadingHostActiveKinds() && + !AtTopLevel); Result = InputInfo(A, GetNamedOutputPath(C, *JA, BaseInput, BoundArch, AtTopLevel, MultipleArchs, - TC->getTriple().normalize()), + OffloadingPrefix), BaseInput); + } if (CCCPrintBindings && !CCGenDiagnostics) { llvm::errs() << "# \"" << T->getToolChain().getTripleString() << '"' @@ -2265,10 +3308,28 @@ InputInfo Driver::BuildJobsForActionNoCache( if (i + 1 != e) llvm::errs() << ", "; } - llvm::errs() << "], output: " << Result.getAsString() << "\n"; + if (UnbundlingResults.empty()) + llvm::errs() << "], output: " << Result.getAsString() << "\n"; + else { + llvm::errs() << "], outputs: ["; + for (unsigned i = 0, e = UnbundlingResults.size(); i != e; ++i) { + llvm::errs() << UnbundlingResults[i].getAsString(); + if (i + 1 != e) + llvm::errs() << ", "; + } + llvm::errs() << "] \n"; + } } else { - T->ConstructJob(C, *JA, Result, InputInfos, - C.getArgsForToolChain(TC, BoundArch), LinkingOutput); + if (UnbundlingResults.empty()) + T->ConstructJob( + C, *JA, Result, InputInfos, + C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), + LinkingOutput); + else + T->ConstructJobMultipleOutputs( + C, *JA, UnbundlingResults, InputInfos, + C.getArgsForToolChain(TC, BoundArch, JA->getOffloadingDeviceKind()), + LinkingOutput); } return Result; } @@ -2313,9 +3374,9 @@ static const char *MakeCLOutputFilename(const ArgList &Args, StringRef ArgValue, const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, const char *BaseInput, - const char *BoundArch, bool AtTopLevel, + StringRef BoundArch, bool AtTopLevel, bool MultipleArchs, - StringRef NormalizedTriple) const { + StringRef OffloadingPrefix) const { llvm::PrettyStackTraceString CrashInfo("Computing output path"); // Output to a user requested destination? if (AtTopLevel && !isa<DsymutilJobAction>(JA) && !isa<VerifyJobAction>(JA)) { @@ -2360,7 +3421,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, std::pair<StringRef, StringRef> Split = Name.split('.'); std::string TmpName = GetTemporaryPath( Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode())); - return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str())); + return C.addTempFile(C.getArgs().MakeArgString(TmpName)); } SmallString<128> BasePath(BaseInput); @@ -2375,7 +3436,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, // Determine what the derived output name should be. const char *NamedOutput; - if (JA.getType() == types::TY_Object && + if ((JA.getType() == types::TY_Object || JA.getType() == types::TY_LTO_BC) && C.getArgs().hasArg(options::OPT__SLASH_Fo, options::OPT__SLASH_o)) { // The /Fo or /o flag decides the object filename. StringRef Val = @@ -2399,17 +3460,17 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, // clang-cl uses BaseName for the executable name. NamedOutput = MakeCLOutputFilename(C.getArgs(), "", BaseName, types::TY_Image); - } else if (MultipleArchs && BoundArch) { + } else { SmallString<128> Output(getDefaultImageName()); - Output += JA.getOffloadingFileNamePrefix(NormalizedTriple); - Output += "-"; - Output.append(BoundArch); + Output += OffloadingPrefix; + if (MultipleArchs && !BoundArch.empty()) { + Output += "-"; + Output.append(BoundArch); + } NamedOutput = C.getArgs().MakeArgString(Output.c_str()); - } else { - NamedOutput = getDefaultImageName(); } } else if (JA.getType() == types::TY_PCH && IsCLMode()) { - NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName).c_str()); + NamedOutput = C.getArgs().MakeArgString(GetClPchPath(C, BaseName)); } else { const char *Suffix = types::getTypeTempSuffix(JA.getType(), IsCLMode()); assert(Suffix && "All types used for output should have a suffix."); @@ -2418,8 +3479,8 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, if (!types::appendSuffixForType(JA.getType())) End = BaseName.rfind('.'); SmallString<128> Suffixed(BaseName.substr(0, End)); - Suffixed += JA.getOffloadingFileNamePrefix(NormalizedTriple); - if (MultipleArchs && BoundArch) { + Suffixed += OffloadingPrefix; + if (MultipleArchs && !BoundArch.empty()) { Suffixed += "-"; Suffixed.append(BoundArch); } @@ -2459,7 +3520,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, std::pair<StringRef, StringRef> Split = Name.split('.'); std::string TmpName = GetTemporaryPath( Split.first, types::getTypeTempSuffix(JA.getType(), IsCLMode())); - return C.addTempFile(C.getArgs().MakeArgString(TmpName.c_str())); + return C.addTempFile(C.getArgs().MakeArgString(TmpName)); } } @@ -2476,7 +3537,7 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, } } -std::string Driver::GetFilePath(const char *Name, const ToolChain &TC) const { +std::string Driver::GetFilePath(StringRef Name, const ToolChain &TC) const { // Respect a limited subset of the '-Bprefix' functionality in GCC by // attempting to use this prefix when looking for file paths. for (const std::string &Dir : PrefixDirs) { @@ -2506,16 +3567,16 @@ std::string Driver::GetFilePath(const char *Name, const ToolChain &TC) const { } void Driver::generatePrefixedToolNames( - const char *Tool, const ToolChain &TC, + StringRef Tool, const ToolChain &TC, SmallVectorImpl<std::string> &Names) const { // FIXME: Needs a better variable than DefaultTargetTriple - Names.emplace_back(DefaultTargetTriple + "-" + Tool); + Names.emplace_back((DefaultTargetTriple + "-" + Tool).str()); Names.emplace_back(Tool); // Allow the discovery of tools prefixed with LLVM's default target triple. std::string LLVMDefaultTargetTriple = llvm::sys::getDefaultTargetTriple(); if (LLVMDefaultTargetTriple != DefaultTargetTriple) - Names.emplace_back(LLVMDefaultTargetTriple + "-" + Tool); + Names.emplace_back((LLVMDefaultTargetTriple + "-" + Tool).str()); } static bool ScanDirForExecutable(SmallString<128> &Dir, @@ -2529,8 +3590,7 @@ static bool ScanDirForExecutable(SmallString<128> &Dir, return false; } -std::string Driver::GetProgramPath(const char *Name, - const ToolChain &TC) const { +std::string Driver::GetProgramPath(StringRef Name, const ToolChain &TC) const { SmallVector<std::string, 2> TargetSpecificExecutables; generatePrefixedToolNames(Name, TC, TargetSpecificExecutables); @@ -2542,7 +3602,7 @@ std::string Driver::GetProgramPath(const char *Name, if (ScanDirForExecutable(P, TargetSpecificExecutables)) return P.str(); } else { - SmallString<128> P(PrefixDir + Name); + SmallString<128> P((PrefixDir + Name).str()); if (llvm::sys::fs::can_execute(Twine(P))) return P.str(); } @@ -2564,8 +3624,7 @@ std::string Driver::GetProgramPath(const char *Name, return Name; } -std::string Driver::GetTemporaryPath(StringRef Prefix, - const char *Suffix) const { +std::string Driver::GetTemporaryPath(StringRef Prefix, StringRef Suffix) const { SmallString<128> Path; std::error_code EC = llvm::sys::fs::createTemporaryFile(Prefix, Suffix, Path); if (EC) { @@ -2645,6 +3704,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, case llvm::Triple::NaCl: TC = new toolchains::NaClToolChain(*this, Target, Args); break; + case llvm::Triple::Fuchsia: + TC = new toolchains::Fuchsia(*this, Target, Args); + break; case llvm::Triple::Solaris: TC = new toolchains::Solaris(*this, Target, Args); break; @@ -2673,12 +3735,12 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, break; } break; - case llvm::Triple::CUDA: - TC = new toolchains::CudaToolChain(*this, Target, Args); - break; case llvm::Triple::PS4: TC = new toolchains::PS4CPU(*this, Target, Args); break; + case llvm::Triple::Contiki: + TC = new toolchains::Contiki(*this, Target, Args); + break; default: // Of these targets, Hexagon is the only one that might have // an OS of Linux, in which case it got handled above already. @@ -2686,6 +3748,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, case llvm::Triple::tce: TC = new toolchains::TCEToolChain(*this, Target, Args); break; + case llvm::Triple::tcele: + TC = new toolchains::TCELEToolChain(*this, Target, Args); + break; case llvm::Triple::hexagon: TC = new toolchains::HexagonToolChain(*this, Target, Args); break; @@ -2699,6 +3764,9 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, case llvm::Triple::wasm64: TC = new toolchains::WebAssembly(*this, Target, Args); break; + case llvm::Triple::avr: + TC = new toolchains::AVRToolChain(*this, Target, Args); + break; default: if (Target.getVendor() == llvm::Triple::Myriad) TC = new toolchains::MyriadToolChain(*this, Target, Args); @@ -2711,6 +3779,12 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, } } } + + // Intentionally omitted from the switch above: llvm::Triple::CUDA. CUDA + // compiles always need two toolchains, the CUDA toolchain and the host + // toolchain. So the only valid way to create a CUDA toolchain is via + // CreateOffloadingDeviceToolChains. + return *TC; } @@ -2733,36 +3807,35 @@ bool Driver::ShouldUseClangCompiler(const JobAction &JA) const { /// /// \return True if the entire string was parsed (9.2), or all groups were /// parsed (10.3.5extrastuff). -bool Driver::GetReleaseVersion(const char *Str, unsigned &Major, - unsigned &Minor, unsigned &Micro, - bool &HadExtra) { +bool Driver::GetReleaseVersion(StringRef Str, unsigned &Major, unsigned &Minor, + unsigned &Micro, bool &HadExtra) { HadExtra = false; Major = Minor = Micro = 0; - if (*Str == '\0') + if (Str.empty()) return false; - char *End; - Major = (unsigned)strtol(Str, &End, 10); - if (*Str != '\0' && *End == '\0') + if (Str.consumeInteger(10, Major)) + return false; + if (Str.empty()) return true; - if (*End != '.') + if (Str[0] != '.') return false; - Str = End + 1; - Minor = (unsigned)strtol(Str, &End, 10); - if (*Str != '\0' && *End == '\0') + Str = Str.drop_front(1); + + if (Str.consumeInteger(10, Minor)) + return false; + if (Str.empty()) return true; - if (*End != '.') + if (Str[0] != '.') return false; + Str = Str.drop_front(1); - Str = End + 1; - Micro = (unsigned)strtol(Str, &End, 10); - if (*Str != '\0' && *End == '\0') - return true; - if (Str == End) + if (Str.consumeInteger(10, Micro)) return false; - HadExtra = true; + if (!Str.empty()) + HadExtra = true; return true; } @@ -2772,21 +3845,22 @@ bool Driver::GetReleaseVersion(const char *Str, unsigned &Major, /// /// \return True if the entire string was parsed and there are /// no extra characters remaining at the end. -bool Driver::GetReleaseVersion(const char *Str, +bool Driver::GetReleaseVersion(StringRef Str, MutableArrayRef<unsigned> Digits) { - if (*Str == '\0') + if (Str.empty()) return false; - char *End; unsigned CurDigit = 0; while (CurDigit < Digits.size()) { - unsigned Digit = (unsigned)strtol(Str, &End, 10); + unsigned Digit; + if (Str.consumeInteger(10, Digit)) + return false; Digits[CurDigit] = Digit; - if (*Str != '\0' && *End == '\0') + if (Str.empty()) return true; - if (*End != '.' || Str == End) + if (Str[0] != '.') return false; - Str = End + 1; + Str = Str.drop_front(1); CurDigit++; } |