Diffstat (limited to 'contrib/llvm/lib/Target/NVPTX')
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp   |   2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp    |   3
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h      |   4
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTX.h                            |   2
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp                | 143
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h                  |   4
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp | 207
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp              |   4
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp              |   3
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h                |   9
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp           | 170
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp           | 136
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp                    |  13
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h                      |  24
-rw-r--r--  contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp             |  13
15 files changed, 453 insertions(+), 284 deletions(-)
diff --git a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
index ac92df9..4594c22 100644
--- a/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
@@ -85,7 +85,7 @@ void NVPTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << markup("<imm:") << formatImm(Op.getImm()) << markup(">");
} else {
assert(Op.isExpr() && "Unknown operand kind in printOperand");
- O << *Op.getExpr();
+ Op.getExpr()->print(O, &MAI);
}
}
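
The one-line change above is part of a tree-wide MC API migration in this import: expressions and symbols no longer stream through operator<<, but print through an explicit MCAsmInfo so the target's assembly dialect is known. A minimal sketch of the new call form (LLVM 3.7-era headers assumed; the helper name is illustrative):

#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/raw_ostream.h"

static void printExprOperand(const llvm::MCExpr *E, const llvm::MCAsmInfo &MAI,
                             llvm::raw_ostream &OS) {
  // Before: OS << *E;  After: the explicit form used throughout this diff.
  E->print(OS, &MAI);
}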
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
index b9df3d1..ef36c13 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp
@@ -25,8 +25,7 @@ static cl::opt<bool> CompileForDebugging("debug-compile",
void NVPTXMCAsmInfo::anchor() {}
-NVPTXMCAsmInfo::NVPTXMCAsmInfo(StringRef TT) {
- Triple TheTriple(TT);
+NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Triple &TheTriple) {
if (TheTriple.getArch() == Triple::nvptx64) {
PointerSize = CalleeSaveStackSlotSize = 8;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
index c324286..b432e06 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h
@@ -18,12 +18,12 @@
namespace llvm {
class Target;
-class StringRef;
+class Triple;
class NVPTXMCAsmInfo : public MCAsmInfo {
virtual void anchor();
public:
- explicit NVPTXMCAsmInfo(StringRef TT);
+ explicit NVPTXMCAsmInfo(const Triple &TheTriple);
};
} // namespace llvm
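
A hedged sketch of the rationale for the constructor change: the caller already holds a parsed llvm::Triple, so passing it by reference avoids re-parsing a StringRef in every MCAsmInfo subclass. The helper below is hypothetical and only restates the check the new constructor performs:

#include "llvm/ADT/Triple.h"

static bool wantsEightBytePointers(const llvm::Triple &TheTriple) {
  // Same test NVPTXMCAsmInfo's new constructor uses to set PointerSize = 8.
  return TheTriple.getArch() == llvm::Triple::nvptx64;
}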
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index 382525d..477b0ba 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -69,7 +69,7 @@ ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
-FunctionPass *createNVPTXLowerStructArgsPass();
+FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM);
bool isImageOrSamplerVal(const Value *, const Module *);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 3bbea40..298b992 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -266,7 +266,7 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = MCOperand::createImm(MO.getImm());
break;
case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::Create(
+ MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
MO.getMBB()->getSymbol(), OutContext));
break;
case MachineOperand::MO_ExternalSymbol:
@@ -283,11 +283,11 @@ bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO,
default: report_fatal_error("Unsupported FP type"); break;
case Type::FloatTyID:
MCOp = MCOperand::createExpr(
- NVPTXFloatMCExpr::CreateConstantFPSingle(Val, OutContext));
+ NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext));
break;
case Type::DoubleTyID:
MCOp = MCOperand::createExpr(
- NVPTXFloatMCExpr::CreateConstantFPDouble(Val, OutContext));
+ NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext));
break;
}
break;
@@ -334,7 +334,7 @@ unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) {
MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) {
const MCExpr *Expr;
- Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None,
OutContext);
return MCOperand::createExpr(Expr);
}
@@ -418,9 +418,8 @@ void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
const MachineBasicBlock &MBB) const {
MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
- // TODO: isLoopHeader() should take "const MachineBasicBlock *".
// We insert .pragma "nounroll" only to the loop header.
- if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB)))
+ if (!LI.isLoopHeader(&MBB))
return false;
// llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
@@ -468,7 +467,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
printReturnValStr(*MF, O);
}
- O << *CurrentFnSym;
+ CurrentFnSym->print(O, MAI);
emitFunctionParamList(*MF, O);
@@ -625,7 +624,8 @@ void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) {
else
O << ".func ";
printReturnValStr(F, O);
- O << *getSymbol(F) << "\n";
+ getSymbol(F)->print(O, MAI);
+ O << "\n";
emitFunctionParamList(F, O);
O << ";\n";
}
@@ -1172,7 +1172,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
else
O << getPTXFundamentalTypeStr(ETy, false);
O << " ";
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
  // PTX allows variable initialization only for constant and global state
  // spaces.
@@ -1189,11 +1189,9 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
// The frontend adds zero-initializer to variables that don't have an
// initial value, so skip warning for this case.
if (!GVar->getInitializer()->isNullValue()) {
- std::string warnMsg =
- ("initial value of '" + GVar->getName() +
- "' is not allowed in addrspace(" +
- Twine(llvm::utostr_32(PTy->getAddressSpace())) + ")").str();
- report_fatal_error(warnMsg.c_str());
+ report_fatal_error("initial value of '" + GVar->getName() +
+ "' is not allowed in addrspace(" +
+ Twine(PTy->getAddressSpace()) + ")");
}
}
}
@@ -1220,15 +1218,21 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
bufferAggregateConstant(Initializer, &aggBuffer);
if (aggBuffer.numSymbols) {
if (static_cast<const NVPTXTargetMachine &>(TM).is64Bit()) {
- O << " .u64 " << *getSymbol(GVar) << "[";
+ O << " .u64 ";
+ getSymbol(GVar)->print(O, MAI);
+ O << "[";
O << ElementSize / 8;
} else {
- O << " .u32 " << *getSymbol(GVar) << "[";
+ O << " .u32 ";
+ getSymbol(GVar)->print(O, MAI);
+ O << "[";
O << ElementSize / 4;
}
O << "]";
} else {
- O << " .b8 " << *getSymbol(GVar) << "[";
+ O << " .b8 ";
+ getSymbol(GVar)->print(O, MAI);
+ O << "[";
O << ElementSize;
O << "]";
}
@@ -1236,7 +1240,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
aggBuffer.print();
O << "}";
} else {
- O << " .b8 " << *getSymbol(GVar);
+ O << " .b8 ";
+ getSymbol(GVar)->print(O, MAI);
if (ElementSize) {
O << "[";
O << ElementSize;
@@ -1244,7 +1249,8 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar,
}
}
} else {
- O << " .b8 " << *getSymbol(GVar);
+ O << " .b8 ";
+ getSymbol(GVar)->print(O, MAI);
if (ElementSize) {
O << "[";
O << ElementSize;
@@ -1351,7 +1357,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
O << " .";
O << getPTXFundamentalTypeStr(ETy);
O << " ";
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
return;
}
@@ -1366,9 +1372,11 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar,
case Type::ArrayTyID:
case Type::VectorTyID:
ElementSize = TD->getTypeStoreSize(ETy);
- O << " .b8 " << *getSymbol(GVar) << "[";
+ O << " .b8 ";
+ getSymbol(GVar)->print(O, MAI);
+ O << "[";
if (ElementSize) {
- O << itostr(ElementSize);
+ O << ElementSize;
}
O << "]";
break;
@@ -1408,11 +1416,13 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) {
void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I,
int paramIndex, raw_ostream &O) {
- O << *getSymbol(I->getParent()) << "_param_" << paramIndex;
+ getSymbol(I->getParent())->print(O, MAI);
+ O << "_param_" << paramIndex;
}
void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) {
- O << *CurrentFnSym << "_param_" << paramIndex;
+ CurrentFnSym->print(O, MAI);
+ O << "_param_" << paramIndex;
}
void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
@@ -1446,21 +1456,24 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
O << "\t.param .u64 .ptr .surfref ";
else
O << "\t.param .surfref ";
- O << *CurrentFnSym << "_param_" << paramIndex;
+ CurrentFnSym->print(O, MAI);
+ O << "_param_" << paramIndex;
}
else { // Default image is read_only
if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .texref ";
else
O << "\t.param .texref ";
- O << *CurrentFnSym << "_param_" << paramIndex;
+ CurrentFnSym->print(O, MAI);
+ O << "_param_" << paramIndex;
}
} else {
if (nvptxSubtarget->hasImageHandles())
O << "\t.param .u64 .ptr .samplerref ";
else
O << "\t.param .samplerref ";
- O << *CurrentFnSym << "_param_" << paramIndex;
+ CurrentFnSym->print(O, MAI);
+ O << "_param_" << paramIndex;
}
continue;
}
@@ -1716,10 +1729,10 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
}
if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) {
O << "generic(";
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
O << ")";
} else {
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
}
return;
}
@@ -1733,20 +1746,44 @@ void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) {
if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
O << "generic(";
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
O << ")";
} else {
- O << *getSymbol(GVar);
+ getSymbol(GVar)->print(O, MAI);
}
return;
} else {
- O << *lowerConstant(CPV);
+ lowerConstant(CPV)->print(O, MAI);
return;
}
}
llvm_unreachable("Not scalar type found in printScalarConstant()");
}
+// These utility functions ensure we get the right sequence of bytes for a
+// given type, even on big-endian machines.
+template <typename T> static void ConvertIntToBytes(unsigned char *p, T val) {
+ int64_t vp = (int64_t)val;
+ for (unsigned i = 0; i < sizeof(T); ++i) {
+ p[i] = (unsigned char)vp;
+ vp >>= 8;
+ }
+}
+static void ConvertFloatToBytes(unsigned char *p, float val) {
+ int32_t *vp = (int32_t *)&val;
+ for (unsigned i = 0; i < sizeof(int32_t); ++i) {
+ p[i] = (unsigned char)*vp;
+ *vp >>= 8;
+ }
+}
+static void ConvertDoubleToBytes(unsigned char *p, double val) {
+ int64_t *vp = (int64_t *)&val;
+ for (unsigned i = 0; i < sizeof(int64_t); ++i) {
+ p[i] = (unsigned char)*vp;
+ *vp >>= 8;
+ }
+}
+
void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
AggBuffer *aggBuffer) {
@@ -1760,30 +1797,30 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
return;
}
- unsigned char *ptr;
+ unsigned char ptr[8];
switch (CPV->getType()->getTypeID()) {
case Type::IntegerTyID: {
const Type *ETy = CPV->getType();
if (ETy == Type::getInt8Ty(CPV->getContext())) {
unsigned char c = (unsigned char)cast<ConstantInt>(CPV)->getZExtValue();
- ptr = &c;
+ ConvertIntToBytes<>(ptr, c);
aggBuffer->addBytes(ptr, 1, Bytes);
} else if (ETy == Type::getInt16Ty(CPV->getContext())) {
short int16 = (short)cast<ConstantInt>(CPV)->getZExtValue();
- ptr = (unsigned char *)&int16;
+ ConvertIntToBytes<>(ptr, int16);
aggBuffer->addBytes(ptr, 2, Bytes);
} else if (ETy == Type::getInt32Ty(CPV->getContext())) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
int int32 = (int)(constInt->getZExtValue());
- ptr = (unsigned char *)&int32;
+ ConvertIntToBytes<>(ptr, int32);
aggBuffer->addBytes(ptr, 4, Bytes);
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, *TD))) {
int int32 = (int)(constInt->getZExtValue());
- ptr = (unsigned char *)&int32;
+ ConvertIntToBytes<>(ptr, int32);
aggBuffer->addBytes(ptr, 4, Bytes);
break;
}
@@ -1798,14 +1835,14 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
} else if (ETy == Type::getInt64Ty(CPV->getContext())) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(CPV)) {
long long int64 = (long long)(constInt->getZExtValue());
- ptr = (unsigned char *)&int64;
+ ConvertIntToBytes<>(ptr, int64);
aggBuffer->addBytes(ptr, 8, Bytes);
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
ConstantFoldConstantExpression(Cexpr, *TD))) {
long long int64 = (long long)(constInt->getZExtValue());
- ptr = (unsigned char *)&int64;
+ ConvertIntToBytes<>(ptr, int64);
aggBuffer->addBytes(ptr, 8, Bytes);
break;
}
@@ -1827,11 +1864,11 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
const Type *Ty = CFP->getType();
if (Ty == Type::getFloatTy(CPV->getContext())) {
float float32 = (float) CFP->getValueAPF().convertToFloat();
- ptr = (unsigned char *)&float32;
+ ConvertFloatToBytes(ptr, float32);
aggBuffer->addBytes(ptr, 4, Bytes);
} else if (Ty == Type::getDoubleTy(CPV->getContext())) {
double float64 = CFP->getValueAPF().convertToDouble();
- ptr = (unsigned char *)&float64;
+ ConvertDoubleToBytes(ptr, float64);
aggBuffer->addBytes(ptr, 8, Bytes);
} else {
llvm_unreachable("unsupported fp const type");
@@ -1993,16 +2030,16 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
MCContext &Ctx = OutContext;
if (CV->isNullValue() || isa<UndefValue>(CV))
- return MCConstantExpr::Create(0, Ctx);
+ return MCConstantExpr::create(0, Ctx);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV))
- return MCConstantExpr::Create(CI->getZExtValue(), Ctx);
+ return MCConstantExpr::create(CI->getZExtValue(), Ctx);
if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) {
const MCSymbolRefExpr *Expr =
- MCSymbolRefExpr::Create(getSymbol(GV), Ctx);
+ MCSymbolRefExpr::create(getSymbol(GV), Ctx);
if (ProcessingGeneric) {
- return NVPTXGenericMCSymbolRefExpr::Create(Expr, Ctx);
+ return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx);
} else {
return Expr;
}
@@ -2059,7 +2096,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
return Base;
int64_t Offset = OffsetAI.getSExtValue();
- return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx),
+ return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx),
Ctx);
}
@@ -2102,8 +2139,8 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
// the high bits so we are sure to get a proper truncation if the input is
// a constant expr.
unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType());
- const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx);
- return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx);
+ const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx);
+ return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx);
}
// The MC library also has a right-shift operator, but it isn't consistently
@@ -2113,7 +2150,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric);
switch (CE->getOpcode()) {
default: llvm_unreachable("Unknown binary operator constant cast expr");
- case Instruction::Add: return MCBinaryExpr::CreateAdd(LHS, RHS, Ctx);
+ case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx);
}
}
}
@@ -2123,7 +2160,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
switch (Expr.getKind()) {
case MCExpr::Target:
- return cast<MCTargetExpr>(&Expr)->PrintImpl(OS);
+ return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI);
case MCExpr::Constant:
OS << cast<MCConstantExpr>(Expr).getValue();
return;
@@ -2131,7 +2168,7 @@ void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) {
case MCExpr::SymbolRef: {
const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr);
const MCSymbol &Sym = SRE.getSymbol();
- OS << Sym;
+ Sym.print(OS, MAI);
return;
}
@@ -2256,11 +2293,11 @@ void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
break;
case MachineOperand::MO_GlobalAddress:
- O << *getSymbol(MO.getGlobal());
+ getSymbol(MO.getGlobal())->print(O, MAI);
break;
case MachineOperand::MO_MachineBasicBlock:
- O << *MO.getMBB()->getSymbol();
+ MO.getMBB()->getSymbol()->print(O, MAI);
return;
default:
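
Among the NVPTXAsmPrinter.cpp changes above, the Convert*ToBytes helpers replace direct pointer reinterpretation (ptr = (unsigned char *)&int16), which produced host-order bytes. A standalone copy of the integer helper, shown only to illustrate the behavior, serializes least-significant byte first on any host, matching PTX's little-endian initializers:

#include <cstdint>
#include <cstdio>

// Copy of ConvertIntToBytes for this sketch only.
template <typename T> static void ConvertIntToBytesDemo(unsigned char *p, T val) {
  int64_t vp = (int64_t)val;
  for (unsigned i = 0; i < sizeof(T); ++i) {
    p[i] = (unsigned char)vp; // emit low byte first
    vp >>= 8;
  }
}

int main() {
  unsigned char buf[2];
  ConvertIntToBytesDemo<short>(buf, 0x1234);
  std::printf("%02x %02x\n", buf[0], buf[1]); // prints "34 12" on any host
}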
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 301c686..f6f7685 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -165,10 +165,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
}
if (EmitGeneric && !isa<Function>(v) && !IsNonGenericPointer) {
O << "generic(";
- O << *Name;
+ Name->print(O, AP.MAI);
O << ")";
} else {
- O << *Name;
+ Name->print(O, AP.MAI);
}
} else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) {
const MCExpr *Expr =
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
index ae63cae..cfff001 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
@@ -10,34 +10,54 @@
// When a load/store accesses the generic address space, this pass checks
// whether the address is cast from a non-generic address space. If so, it
// removes the addrspacecast, because accessing non-generic address spaces is typically
-// faster. Besides seeking addrspacecasts, this optimization also traces into
-// the base pointer of a GEP.
+// faster. Besides removing addrspacecasts directly used by loads/stores, this
+// optimization also recursively traces into a GEP's pointer operand and a
+// bitcast's source to find more eliminable addrspacecasts.
//
// For instance, the code below loads a float from an array allocated in
// addrspace(3).
//
-// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
-// %1 = gep [10 x float]* %0, i64 0, i64 %i
-// %2 = load float* %1 ; emits ld.f32
+// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
+// %1 = gep [10 x float]* %0, i64 0, i64 %i
+// %2 = bitcast float* %1 to i32*
+// %3 = load i32* %2 ; emits ld.u32
//
-// First, function hoistAddrSpaceCastFromGEP reorders the addrspacecast
-// and the GEP to expose more optimization opportunities to function
+// First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP,
+// and the bitcast to expose more optimization opportunities to function
// optimizeMemoryInst. The intermediate code looks like:
//
-// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
-// %1 = addrspacecast float addrspace(3)* %0 to float*
-// %2 = load float* %1 ; still emits ld.f32, but will be optimized shortly
+// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
+// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
+// %2 = addrspacecast i32 addrspace(3)* %1 to i32*
+// %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly
//
// Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed
// generic pointers, and folds the load and the addrspacecast into a load from
// the original address space. The final code looks like:
//
-// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
-// %2 = load float addrspace(3)* %0 ; emits ld.shared.f32
+// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
+// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
+// %3 = load i32 addrspace(3)* %1 ; emits ld.shared.u32
//
// This pass may remove an addrspacecast in a different BB. Therefore, we
// implement it as a FunctionPass.
//
+// TODO:
+// The current implementation doesn't handle PHINodes. Eliminating
+// addrspacecasts used by PHINodes is trickier because PHINodes can introduce
+// loops in data flow. For example,
+//
+// %generic.input = addrspacecast float addrspace(3)* %input to float*
+// loop:
+// %y = phi [ %generic.input, %y2 ]
+// %y2 = getelementptr %y, 1
+// %v = load %y2
+// br ..., label %loop, ...
+//
+// Marking %y2 shared depends on marking %y shared, but %y also data-flow
+// depends on %y2. We probably need an iterative fixed-point algorithm to
+// handle this case.
+//
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
@@ -62,17 +82,31 @@ class NVPTXFavorNonGenericAddrSpaces : public FunctionPass {
public:
static char ID;
NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {}
-
bool runOnFunction(Function &F) override;
+private:
/// Optimizes load/store instructions. Idx is the index of the pointer operand
/// (0 for load, and 1 for store). Returns true if it changes anything.
bool optimizeMemoryInstruction(Instruction *I, unsigned Idx);
+ /// Recursively traces into a GEP's pointer operand or a bitcast's source to
+ /// find an eliminable addrspacecast, and hoists that addrspacecast to the
+ /// outermost level. For example, this function transforms
+ /// bitcast(gep(gep(addrspacecast(X))))
+ /// to
+ /// addrspacecast(bitcast(gep(gep(X)))).
+ ///
+ /// This reordering exposes to optimizeMemoryInstruction more
+ /// optimization opportunities on loads and stores.
+ ///
+ /// Returns true if this function successfully hoists an eliminable
+ /// addrspacecast or V is already such an addrspacecast.
/// Transforms "gep (addrspacecast X), indices" into "addrspacecast (gep X,
- /// indices)". This reordering exposes to optimizeMemoryInstruction more
- /// optimization opportunities on loads and stores. Returns true if it changes
- /// the program.
- bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP);
+ /// indices)".
+ bool hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
+ /// Helper function for GEPs.
+ bool hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth);
+ /// Helper function for bitcasts.
+ bool hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
};
}
@@ -85,11 +119,12 @@ INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic",
"Remove unnecessary non-generic-to-generic addrspacecasts",
false, false)
-// Decides whether removing Cast is valid and beneficial. Cast can be an
-// instruction or a constant expression.
-static bool IsEliminableAddrSpaceCast(Operator *Cast) {
- // Returns false if not even an addrspacecast.
- if (Cast->getOpcode() != Instruction::AddrSpaceCast)
+// Decides whether V is an addrspacecast and shortcutting V in load/store is
+// valid and beneficial.
+static bool isEliminableAddrSpaceCast(Value *V) {
+ // Returns false if V is not even an addrspacecast.
+ Operator *Cast = dyn_cast<Operator>(V);
+ if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast)
return false;
Value *Src = Cast->getOperand(0);
@@ -108,67 +143,119 @@ static bool IsEliminableAddrSpaceCast(Operator *Cast) {
DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
}
-bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
- GEPOperator *GEP) {
- Operator *Cast = dyn_cast<Operator>(GEP->getPointerOperand());
- if (!Cast)
+bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(GEPOperator *GEP,
+ int Depth) {
+ if (!hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1))
return false;
- if (!IsEliminableAddrSpaceCast(Cast))
- return false;
+ // That hoistAddrSpaceCastFrom succeeds implies GEP's pointer operand is now
+ // an eliminable addrspacecast.
+ assert(isEliminableAddrSpaceCast(GEP->getPointerOperand()));
+ Operator *Cast = cast<Operator>(GEP->getPointerOperand());
SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
- // %1 = gep (addrspacecast X), indices
+ // GEP = gep (addrspacecast X), indices
// =>
- // %0 = gep X, indices
- // %1 = addrspacecast %0
- GetElementPtrInst *NewGEPI = GetElementPtrInst::Create(
+ // NewGEP = gep X, indices
+ // NewASC = addrspacecast NewGEP
+ GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
GEP->getSourceElementType(), Cast->getOperand(0), Indices,
- GEP->getName(), GEPI);
- NewGEPI->setIsInBounds(GEP->isInBounds());
- GEP->replaceAllUsesWith(
- new AddrSpaceCastInst(NewGEPI, GEP->getType(), "", GEPI));
+ "", GEPI);
+ NewGEP->setIsInBounds(GEP->isInBounds());
+ Value *NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
+ NewASC->takeName(GEP);
+ GEP->replaceAllUsesWith(NewASC);
} else {
// GEP is a constant expression.
- Constant *NewGEPCE = ConstantExpr::getGetElementPtr(
+ Constant *NewGEP = ConstantExpr::getGetElementPtr(
GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
Indices, GEP->isInBounds());
GEP->replaceAllUsesWith(
- ConstantExpr::getAddrSpaceCast(NewGEPCE, GEP->getType()));
+ ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType()));
}
return true;
}
-bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
- unsigned Idx) {
- // If the pointer operand is a GEP, hoist the addrspacecast if any from the
- // GEP to expose more optimization opportunites.
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(MI->getOperand(Idx))) {
- hoistAddrSpaceCastFromGEP(GEP);
- }
+bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
+ BitCastOperator *BC, int Depth) {
+ if (!hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1))
+ return false;
- // load/store (addrspacecast X) => load/store X if shortcutting the
- // addrspacecast is valid and can improve performance.
- //
- // e.g.,
- // %1 = addrspacecast float addrspace(3)* %0 to float*
- // %2 = load float* %1
- // ->
- // %2 = load float addrspace(3)* %0
- //
- // Note: the addrspacecast can also be a constant expression.
- if (Operator *Cast = dyn_cast<Operator>(MI->getOperand(Idx))) {
- if (IsEliminableAddrSpaceCast(Cast)) {
- MI->setOperand(Idx, Cast->getOperand(0));
- return true;
- }
+ // That hoistAddrSpaceCastFrom succeeds implies BC's source operand is now
+ // an eliminable addrspacecast.
+ assert(isEliminableAddrSpaceCast(BC->getOperand(0)));
+ Operator *Cast = cast<Operator>(BC->getOperand(0));
+
+ // Cast = addrspacecast Src
+ // BC = bitcast Cast
+ // =>
+ // Cast' = bitcast Src
+ // BC' = addrspacecast Cast'
+ Value *Src = Cast->getOperand(0);
+ Type *TypeOfNewCast =
+ PointerType::get(BC->getType()->getPointerElementType(),
+ Src->getType()->getPointerAddressSpace());
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) {
+ Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI);
+ Value *NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
+ NewBC->takeName(BC);
+ BC->replaceAllUsesWith(NewBC);
+ } else {
+ // BC is a constant expression.
+ Constant *NewCast =
+ ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast);
+ Constant *NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
+ BC->replaceAllUsesWith(NewBC);
}
+ return true;
+}
+
+bool NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
+ int Depth) {
+ // Returns true if V is already an eliminable addrspacecast.
+ if (isEliminableAddrSpaceCast(V))
+ return true;
+
+ // Limit the depth to prevent this recursive function from running too long.
+ const int MaxDepth = 20;
+ if (Depth >= MaxDepth)
+ return false;
+
+ // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer
+ // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts
+ // that are not directly used by the load/store.
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ return hoistAddrSpaceCastFromGEP(GEP, Depth);
+
+ if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
+ return hoistAddrSpaceCastFromBitCast(BC, Depth);
return false;
}
+bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
+ unsigned Idx) {
+ if (hoistAddrSpaceCastFrom(MI->getOperand(Idx))) {
+ // load/store (addrspacecast X) => load/store X if shortcutting the
+ // addrspacecast is valid and can improve performance.
+ //
+ // e.g.,
+ // %1 = addrspacecast float addrspace(3)* %0 to float*
+ // %2 = load float* %1
+ // ->
+ // %2 = load float addrspace(3)* %0
+ //
+ // Note: the addrspacecast can also be a constant expression.
+ assert(isEliminableAddrSpaceCast(MI->getOperand(Idx)));
+ Operator *ASC = dyn_cast<Operator>(MI->getOperand(Idx));
+ MI->setOperand(Idx, ASC->getOperand(0));
+ return true;
+ }
+ return false;
+}
+
bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
if (DisableFavorNonGeneric)
return false;
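
For context, a hypothetical standalone driver for the rewritten pass; in-tree it is scheduled from NVPTXTargetMachine::addIRPasses (see the NVPTXTargetMachine.cpp hunk below), and it remains a FunctionPass because it may erase an addrspacecast in a different basic block:

#include "NVPTX.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"

static void runFavorNonGeneric(llvm::Module &M) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createNVPTXFavorNonGenericAddrSpacesPass());
  PM.run(M);
}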
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index fa38a68..232a611 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -613,6 +613,10 @@ SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
Opc =
TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
break;
+ case ADDRESS_SPACE_PARAM:
+ Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
+ : NVPTX::nvvm_ptr_gen_to_param;
+ break;
}
return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 805847a..b5af72a 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3725,7 +3725,8 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp)
bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
- Type *Ty) const {
+ Type *Ty,
+ unsigned AS) const {
// AddrMode - This represents an addressing mode of:
// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 5142ae3..ed94775 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -456,7 +456,8 @@ public:
/// Used to guide target specific optimizations, like loop strength
/// reduction (LoopStrengthReduce.cpp) and memory optimization for
/// address mode (CodeGenPrepare.cpp)
- bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
+ bool isLegalAddressingMode(const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned getFunctionAlignment(const Function *F) const;
@@ -497,12 +498,6 @@ public:
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const override;
- unsigned getInlineAsmMemConstraint(
- const std::string &ConstraintCode) const override {
- // FIXME: Map different constraints differently.
- return InlineAsm::Constraint_m;
- }
-
const NVPTXTargetMachine *nvTM;
// PTX always uses 32-bit shift amounts
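
A hedged sketch of a caller after the isLegalAddressingMode change: the address space of the prospective access now travels with the query, so a target can accept different addressing modes per address space. The reg+imm mode below is illustrative only:

#include "llvm/Target/TargetLowering.h"

static bool regPlusImmIsLegal(const llvm::TargetLowering &TLI, llvm::Type *Ty,
                              unsigned AddrSpace) {
  llvm::TargetLowering::AddrMode AM;
  AM.HasBaseReg = true; // base register ...
  AM.BaseOffs = 16;     // ... plus a small immediate offset
  return TLI.isLegalAddressingMode(AM, Ty, AddrSpace);
}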
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
new file mode 100644
index 0000000..24dcb12
--- /dev/null
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
@@ -0,0 +1,170 @@
+//===-- NVPTXLowerKernelArgs.cpp - Lower kernel arguments -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Pointer arguments to kernel functions need to be lowered specially.
+//
+// 1. Copy byval struct args to local memory. This is a preparation for handling
+// cases like
+//
+// kernel void foo(struct A arg, ...)
+// {
+// struct A *p = &arg;
+// ...
+// ... = p->filed1 ... (this is no generic address for .param)
+//        ... = p->field1 ...   (there is no generic address for .param)
+//        p->field2 = ...       (no write access to .param)
+//
+// 2. Convert non-byval pointer arguments of CUDA kernels to pointers in the
+// global address space. This allows later optimizations to emit
+// ld.global.*/st.global.* for accessing these pointer arguments. For
+// example,
+//
+// define void @foo(float* %input) {
+// %v = load float, float* %input, align 4
+// ...
+// }
+//
+// becomes
+//
+// define void @foo(float* %input) {
+// %input2 = addrspacecast float* %input to float addrspace(1)*
+// %input3 = addrspacecast float addrspace(1)* %input2 to float*
+// %v = load float, float* %input3, align 4
+// ...
+// }
+//
+// Later, NVPTXFavorNonGenericAddrSpaces will optimize it to
+//
+// define void @foo(float* %input) {
+// %input2 = addrspacecast float* %input to float addrspace(1)*
+// %v = load float, float addrspace(1)* %input2, align 4
+// ...
+// }
+//
+// TODO: merge this pass with NVPTXFavorNonGenericAddrSpaces so that other passes
+// don't cancel the addrspacecast pair this pass emits.
+//===----------------------------------------------------------------------===//
+
+#include "NVPTX.h"
+#include "NVPTXUtilities.h"
+#include "NVPTXTargetMachine.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace llvm {
+void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
+}
+
+namespace {
+class NVPTXLowerKernelArgs : public FunctionPass {
+ bool runOnFunction(Function &F) override;
+
+ // handle byval parameters
+ void handleByValParam(Argument *);
+ // handle non-byval pointer parameters
+ void handlePointerParam(Argument *);
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ NVPTXLowerKernelArgs(const NVPTXTargetMachine *TM = nullptr)
+ : FunctionPass(ID), TM(TM) {}
+ const char *getPassName() const override {
+ return "Lower pointer arguments of CUDA kernels";
+ }
+
+private:
+ const NVPTXTargetMachine *TM;
+};
+} // namespace
+
+char NVPTXLowerKernelArgs::ID = 1;
+
+INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
+ "Lower kernel arguments (NVPTX)", false, false)
+
+// =============================================================================
+// If the function had a byval struct ptr arg, say foo(%struct.x *byval %d),
+// then add the following instructions to the first basic block:
+//
+// %temp = alloca %struct.x, align 8
+// %tempd = addrspacecast %struct.x* %d to %struct.x addrspace(101)*
+// %tv = load %struct.x addrspace(101)* %tempd
+// store %struct.x %tv, %struct.x* %temp, align 8
+//
+// The above code allocates some space in the stack and copies the incoming
+// struct from param space to local space.
+// Then replace all occurrences of %d by %temp.
+// =============================================================================
+void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
+ Function *Func = Arg->getParent();
+ Instruction *FirstInst = &(Func->getEntryBlock().front());
+ PointerType *PType = dyn_cast<PointerType>(Arg->getType());
+
+ assert(PType && "Expecting pointer type in handleByValParam");
+
+ Type *StructType = PType->getElementType();
+ AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
+ // Set the alignment to the alignment of the byval parameter, because later
+ // loads/stores assume that alignment and we are going to replace all uses
+ // of the byval parameter with this alloca instruction.
+ AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
+ Arg->replaceAllUsesWith(AllocA);
+
+ Value *ArgInParam = new AddrSpaceCastInst(
+ Arg, PointerType::get(StructType, ADDRESS_SPACE_PARAM), Arg->getName(),
+ FirstInst);
+ LoadInst *LI = new LoadInst(ArgInParam, Arg->getName(), FirstInst);
+ new StoreInst(LI, AllocA, FirstInst);
+}
+
+void NVPTXLowerKernelArgs::handlePointerParam(Argument *Arg) {
+ assert(!Arg->hasByValAttr() &&
+ "byval params should be handled by handleByValParam");
+
+ Instruction *FirstInst = Arg->getParent()->getEntryBlock().begin();
+ Instruction *ArgInGlobal = new AddrSpaceCastInst(
+ Arg, PointerType::get(Arg->getType()->getPointerElementType(),
+ ADDRESS_SPACE_GLOBAL),
+ Arg->getName(), FirstInst);
+ Value *ArgInGeneric = new AddrSpaceCastInst(ArgInGlobal, Arg->getType(),
+ Arg->getName(), FirstInst);
+ // Replace all uses of Arg with ArgInGeneric, except the use in ArgInGlobal.
+ Arg->replaceAllUsesWith(ArgInGeneric);
+ ArgInGlobal->setOperand(0, Arg);
+}
+
+
+// =============================================================================
+// Main function for this pass.
+// =============================================================================
+bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
+ // Skip non-kernels. See the comments at the top of this file.
+ if (!isKernelFunction(F))
+ return false;
+
+ for (Argument &Arg : F.args()) {
+ if (Arg.getType()->isPointerTy()) {
+ if (Arg.hasByValAttr())
+ handleByValParam(&Arg);
+ else if (TM && TM->getDrvInterface() == NVPTX::CUDA)
+ handlePointerParam(&Arg);
+ }
+ }
+ return true;
+}
+
+FunctionPass *
+llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
+ return new NVPTXLowerKernelArgs(TM);
+}
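
A hypothetical driver for the new pass, only to make the TM parameter's role concrete; in-tree the pass is added by NVPTXTargetMachine::addIRPasses. With a null TM the pass only copies byval structs; with a CUDA-flavored target machine it also marks pointer arguments global:

#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"

static void lowerKernelArgs(llvm::Module &M, const llvm::NVPTXTargetMachine *TM) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createNVPTXLowerKernelArgsPass(TM)); // TM may be null
  PM.run(M);
}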
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
deleted file mode 100644
index 68dfbb7..0000000
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerStructArgs.cpp
+++ /dev/null
@@ -1,136 +0,0 @@
-//===-- NVPTXLowerStructArgs.cpp - Copy struct args to local memory =====--===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Copy struct args to local memory. This is needed for kernel functions only.
-// This is a preparation for handling cases like
-//
-// kernel void foo(struct A arg, ...)
-// {
-// struct A *p = &arg;
-// ...
-// ... = p->filed1 ... (this is no generic address for .param)
-// p->filed2 = ... (this is no write access to .param)
-// }
-//
-//===----------------------------------------------------------------------===//
-
-#include "NVPTX.h"
-#include "NVPTXUtilities.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Pass.h"
-
-using namespace llvm;
-
-namespace llvm {
-void initializeNVPTXLowerStructArgsPass(PassRegistry &);
-}
-
-namespace {
-class NVPTXLowerStructArgs : public FunctionPass {
- bool runOnFunction(Function &F) override;
-
- void handleStructPtrArgs(Function &);
- void handleParam(Argument *);
-
-public:
- static char ID; // Pass identification, replacement for typeid
- NVPTXLowerStructArgs() : FunctionPass(ID) {}
- const char *getPassName() const override {
- return "Copy structure (byval *) arguments to stack";
- }
-};
-} // namespace
-
-char NVPTXLowerStructArgs::ID = 1;
-
-INITIALIZE_PASS(NVPTXLowerStructArgs, "nvptx-lower-struct-args",
- "Lower structure arguments (NVPTX)", false, false)
-
-void NVPTXLowerStructArgs::handleParam(Argument *Arg) {
- Function *Func = Arg->getParent();
- Instruction *FirstInst = &(Func->getEntryBlock().front());
- PointerType *PType = dyn_cast<PointerType>(Arg->getType());
-
- assert(PType && "Expecting pointer type in handleParam");
-
- Type *StructType = PType->getElementType();
- AllocaInst *AllocA = new AllocaInst(StructType, Arg->getName(), FirstInst);
-
- /* Set the alignment to alignment of the byval parameter. This is because,
- * later load/stores assume that alignment, and we are going to replace
- * the use of the byval parameter with this alloca instruction.
- */
- AllocA->setAlignment(Func->getParamAlignment(Arg->getArgNo() + 1));
-
- Arg->replaceAllUsesWith(AllocA);
-
- // Get the cvt.gen.to.param intrinsic
- Type *CvtTypes[] = {
- Type::getInt8PtrTy(Func->getParent()->getContext(), ADDRESS_SPACE_PARAM),
- Type::getInt8PtrTy(Func->getParent()->getContext(),
- ADDRESS_SPACE_GENERIC)};
- Function *CvtFunc = Intrinsic::getDeclaration(
- Func->getParent(), Intrinsic::nvvm_ptr_gen_to_param, CvtTypes);
-
- Value *BitcastArgs[] = {
- new BitCastInst(Arg, Type::getInt8PtrTy(Func->getParent()->getContext(),
- ADDRESS_SPACE_GENERIC),
- Arg->getName(), FirstInst)};
- CallInst *CallCVT =
- CallInst::Create(CvtFunc, BitcastArgs, "cvt_to_param", FirstInst);
-
- BitCastInst *BitCast = new BitCastInst(
- CallCVT, PointerType::get(StructType, ADDRESS_SPACE_PARAM),
- Arg->getName(), FirstInst);
- LoadInst *LI = new LoadInst(BitCast, Arg->getName(), FirstInst);
- new StoreInst(LI, AllocA, FirstInst);
-}
-
-// =============================================================================
-// If the function had a struct ptr arg, say foo(%struct.x *byval %d), then
-// add the following instructions to the first basic block :
-//
-// %temp = alloca %struct.x, align 8
-// %tt1 = bitcast %struct.x * %d to i8 *
-// %tt2 = llvm.nvvm.cvt.gen.to.param %tt2
-// %tempd = bitcast i8 addrspace(101) * to %struct.x addrspace(101) *
-// %tv = load %struct.x addrspace(101) * %tempd
-// store %struct.x %tv, %struct.x * %temp, align 8
-//
-// The above code allocates some space in the stack and copies the incoming
-// struct from param space to local space.
-// Then replace all occurences of %d by %temp.
-// =============================================================================
-void NVPTXLowerStructArgs::handleStructPtrArgs(Function &F) {
- for (Argument &Arg : F.args()) {
- if (Arg.getType()->isPointerTy() && Arg.hasByValAttr()) {
- handleParam(&Arg);
- }
- }
-}
-
-// =============================================================================
-// Main function for this pass.
-// =============================================================================
-bool NVPTXLowerStructArgs::runOnFunction(Function &F) {
- // Skip non-kernels. See the comments at the top of this file.
- if (!isKernelFunction(F))
- return false;
-
- handleStructPtrArgs(F);
- return true;
-}
-
-FunctionPass *llvm::createNVPTXLowerStructArgsPass() {
- return new NVPTXLowerStructArgs();
-}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
index 779b65e..3c98b9f 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -16,11 +16,11 @@ using namespace llvm;
#define DEBUG_TYPE "nvptx-mcexpr"
const NVPTXFloatMCExpr*
-NVPTXFloatMCExpr::Create(VariantKind Kind, APFloat Flt, MCContext &Ctx) {
+NVPTXFloatMCExpr::create(VariantKind Kind, APFloat Flt, MCContext &Ctx) {
return new (Ctx) NVPTXFloatMCExpr(Kind, Flt);
}
-void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const {
+void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
bool Ignored;
unsigned NumHex;
APFloat APF = getAPFloat();
@@ -47,11 +47,14 @@ void NVPTXFloatMCExpr::PrintImpl(raw_ostream &OS) const {
}
const NVPTXGenericMCSymbolRefExpr*
-NVPTXGenericMCSymbolRefExpr::Create(const MCSymbolRefExpr *SymExpr,
+NVPTXGenericMCSymbolRefExpr::create(const MCSymbolRefExpr *SymExpr,
MCContext &Ctx) {
return new (Ctx) NVPTXGenericMCSymbolRefExpr(SymExpr);
}
-void NVPTXGenericMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
- OS << "generic(" << *SymExpr << ")";
+void NVPTXGenericMCSymbolRefExpr::printImpl(raw_ostream &OS,
+ const MCAsmInfo *MAI) const {
+ OS << "generic(";
+ SymExpr->print(OS, MAI);
+ OS << ")";
}
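
For reference, PTX spells floating-point immediates as their IEEE bit pattern behind a "0f"/"0d" tag, which is what printImpl emits. A minimal sketch of the single-precision form (the real code additionally zero-pads to 8 or 16 hex digits via utohexstr):

#include "llvm/ADT/APFloat.h"
#include "llvm/Support/raw_ostream.h"

static void printPTXFloatLiteral(float F, llvm::raw_ostream &OS) {
  llvm::APFloat APF(F);
  OS << "0f";
  OS.write_hex(APF.bitcastToAPInt().getZExtValue()); // 1.0f -> 0f3f800000
}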
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
index 8c6b219..46b4b33 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
@@ -36,17 +36,17 @@ public:
/// @name Construction
/// @{
- static const NVPTXFloatMCExpr *Create(VariantKind Kind, APFloat Flt,
+ static const NVPTXFloatMCExpr *create(VariantKind Kind, APFloat Flt,
MCContext &Ctx);
- static const NVPTXFloatMCExpr *CreateConstantFPSingle(APFloat Flt,
+ static const NVPTXFloatMCExpr *createConstantFPSingle(APFloat Flt,
MCContext &Ctx) {
- return Create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx);
+ return create(VK_NVPTX_SINGLE_PREC_FLOAT, Flt, Ctx);
}
- static const NVPTXFloatMCExpr *CreateConstantFPDouble(APFloat Flt,
+ static const NVPTXFloatMCExpr *createConstantFPDouble(APFloat Flt,
MCContext &Ctx) {
- return Create(VK_NVPTX_DOUBLE_PREC_FLOAT, Flt, Ctx);
+ return create(VK_NVPTX_DOUBLE_PREC_FLOAT, Flt, Ctx);
}
/// @}
@@ -61,14 +61,14 @@ public:
/// @}
- void PrintImpl(raw_ostream &OS) const override;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override {
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
- MCSection *FindAssociatedSection() const override { return nullptr; }
+ MCSection *findAssociatedSection() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
@@ -92,7 +92,7 @@ public:
/// @{
static const NVPTXGenericMCSymbolRefExpr
- *Create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx);
+ *create(const MCSymbolRefExpr *SymExpr, MCContext &Ctx);
/// @}
/// @name Accessors
@@ -103,14 +103,14 @@ public:
/// @}
- void PrintImpl(raw_ostream &OS) const override;
- bool EvaluateAsRelocatableImpl(MCValue &Res,
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res,
const MCAsmLayout *Layout,
const MCFixup *Fixup) const override {
return false;
}
void visitUsedExpr(MCStreamer &Streamer) const override {};
- MCSection *FindAssociatedSection() const override { return nullptr; }
+ MCSection *findAssociatedSection() const override { return nullptr; }
// There are no TLS NVPTXMCExprs at the moment.
void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index ac27c30..a646668 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -53,7 +53,7 @@ void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
-void initializeNVPTXLowerStructArgsPass(PassRegistry &);
+void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
}
extern "C" void LLVMInitializeNVPTXTarget() {
@@ -69,7 +69,7 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
initializeNVPTXFavorNonGenericAddrSpacesPass(
*PassRegistry::getPassRegistry());
- initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry());
+ initializeNVPTXLowerKernelArgsPass(*PassRegistry::getPassRegistry());
}
static std::string computeDataLayout(bool is64Bit) {
@@ -163,7 +163,13 @@ void NVPTXPassConfig::addIRPasses() {
TargetPassConfig::addIRPasses();
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
+ addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
addPass(createNVPTXFavorNonGenericAddrSpacesPass());
+ // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
+ // be eliminated by SROA. We do not run SROA right after NVPTXLowerKernelArgs
+ // because we plan to merge NVPTXLowerKernelArgs and
+ // NVPTXFavorNonGenericAddrSpaces into one pass.
+ addPass(createSROAPass());
  // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leaves
// them unused. We could remove dead code in an ad-hoc manner, but that
// requires manual work and might be error-prone.
@@ -181,6 +187,9 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createEarlyCSEPass());
// Run NaryReassociate after EarlyCSE/GVN to be more effective.
addPass(createNaryReassociatePass());
+ // NaryReassociate on GEPs creates redundant common expressions, so run
+ // EarlyCSE after it.
+ addPass(createEarlyCSEPass());
}
bool NVPTXPassConfig::addInstSelector() {
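
Recapping the two NVPTXTargetMachine.cpp hunks above, a sketch (not the full addIRPasses body) of the IR pass order they establish; all factory functions are the ones referenced in this diff:

#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"

static void addNVPTXIRPassOrder(llvm::legacy::PassManagerBase &PM,
                                llvm::NVPTXTargetMachine &TM) {
  PM.add(llvm::createNVPTXLowerKernelArgsPass(&TM)); // byval allocas + cast pairs
  PM.add(llvm::createNVPTXFavorNonGenericAddrSpacesPass());
  PM.add(llvm::createSROAPass());            // cleans up the byval allocas
  PM.add(llvm::createEarlyCSEPass());
  PM.add(llvm::createNaryReassociatePass());
  PM.add(llvm::createEarlyCSEPass());        // re-CSE after NaryReassociate
}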