path: root/contrib/llvm/lib/Target/PowerPC
author     dim <dim@FreeBSD.org>    2014-03-21 17:53:59 +0000
committer  dim <dim@FreeBSD.org>    2014-03-21 17:53:59 +0000
commit     9cedb8bb69b89b0f0c529937247a6a80cabdbaec (patch)
tree       c978f0e9ec1ab92dc8123783f30b08a7fd1e2a39 /contrib/llvm/lib/Target/PowerPC
parent     03fdc2934eb61c44c049a02b02aa974cfdd8a0eb (diff)
MFC 261991:

Upgrade our copy of llvm/clang to the 3.4 release. This version supports
all of the features in the current working draft of the upcoming C++
standard, provisionally named C++1y.

The code generator's performance is greatly increased, and the loop
auto-vectorizer is now enabled at -Os and -O2 in addition to -O3. The
PowerPC backend has made several major improvements to code generation
quality and compile time, and the X86, SPARC, ARM32, AArch64 and SystemZ
backends have all seen major feature work.

Release notes for llvm and clang can be found here:

<http://llvm.org/releases/3.4/docs/ReleaseNotes.html>
<http://llvm.org/releases/3.4/tools/clang/docs/ReleaseNotes.html>

MFC 262121 (by emaste):

Update lldb for clang/llvm 3.4 import

This commit largely restores the lldb source to the upstream r196259
snapshot, with the addition of threaded inferior support and a few bug
fixes.

Specific upstream lldb revisions restored include:

  SVN      git
  181387   779e6ac
  181703   7bef4e2
  182099   b31044e
  182650   f2dcf35
  182683   0d91b80
  183862   15c1774
  183929   99447a6
  184177   0b2934b
  184948   4dc3761
  184954   007e7bc
  186990   eebd175

Sponsored by: DARPA, AFRL

MFC 262186 (by emaste):

Fix mismerge in r262121

A break statement was lost in the merge. The error had no functional
impact, but restore it to reduce the diff against upstream.

MFC 262303:

Pull in r197521 from upstream clang trunk (by rdivacky):

  Use the integrated assembler by default on FreeBSD/ppc and ppc64.

Requested by: jhibbits

MFC 262611:

Pull in r196874 from upstream llvm trunk:

  Fix a crash that occurs when PWD is invalid.

  MCJIT needs to be able to run in hostile environments, even when PWD
  is invalid. There's no need to crash MCJIT in this case.

  The obvious fix is to simply leave MCContext's CompilationDir empty
  when PWD can't be determined. This way, MCJIT clients, and other
  clients that link with LLVM, don't need a valid working directory.

  If we do want to guarantee a valid CompilationDir, that should be done
  only for clients of getCompilationDir(). This is as simple as checking
  for an empty string.

  The only current use of getCompilationDir is EmitGenDwarfInfo, which
  won't conceivably run with an invalid working dir. However, in the
  purely hypothetical and untestable case that this happens, the
  AT_comp_dir will be omitted from the compilation_unit DIE.

This should help fix assertions occurring with ports-mgmt/tinderbox
when it is using jails, which sometimes invalidates clang's current
working directory.

Reported by: decke

MFC 262809:

Pull in r203007 from upstream clang trunk:

  Don't produce an alias between destructors with different calling
  conventions. Fixes pr19007.

(Please note that is an LLVM PR identifier, not a FreeBSD one.)

This should fix Firefox and/or libxul crashes (due to problems with
regparm/stdcall calling conventions) on i386.

Reported by: multiple users on freebsd-current
PR:          bin/187103

MFC 263048:

Repair recognition of "CC" as an alias for the C++ compiler, since it
was silently broken by upstream for a Windows-specific use case.
Apparently some versions of CMake still rely on this archaic feature...

Reported by: rakuco

MFC 263049:

Garbage collect the old way of adding the libstdc++ include directories
in clang's InitHeaderSearch.cpp. This has been superseded by David
Chisnall's commit in r255321. Moreover, if libc++ is used, the libstdc++
include directories should not be in the search path at all. These
directories are now only used if you pass -stdlib=libstdc++.
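The approach described in MFC 262611 boils down to a simple pattern:
record the compilation directory only when it can actually be
determined, leave it empty otherwise, and have the rare consumer that
needs it treat the empty string as "unknown". Below is a minimal,
self-contained C++ sketch of that pattern; CompilationContext is a
hypothetical stand-in for illustration, not LLVM's actual MCContext API
or the upstream patch itself:

#include <limits.h>   // PATH_MAX
#include <string>
#include <unistd.h>   // getcwd

// Hypothetical stand-in for the CompilationDir handling described
// above; not the actual upstream change.
class CompilationContext {
  std::string CompilationDir; // empty means "working directory unknown"

public:
  CompilationContext() {
    char Buf[PATH_MAX];
    // getcwd() fails when the working directory is invalid (e.g. it was
    // removed underneath a jailed process); in that case leave
    // CompilationDir empty instead of crashing.
    if (::getcwd(Buf, sizeof(Buf)) != 0)
      CompilationDir = Buf;
  }

  // Consumers that require a valid directory (such as DWARF AT_comp_dir
  // emission) check for the empty string and simply omit the attribute.
  const std::string &getCompilationDir() const { return CompilationDir; }
};

Under this scheme, clients never need a valid working directory up
front; only callers of getCompilationDir() pay the cost of an
empty-string check.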
Diffstat (limited to 'contrib/llvm/lib/Target/PowerPC')
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 738
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 189
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 18
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 101
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 340
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 28
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 9
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 3
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 102
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 155
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 96
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 84
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 8
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 389
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp | 32
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 32
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPC.h | 24
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPC.td | 66
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 218
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 1164
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td | 70
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp | 51
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp | 2236
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 635
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h | 10
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp | 6
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h | 4
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 111
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1056
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h | 143
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 165
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 52
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td | 71
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 65
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h | 7
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td | 805
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp | 40
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 62
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h | 9
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 264
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 11
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 20
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSchedule.td | 8
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td | 841
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td | 2
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td | 1
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 154
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h | 28
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 16
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp | 67
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h | 35
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h | 23
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 9
-rw-r--r--  contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 5
54 files changed, 7786 insertions, 3092 deletions
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index f2cb8b8..fe83fe1 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -8,15 +8,18 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCMCExpr.h"
#include "llvm/MC/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
@@ -106,11 +109,73 @@ static unsigned CRRegs[8] = {
PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7
};
+// Evaluate an expression containing condition register
+// or condition register field symbols. Returns positive
+// value on success, or -1 on error.
+static int64_t
+EvaluateCRExpr(const MCExpr *E) {
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ return -1;
+
+ case MCExpr::Constant: {
+ int64_t Res = cast<MCConstantExpr>(E)->getValue();
+ return Res < 0 ? -1 : Res;
+ }
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+ StringRef Name = SRE->getSymbol().getName();
+
+ if (Name == "lt") return 0;
+ if (Name == "gt") return 1;
+ if (Name == "eq") return 2;
+ if (Name == "so") return 3;
+ if (Name == "un") return 3;
+
+ if (Name == "cr0") return 0;
+ if (Name == "cr1") return 1;
+ if (Name == "cr2") return 2;
+ if (Name == "cr3") return 3;
+ if (Name == "cr4") return 4;
+ if (Name == "cr5") return 5;
+ if (Name == "cr6") return 6;
+ if (Name == "cr7") return 7;
+
+ return -1;
+ }
+
+ case MCExpr::Unary:
+ return -1;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ int64_t LHSVal = EvaluateCRExpr(BE->getLHS());
+ int64_t RHSVal = EvaluateCRExpr(BE->getRHS());
+ int64_t Res;
+
+ if (LHSVal < 0 || RHSVal < 0)
+ return -1;
+
+ switch (BE->getOpcode()) {
+ default: return -1;
+ case MCBinaryExpr::Add: Res = LHSVal + RHSVal; break;
+ case MCBinaryExpr::Mul: Res = LHSVal * RHSVal; break;
+ }
+
+ return Res < 0 ? -1 : Res;
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
struct PPCOperand;
class PPCAsmParser : public MCTargetAsmParser {
MCSubtargetInfo &STI;
MCAsmParser &Parser;
+ const MCInstrInfo &MII;
bool IsPPC64;
MCAsmParser &getParser() const { return Parser; }
@@ -126,10 +191,16 @@ class PPCAsmParser : public MCTargetAsmParser {
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
+ const MCExpr *ExtractModifierFromExpr(const MCExpr *E,
+ PPCMCExpr::VariantKind &Variant);
+ const MCExpr *FixupVariantKind(const MCExpr *E);
+ bool ParseExpression(const MCExpr *&EVal);
+
bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
bool ParseDirectiveTC(unsigned Size, SMLoc L);
+ bool ParseDirectiveMachine(SMLoc L);
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
@@ -149,11 +220,13 @@ class PPCAsmParser : public MCTargetAsmParser {
public:
- PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
- : MCTargetAsmParser(), STI(_STI), Parser(_Parser) {
+ PPCAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser,
+ const MCInstrInfo &_MII)
+ : MCTargetAsmParser(), STI(_STI), Parser(_Parser), MII(_MII) {
// Check for 64-bit vs. 32-bit pointer mode.
Triple TheTriple(STI.getTargetTriple());
- IsPPC64 = TheTriple.getArch() == Triple::ppc64;
+ IsPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
+ TheTriple.getArch() == Triple::ppc64le);
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
@@ -163,6 +236,12 @@ public:
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
virtual bool ParseDirective(AsmToken DirectiveID);
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, unsigned Kind);
+
+ virtual const MCExpr *applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind,
+ MCContext &Ctx);
};
/// PPCOperand - Instances of this class represent a parsed PowerPC machine
@@ -171,7 +250,8 @@ struct PPCOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
Immediate,
- Expression
+ Expression,
+ TLSRegister
} Kind;
SMLoc StartLoc, EndLoc;
@@ -188,12 +268,18 @@ struct PPCOperand : public MCParsedAsmOperand {
struct ExprOp {
const MCExpr *Val;
+ int64_t CRVal; // Cached result of EvaluateCRExpr(Val)
+ };
+
+ struct TLSRegOp {
+ const MCSymbolRefExpr *Sym;
};
union {
struct TokOp Tok;
struct ImmOp Imm;
struct ExprOp Expr;
+ struct TLSRegOp TLSReg;
};
PPCOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -213,6 +299,9 @@ public:
case Expression:
Expr = o.Expr;
break;
+ case TLSRegister:
+ TLSReg = o.TLSReg;
+ break;
}
}
@@ -235,6 +324,16 @@ public:
return Expr.Val;
}
+ int64_t getExprCRVal() const {
+ assert(Kind == Expression && "Invalid access!");
+ return Expr.CRVal;
+ }
+
+ const MCExpr *getTLSReg() const {
+ assert(Kind == TLSRegister && "Invalid access!");
+ return TLSReg.Sym;
+ }
+
unsigned getReg() const {
assert(isRegNumber() && "Invalid access!");
return (unsigned) Imm.Val;
@@ -242,12 +341,17 @@ public:
unsigned getCCReg() const {
assert(isCCRegNumber() && "Invalid access!");
- return (unsigned) Imm.Val;
+ return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
+ }
+
+ unsigned getCRBit() const {
+ assert(isCRBitNumber() && "Invalid access!");
+ return (unsigned) (Kind == Immediate ? Imm.Val : Expr.CRVal);
}
unsigned getCRBitMask() const {
assert(isCRBitMask() && "Invalid access!");
- return 7 - CountTrailingZeros_32(Imm.Val);
+ return 7 - countTrailingZeros<uint64_t>(Imm.Val);
}
bool isToken() const { return Kind == Token; }
@@ -262,9 +366,24 @@ public:
bool isS16ImmX4() const { return Kind == Expression ||
(Kind == Immediate && isInt<16>(getImm()) &&
(getImm() & 3) == 0); }
+ bool isS17Imm() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<17>(getImm())); }
+ bool isTLSReg() const { return Kind == TLSRegister; }
+ bool isDirectBr() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<26>(getImm()) &&
+ (getImm() & 3) == 0); }
+ bool isCondBr() const { return Kind == Expression ||
+ (Kind == Immediate && isInt<16>(getImm()) &&
+ (getImm() & 3) == 0); }
bool isRegNumber() const { return Kind == Immediate && isUInt<5>(getImm()); }
- bool isCCRegNumber() const { return Kind == Immediate &&
- isUInt<3>(getImm()); }
+ bool isCCRegNumber() const { return (Kind == Expression
+ && isUInt<3>(getExprCRVal())) ||
+ (Kind == Immediate
+ && isUInt<3>(getImm())); }
+ bool isCRBitNumber() const { return (Kind == Expression
+ && isUInt<5>(getExprCRVal())) ||
+ (Kind == Immediate
+ && isUInt<5>(getImm())); }
bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) &&
isPowerOf2_32(getImm()); }
bool isMem() const { return false; }
@@ -325,7 +444,7 @@ public:
void addRegCRBITRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getReg()]));
+ Inst.addOperand(MCOperand::CreateReg(CRBITRegs[getCRBit()]));
}
void addRegCRRCOperands(MCInst &Inst, unsigned N) const {
@@ -346,20 +465,17 @@ public:
Inst.addOperand(MCOperand::CreateExpr(getExpr()));
}
- void addDispRIOperands(MCInst &Inst, unsigned N) const {
+ void addBranchTargetOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
if (Kind == Immediate)
- Inst.addOperand(MCOperand::CreateImm(getImm()));
+ Inst.addOperand(MCOperand::CreateImm(getImm() / 4));
else
Inst.addOperand(MCOperand::CreateExpr(getExpr()));
}
- void addDispRIXOperands(MCInst &Inst, unsigned N) const {
+ void addTLSRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- if (Kind == Immediate)
- Inst.addOperand(MCOperand::CreateImm(getImm() / 4));
- else
- Inst.addOperand(MCOperand::CreateExpr(getExpr()));
+ Inst.addOperand(MCOperand::CreateExpr(getTLSReg()));
}
StringRef getToken() const {
@@ -379,6 +495,20 @@ public:
return Op;
}
+ static PPCOperand *CreateTokenWithStringCopy(StringRef Str, SMLoc S,
+ bool IsPPC64) {
+ // Allocate extra memory for the string and copy it.
+ void *Mem = ::operator new(sizeof(PPCOperand) + Str.size());
+ PPCOperand *Op = new (Mem) PPCOperand(Token);
+ Op->Tok.Data = (const char *)(Op + 1);
+ Op->Tok.Length = Str.size();
+ std::memcpy((char *)(Op + 1), Str.data(), Str.size());
+ Op->StartLoc = S;
+ Op->EndLoc = S;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) {
PPCOperand *Op = new PPCOperand(Immediate);
Op->Imm.Val = Val;
@@ -392,11 +522,34 @@ public:
SMLoc S, SMLoc E, bool IsPPC64) {
PPCOperand *Op = new PPCOperand(Expression);
Op->Expr.Val = Val;
+ Op->Expr.CRVal = EvaluateCRExpr(Val);
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ Op->IsPPC64 = IsPPC64;
+ return Op;
+ }
+
+ static PPCOperand *CreateTLSReg(const MCSymbolRefExpr *Sym,
+ SMLoc S, SMLoc E, bool IsPPC64) {
+ PPCOperand *Op = new PPCOperand(TLSRegister);
+ Op->TLSReg.Sym = Sym;
Op->StartLoc = S;
Op->EndLoc = E;
Op->IsPPC64 = IsPPC64;
return Op;
}
+
+ static PPCOperand *CreateFromMCExpr(const MCExpr *Val,
+ SMLoc S, SMLoc E, bool IsPPC64) {
+ if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val))
+ return CreateImm(CE->getValue(), S, E, IsPPC64);
+
+ if (const MCSymbolRefExpr *SRE = dyn_cast<MCSymbolRefExpr>(Val))
+ if (SRE->getKind() == MCSymbolRefExpr::VK_PPC_TLS)
+ return CreateTLSReg(SRE, S, E, IsPPC64);
+
+ return CreateExpr(Val, S, E, IsPPC64);
+ }
};
} // end anonymous namespace.
@@ -412,6 +565,9 @@ void PPCOperand::print(raw_ostream &OS) const {
case Expression:
getExpr()->print(OS);
break;
+ case TLSRegister:
+ getTLSReg()->print(OS);
+ break;
}
}
@@ -419,11 +575,133 @@ void PPCOperand::print(raw_ostream &OS) const {
void PPCAsmParser::
ProcessInstruction(MCInst &Inst,
const SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
- switch (Inst.getOpcode()) {
- case PPC::SLWI: {
+ int Opcode = Inst.getOpcode();
+ switch (Opcode) {
+ case PPC::LAx: {
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::LA);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(2));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBI: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(PPC::RLWINM);
+ TmpInst.setOpcode(PPC::ADDI);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(-N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBIS: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(PPC::ADDIS);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(-N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBIC: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(PPC::ADDIC);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(-N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SUBICo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(PPC::ADDICo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(-N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::EXTLWI:
+ case PPC::EXTLWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(B));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(N - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::EXTRWI:
+ case PPC::EXTRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(B + N));
+ TmpInst.addOperand(MCOperand::CreateImm(32 - N));
+ TmpInst.addOperand(MCOperand::CreateImm(31));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::INSLWI:
+ case PPC::INSLWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::INSLWI? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(32 - B));
+ TmpInst.addOperand(MCOperand::CreateImm(B));
+ TmpInst.addOperand(MCOperand::CreateImm((B + N) - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::INSRWI:
+ case PPC::INSRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::INSRWI? PPC::RLWIMI : PPC::RLWIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(32 - (B + N)));
+ TmpInst.addOperand(MCOperand::CreateImm(B));
+ TmpInst.addOperand(MCOperand::CreateImm((B + N) - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::ROTRWI:
+ case PPC::ROTRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::ROTRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(32 - N));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(31));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SLWI:
+ case PPC::SLWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::SLWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::CreateImm(N));
@@ -432,10 +710,11 @@ ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
- case PPC::SRWI: {
+ case PPC::SRWI:
+ case PPC::SRWIo: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(PPC::RLWINM);
+ TmpInst.setOpcode(Opcode == PPC::SRWI? PPC::RLWINM : PPC::RLWINMo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::CreateImm(32 - N));
@@ -444,10 +723,90 @@ ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
- case PPC::SLDI: {
+ case PPC::CLRRWI:
+ case PPC::CLRRWIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRRWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::CLRLSLWI:
+ case PPC::CLRLSLWIo: {
+ MCInst TmpInst;
+ int64_t B = Inst.getOperand(2).getImm();
+ int64_t N = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRLSLWI? PPC::RLWINM : PPC::RLWINMo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::CreateImm(B - N));
+ TmpInst.addOperand(MCOperand::CreateImm(31 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::EXTLDI:
+ case PPC::EXTLDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTLDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(B));
+ TmpInst.addOperand(MCOperand::CreateImm(N - 1));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::EXTRDI:
+ case PPC::EXTRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::EXTRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(B + N));
+ TmpInst.addOperand(MCOperand::CreateImm(64 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::INSRDI:
+ case PPC::INSRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ int64_t B = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::INSRDI? PPC::RLDIMI : PPC::RLDIMIo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(64 - (B + N)));
+ TmpInst.addOperand(MCOperand::CreateImm(B));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::ROTRDI:
+ case PPC::ROTRDIo: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(PPC::RLDICR);
+ TmpInst.setOpcode(Opcode == PPC::ROTRDI? PPC::RLDICL : PPC::RLDICLo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(64 - N));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::SLDI:
+ case PPC::SLDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::SLDI? PPC::RLDICR : PPC::RLDICRo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::CreateImm(N));
@@ -455,10 +814,11 @@ ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
- case PPC::SRDI: {
+ case PPC::SRDI:
+ case PPC::SRDIo: {
MCInst TmpInst;
int64_t N = Inst.getOperand(2).getImm();
- TmpInst.setOpcode(PPC::RLDICL);
+ TmpInst.setOpcode(Opcode == PPC::SRDI? PPC::RLDICL : PPC::RLDICLo);
TmpInst.addOperand(Inst.getOperand(0));
TmpInst.addOperand(Inst.getOperand(1));
TmpInst.addOperand(MCOperand::CreateImm(64 - N));
@@ -466,6 +826,31 @@ ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
+ case PPC::CLRRDI:
+ case PPC::CLRRDIo: {
+ MCInst TmpInst;
+ int64_t N = Inst.getOperand(2).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRRDI? PPC::RLDICR : PPC::RLDICRo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(0));
+ TmpInst.addOperand(MCOperand::CreateImm(63 - N));
+ Inst = TmpInst;
+ break;
+ }
+ case PPC::CLRLSLDI:
+ case PPC::CLRLSLDIo: {
+ MCInst TmpInst;
+ int64_t B = Inst.getOperand(2).getImm();
+ int64_t N = Inst.getOperand(3).getImm();
+ TmpInst.setOpcode(Opcode == PPC::CLRLSLDI? PPC::RLDIC : PPC::RLDICo);
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ TmpInst.addOperand(MCOperand::CreateImm(N));
+ TmpInst.addOperand(MCOperand::CreateImm(B - N));
+ Inst = TmpInst;
+ break;
+ }
}
}
@@ -518,19 +903,23 @@ MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
RegNo = isPPC64()? PPC::CTR8 : PPC::CTR;
IntVal = 9;
return false;
- } else if (Name.substr(0, 1).equals_lower("r") &&
+ } else if (Name.equals_lower("vrsave")) {
+ RegNo = PPC::VRSAVE;
+ IntVal = 256;
+ return false;
+ } else if (Name.startswith_lower("r") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal];
return false;
- } else if (Name.substr(0, 1).equals_lower("f") &&
+ } else if (Name.startswith_lower("f") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = FRegs[IntVal];
return false;
- } else if (Name.substr(0, 1).equals_lower("v") &&
+ } else if (Name.startswith_lower("v") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = VRegs[IntVal];
return false;
- } else if (Name.substr(0, 2).equals_lower("cr") &&
+ } else if (Name.startswith_lower("cr") &&
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = CRRegs[IntVal];
return false;
@@ -556,6 +945,159 @@ ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
return Error(StartLoc, "invalid register name");
}
+/// Extract \code @l/@ha \endcode modifier from expression. Recursively scan
+/// the expression and check for VK_PPC_LO/HI/HA
+/// symbol variants. If all symbols with modifier use the same
+/// variant, return the corresponding PPCMCExpr::VariantKind,
+/// and a modified expression using the default symbol variant.
+/// Otherwise, return NULL.
+const MCExpr *PPCAsmParser::
+ExtractModifierFromExpr(const MCExpr *E,
+ PPCMCExpr::VariantKind &Variant) {
+ MCContext &Context = getParser().getContext();
+ Variant = PPCMCExpr::VK_PPC_None;
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return 0;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+
+ switch (SRE->getKind()) {
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Variant = PPCMCExpr::VK_PPC_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Variant = PPCMCExpr::VK_PPC_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Variant = PPCMCExpr::VK_PPC_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHER:
+ Variant = PPCMCExpr::VK_PPC_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHERA:
+ Variant = PPCMCExpr::VK_PPC_HIGHERA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHEST:
+ Variant = PPCMCExpr::VK_PPC_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HIGHESTA:
+ Variant = PPCMCExpr::VK_PPC_HIGHESTA;
+ break;
+ default:
+ return 0;
+ }
+
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Context);
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = ExtractModifierFromExpr(UE->getSubExpr(), Variant);
+ if (!Sub)
+ return 0;
+ return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context);
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ PPCMCExpr::VariantKind LHSVariant, RHSVariant;
+ const MCExpr *LHS = ExtractModifierFromExpr(BE->getLHS(), LHSVariant);
+ const MCExpr *RHS = ExtractModifierFromExpr(BE->getRHS(), RHSVariant);
+
+ if (!LHS && !RHS)
+ return 0;
+
+ if (!LHS) LHS = BE->getLHS();
+ if (!RHS) RHS = BE->getRHS();
+
+ if (LHSVariant == PPCMCExpr::VK_PPC_None)
+ Variant = RHSVariant;
+ else if (RHSVariant == PPCMCExpr::VK_PPC_None)
+ Variant = LHSVariant;
+ else if (LHSVariant == RHSVariant)
+ Variant = LHSVariant;
+ else
+ return 0;
+
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context);
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+/// Find all VK_TLSGD/VK_TLSLD symbol references in expression and replace
+/// them by VK_PPC_TLSGD/VK_PPC_TLSLD. This is necessary to avoid having
+/// _GLOBAL_OFFSET_TABLE_ created via ELFObjectWriter::RelocNeedsGOT.
+/// FIXME: This is a hack.
+const MCExpr *PPCAsmParser::
+FixupVariantKind(const MCExpr *E) {
+ MCContext &Context = getParser().getContext();
+
+ switch (E->getKind()) {
+ case MCExpr::Target:
+ case MCExpr::Constant:
+ return E;
+
+ case MCExpr::SymbolRef: {
+ const MCSymbolRefExpr *SRE = cast<MCSymbolRefExpr>(E);
+ MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
+
+ switch (SRE->getKind()) {
+ case MCSymbolRefExpr::VK_TLSGD:
+ Variant = MCSymbolRefExpr::VK_PPC_TLSGD;
+ break;
+ case MCSymbolRefExpr::VK_TLSLD:
+ Variant = MCSymbolRefExpr::VK_PPC_TLSLD;
+ break;
+ default:
+ return E;
+ }
+ return MCSymbolRefExpr::Create(&SRE->getSymbol(), Variant, Context);
+ }
+
+ case MCExpr::Unary: {
+ const MCUnaryExpr *UE = cast<MCUnaryExpr>(E);
+ const MCExpr *Sub = FixupVariantKind(UE->getSubExpr());
+ if (Sub == UE->getSubExpr())
+ return E;
+ return MCUnaryExpr::Create(UE->getOpcode(), Sub, Context);
+ }
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(E);
+ const MCExpr *LHS = FixupVariantKind(BE->getLHS());
+ const MCExpr *RHS = FixupVariantKind(BE->getRHS());
+ if (LHS == BE->getLHS() && RHS == BE->getRHS())
+ return E;
+ return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, Context);
+ }
+ }
+
+ llvm_unreachable("Invalid expression kind!");
+}
+
+/// Parse an expression. This differs from the default "parseExpression"
+/// in that it handles complex \code @l/@ha \endcode modifiers.
+bool PPCAsmParser::
+ParseExpression(const MCExpr *&EVal) {
+ if (getParser().parseExpression(EVal))
+ return true;
+
+ EVal = FixupVariantKind(EVal);
+
+ PPCMCExpr::VariantKind Variant;
+ const MCExpr *E = ExtractModifierFromExpr(EVal, Variant);
+ if (E)
+ EVal = PPCMCExpr::Create(Variant, E, false, getParser().getContext());
+
+ return false;
+}
+
bool PPCAsmParser::
ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
@@ -587,23 +1129,40 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
case AsmToken::Identifier:
case AsmToken::Dot:
case AsmToken::Dollar:
- if (!getParser().parseExpression(EVal))
+ if (!ParseExpression(EVal))
break;
/* fall through */
default:
return Error(S, "unknown operand");
}
- if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(EVal))
- Op = PPCOperand::CreateImm(CE->getValue(), S, E, isPPC64());
- else
- Op = PPCOperand::CreateExpr(EVal, S, E, isPPC64());
-
// Push the parsed operand into the list of operands
+ Op = PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64());
Operands.push_back(Op);
- // Check for D-form memory operands
- if (getLexer().is(AsmToken::LParen)) {
+ // Check whether this is a TLS call expression
+ bool TLSCall = false;
+ if (const MCSymbolRefExpr *Ref = dyn_cast<MCSymbolRefExpr>(EVal))
+ TLSCall = Ref->getSymbol().getName() == "__tls_get_addr";
+
+ if (TLSCall && getLexer().is(AsmToken::LParen)) {
+ const MCExpr *TLSSym;
+
+ Parser.Lex(); // Eat the '('.
+ S = Parser.getTok().getLoc();
+ if (ParseExpression(TLSSym))
+ return Error(S, "invalid TLS call expression");
+ if (getLexer().isNot(AsmToken::RParen))
+ return Error(Parser.getTok().getLoc(), "missing ')'");
+ E = Parser.getTok().getLoc();
+ Parser.Lex(); // Eat the ')'.
+
+ Op = PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64());
+ Operands.push_back(Op);
+ }
+
+ // Otherwise, check for D-form memory operands
+ if (!TLSCall && getLexer().is(AsmToken::LParen)) {
Parser.Lex(); // Eat the '('.
S = Parser.getTok().getLoc();
@@ -644,15 +1203,38 @@ bool PPCAsmParser::
ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
// The first operand is the token for the instruction name.
+ // If the next character is a '+' or '-', we need to add it to the
+ // instruction name, to match what TableGen is doing.
+ std::string NewOpcode;
+ if (getLexer().is(AsmToken::Plus)) {
+ getLexer().Lex();
+ NewOpcode = Name;
+ NewOpcode += '+';
+ Name = NewOpcode;
+ }
+ if (getLexer().is(AsmToken::Minus)) {
+ getLexer().Lex();
+ NewOpcode = Name;
+ NewOpcode += '-';
+ Name = NewOpcode;
+ }
// If the instruction ends in a '.', we need to create a separate
// token for it, to match what TableGen is doing.
size_t Dot = Name.find('.');
StringRef Mnemonic = Name.slice(0, Dot);
- Operands.push_back(PPCOperand::CreateToken(Mnemonic, NameLoc, isPPC64()));
+ if (!NewOpcode.empty()) // Underlying memory for Name is volatile.
+ Operands.push_back(
+ PPCOperand::CreateTokenWithStringCopy(Mnemonic, NameLoc, isPPC64()));
+ else
+ Operands.push_back(PPCOperand::CreateToken(Mnemonic, NameLoc, isPPC64()));
if (Dot != StringRef::npos) {
SMLoc DotLoc = SMLoc::getFromPointer(NameLoc.getPointer() + Dot);
StringRef DotStr = Name.slice(Dot, StringRef::npos);
- Operands.push_back(PPCOperand::CreateToken(DotStr, DotLoc, isPPC64()));
+ if (!NewOpcode.empty()) // Underlying memory for Name is volatile.
+ Operands.push_back(
+ PPCOperand::CreateTokenWithStringCopy(DotStr, DotLoc, isPPC64()));
+ else
+ Operands.push_back(PPCOperand::CreateToken(DotStr, DotLoc, isPPC64()));
}
// If there are no more operands then finish
@@ -680,9 +1262,13 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc,
bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
- return ParseDirectiveWord(4, DirectiveID.getLoc());
+ return ParseDirectiveWord(2, DirectiveID.getLoc());
+ if (IDVal == ".llong")
+ return ParseDirectiveWord(8, DirectiveID.getLoc());
if (IDVal == ".tc")
return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
+ if (IDVal == ".machine")
+ return ParseDirectiveMachine(DirectiveID.getLoc());
return true;
}
@@ -728,12 +1314,84 @@ bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
return ParseDirectiveWord(Size, L);
}
+/// ParseDirectiveMachine
+/// ::= .machine [ cpu | "push" | "pop" ]
+bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
+ if (getLexer().isNot(AsmToken::Identifier) &&
+ getLexer().isNot(AsmToken::String))
+ return Error(L, "unexpected token in directive");
+
+ StringRef CPU = Parser.getTok().getIdentifier();
+ Parser.Lex();
+
+ // FIXME: Right now, the parser always allows any available
+ // instruction, so the .machine directive is not useful.
+ // Implement ".machine any" (by doing nothing) for the benefit
+ // of existing assembler code. Likewise, we can then implement
+ // ".machine push" and ".machine pop" as no-op.
+ if (CPU != "any" && CPU != "push" && CPU != "pop")
+ return Error(L, "unrecognized machine type");
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(L, "unexpected token in directive");
+
+ return false;
+}
+
/// Force static initialization.
extern "C" void LLVMInitializePowerPCAsmParser() {
RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target);
RegisterMCAsmParser<PPCAsmParser> B(ThePPC64Target);
+ RegisterMCAsmParser<PPCAsmParser> C(ThePPC64LETarget);
}
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "PPCGenAsmMatcher.inc"
+
+// Define this matcher function after the auto-generated include so we
+// have the match class enum definitions.
+unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp,
+ unsigned Kind) {
+ // If the kind is a token for a literal immediate, check if our asm
+ // operand matches. This is for InstAliases which have a fixed-value
+ // immediate in the syntax.
+ int64_t ImmVal;
+ switch (Kind) {
+ case MCK_0: ImmVal = 0; break;
+ case MCK_1: ImmVal = 1; break;
+ case MCK_2: ImmVal = 2; break;
+ case MCK_3: ImmVal = 3; break;
+ default: return Match_InvalidOperand;
+ }
+
+ PPCOperand *Op = static_cast<PPCOperand*>(AsmOp);
+ if (Op->isImm() && Op->getImm() == ImmVal)
+ return Match_Success;
+
+ return Match_InvalidOperand;
+}
+
+const MCExpr *
+PPCAsmParser::applyModifierToExpr(const MCExpr *E,
+ MCSymbolRefExpr::VariantKind Variant,
+ MCContext &Ctx) {
+ switch (Variant) {
+ case MCSymbolRefExpr::VK_PPC_LO:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_LO, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HI:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HI, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HA, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHER:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHER, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHERA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHERA, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHEST:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHEST, E, false, Ctx);
+ case MCSymbolRefExpr::VK_PPC_HIGHESTA:
+ return PPCMCExpr::Create(PPCMCExpr::VK_PPC_HIGHESTA, E, false, Ctx);
+ default:
+ return 0;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index 93fca00..8281b5c 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -18,9 +18,17 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
+// FIXME: Once the integrated assembler supports full register names, tie this
+// to the verbose-asm setting.
+static cl::opt<bool>
+FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
+ cl::desc("Use full register names when printing assembly"));
+
#include "PPCGenAsmWriter.inc"
void PPCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
@@ -78,6 +86,17 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
}
}
+ // For fast-isel, a COPY_TO_REGCLASS may survive this long. This is
+ // used when converting a 32-bit float to a 64-bit float as part of
+ // conversion to an integer (see PPCFastISel.cpp:SelectFPToI()),
+ // as otherwise we have problems with incorrect register classes
+ // in machine instruction verification. For now, just avoid trying
+ // to print it as such an instruction has no effect (a 32-bit float
+ // in a register is already in 64-bit form, just with lower
+ // precision). FIXME: Is there a better solution?
+ if (MI->getOpcode() == TargetOpcode::COPY_TO_REGCLASS)
+ return;
+
printInstruction(MI, O);
printAnnotation(O, Annot);
}
@@ -90,19 +109,87 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
if (StringRef(Modifier) == "cc") {
switch ((PPC::Predicate)Code) {
- case PPC::PRED_LT: O << "lt"; return;
- case PPC::PRED_LE: O << "le"; return;
- case PPC::PRED_EQ: O << "eq"; return;
- case PPC::PRED_GE: O << "ge"; return;
- case PPC::PRED_GT: O << "gt"; return;
- case PPC::PRED_NE: O << "ne"; return;
- case PPC::PRED_UN: O << "un"; return;
- case PPC::PRED_NU: O << "nu"; return;
+ case PPC::PRED_LT_MINUS:
+ case PPC::PRED_LT_PLUS:
+ case PPC::PRED_LT:
+ O << "lt";
+ return;
+ case PPC::PRED_LE_MINUS:
+ case PPC::PRED_LE_PLUS:
+ case PPC::PRED_LE:
+ O << "le";
+ return;
+ case PPC::PRED_EQ_MINUS:
+ case PPC::PRED_EQ_PLUS:
+ case PPC::PRED_EQ:
+ O << "eq";
+ return;
+ case PPC::PRED_GE_MINUS:
+ case PPC::PRED_GE_PLUS:
+ case PPC::PRED_GE:
+ O << "ge";
+ return;
+ case PPC::PRED_GT_MINUS:
+ case PPC::PRED_GT_PLUS:
+ case PPC::PRED_GT:
+ O << "gt";
+ return;
+ case PPC::PRED_NE_MINUS:
+ case PPC::PRED_NE_PLUS:
+ case PPC::PRED_NE:
+ O << "ne";
+ return;
+ case PPC::PRED_UN_MINUS:
+ case PPC::PRED_UN_PLUS:
+ case PPC::PRED_UN:
+ O << "un";
+ return;
+ case PPC::PRED_NU_MINUS:
+ case PPC::PRED_NU_PLUS:
+ case PPC::PRED_NU:
+ O << "nu";
+ return;
}
+ llvm_unreachable("Invalid predicate code");
+ }
+
+ if (StringRef(Modifier) == "pm") {
+ switch ((PPC::Predicate)Code) {
+ case PPC::PRED_LT:
+ case PPC::PRED_LE:
+ case PPC::PRED_EQ:
+ case PPC::PRED_GE:
+ case PPC::PRED_GT:
+ case PPC::PRED_NE:
+ case PPC::PRED_UN:
+ case PPC::PRED_NU:
+ return;
+ case PPC::PRED_LT_MINUS:
+ case PPC::PRED_LE_MINUS:
+ case PPC::PRED_EQ_MINUS:
+ case PPC::PRED_GE_MINUS:
+ case PPC::PRED_GT_MINUS:
+ case PPC::PRED_NE_MINUS:
+ case PPC::PRED_UN_MINUS:
+ case PPC::PRED_NU_MINUS:
+ O << "-";
+ return;
+ case PPC::PRED_LT_PLUS:
+ case PPC::PRED_LE_PLUS:
+ case PPC::PRED_EQ_PLUS:
+ case PPC::PRED_GE_PLUS:
+ case PPC::PRED_GT_PLUS:
+ case PPC::PRED_NE_PLUS:
+ case PPC::PRED_UN_PLUS:
+ case PPC::PRED_NU_PLUS:
+ O << "+";
+ return;
+ }
+ llvm_unreachable("Invalid predicate code");
}
assert(StringRef(Modifier) == "reg" &&
- "Need to specify 'cc' or 'reg' as predicate op modifier!");
+ "Need to specify 'cc', 'pm' or 'reg' as predicate op modifier!");
printOperand(MI, OpNo+1, O);
}
@@ -129,18 +216,16 @@ void PPCInstPrinter::printU6ImmOperand(const MCInst *MI, unsigned OpNo,
void PPCInstPrinter::printS16ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- O << (short)MI->getOperand(OpNo).getImm();
+ if (MI->getOperand(OpNo).isImm())
+ O << (short)MI->getOperand(OpNo).getImm();
+ else
+ printOperand(MI, OpNo, O);
}
void PPCInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- O << (unsigned short)MI->getOperand(OpNo).getImm();
-}
-
-void PPCInstPrinter::printS16X4ImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
if (MI->getOperand(OpNo).isImm())
- O << (short)(MI->getOperand(OpNo).getImm()*4);
+ O << (unsigned short)MI->getOperand(OpNo).getImm();
else
printOperand(MI, OpNo, O);
}
@@ -153,11 +238,14 @@ void PPCInstPrinter::printBranchOperand(const MCInst *MI, unsigned OpNo,
// Branches can take an immediate operand. This is used by the branch
// selection pass to print .+8, an eight byte displacement from the PC.
O << ".+";
- printAbsAddrOperand(MI, OpNo, O);
+ printAbsBranchOperand(MI, OpNo, O);
}
-void PPCInstPrinter::printAbsAddrOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+void PPCInstPrinter::printAbsBranchOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ if (!MI->getOperand(OpNo).isImm())
+ return printOperand(MI, OpNo, O);
+
O << (int)MI->getOperand(OpNo).getImm()*4;
}
@@ -182,7 +270,7 @@ void PPCInstPrinter::printcrbitm(const MCInst *MI, unsigned OpNo,
void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- printSymbolLo(MI, OpNo, O);
+ printS16ImmOperand(MI, OpNo, O);
O << '(';
if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
O << "0";
@@ -191,22 +279,6 @@ void PPCInstPrinter::printMemRegImm(const MCInst *MI, unsigned OpNo,
O << ')';
}
-void PPCInstPrinter::printMemRegImmShifted(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm())
- printS16X4ImmOperand(MI, OpNo, O);
- else
- printSymbolLo(MI, OpNo, O);
- O << '(';
-
- if (MI->getOperand(OpNo+1).getReg() == PPC::R0)
- O << "0";
- else
- printOperand(MI, OpNo+1, O);
- O << ')';
-}
-
-
void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
// When used as the base register, r0 reads constant zero rather than
@@ -220,11 +292,21 @@ void PPCInstPrinter::printMemRegReg(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
+void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ printBranchOperand(MI, OpNo, O);
+ O << '(';
+ printOperand(MI, OpNo+1, O);
+ O << ')';
+}
/// stripRegisterPrefix - This method strips the character prefix from a
/// register name so that only the number is left. Used by for linux asm.
static const char *stripRegisterPrefix(const char *RegName) {
+ if (FullRegNames)
+ return RegName;
+
switch (RegName[0]) {
case 'r':
case 'f':
@@ -256,39 +338,4 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
assert(Op.isExpr() && "unknown operand kind in printOperand");
O << *Op.getExpr();
}
-
-void PPCInstPrinter::printSymbolLo(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm())
- return printS16ImmOperand(MI, OpNo, O);
-
- // FIXME: This is a terrible hack because we can't encode lo16() as an operand
- // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
- if (MI->getOperand(OpNo).isExpr() &&
- isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
- O << "lo16(";
- printOperand(MI, OpNo, O);
- O << ')';
- } else {
- printOperand(MI, OpNo, O);
- }
-}
-
-void PPCInstPrinter::printSymbolHi(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- if (MI->getOperand(OpNo).isImm())
- return printS16ImmOperand(MI, OpNo, O);
-
- // FIXME: This is a terrible hack because we can't encode lo16() as an operand
- // flag of a subtraction. See the FIXME in GetSymbolRef in PPCMCInstLower.
- if (MI->getOperand(OpNo).isExpr() &&
- isa<MCBinaryExpr>(MI->getOperand(OpNo).getExpr())) {
- O << "ha16(";
- printOperand(MI, OpNo, O);
- O << ')';
- } else {
- printOperand(MI, OpNo, O);
- }
-}
-
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index 8f1e211..8a4c03d 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -21,15 +21,14 @@ namespace llvm {
class MCOperand;
class PPCInstPrinter : public MCInstPrinter {
- // 0 -> AIX, 1 -> Darwin.
- unsigned SyntaxVariant;
+ bool IsDarwin;
public:
PPCInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI, unsigned syntaxVariant)
- : MCInstPrinter(MAI, MII, MRI), SyntaxVariant(syntaxVariant) {}
+ const MCRegisterInfo &MRI, bool isDarwin)
+ : MCInstPrinter(MAI, MII, MRI), IsDarwin(isDarwin) {}
bool isDarwinSyntax() const {
- return SyntaxVariant == 1;
+ return IsDarwin;
}
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
@@ -50,19 +49,14 @@ public:
void printU6ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printS16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printS16X4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printAbsAddrOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printAbsBranchOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printTLSCall(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printcrbitm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemRegImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printMemRegImmShifted(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemRegReg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
-
- // FIXME: Remove
- void printSymbolLo(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSymbolHi(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index ec26574..0d42081 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -16,13 +16,13 @@
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCValue.h"
-#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
+static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) {
switch (Kind) {
default:
llvm_unreachable("Unknown fixup kind!");
@@ -30,40 +30,45 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_2:
case FK_Data_4:
case FK_Data_8:
- case PPC::fixup_ppc_tlsreg:
case PPC::fixup_ppc_nofixup:
return Value;
case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_brcond14abs:
return Value & 0xfffc;
case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
return Value & 0x3fffffc;
-#if 0
- case PPC::fixup_ppc_hi16:
- return (Value >> 16) & 0xffff;
-#endif
- case PPC::fixup_ppc_ha16:
- return ((Value >> 16) + ((Value & 0x8000) ? 1 : 0)) & 0xffff;
- case PPC::fixup_ppc_lo16:
+ case PPC::fixup_ppc_half16:
return Value & 0xffff;
- case PPC::fixup_ppc_lo16_ds:
+ case PPC::fixup_ppc_half16ds:
return Value & 0xfffc;
}
}
-namespace {
-class PPCMachObjectWriter : public MCMachObjectTargetWriter {
-public:
- PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
- uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
-
- void RecordRelocation(MachObjectWriter *Writer,
- const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) {
- llvm_unreachable("Relocation emission for MachO/PPC unimplemented!");
+static unsigned getFixupKindNumBytes(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unknown fixup kind!");
+ case FK_Data_1:
+ return 1;
+ case FK_Data_2:
+ case PPC::fixup_ppc_half16:
+ case PPC::fixup_ppc_half16ds:
+ return 2;
+ case FK_Data_4:
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_brcond14abs:
+ case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
+ return 4;
+ case FK_Data_8:
+ return 8;
+ case PPC::fixup_ppc_nofixup:
+ return 0;
}
-};
+}
+
+namespace {
class PPCAsmBackend : public MCAsmBackend {
const Target &TheTarget;
@@ -77,10 +82,10 @@ public:
// name offset bits flags
{ "fixup_ppc_br24", 6, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "fixup_ppc_brcond14", 16, 14, MCFixupKindInfo::FKF_IsPCRel },
- { "fixup_ppc_lo16", 16, 16, 0 },
- { "fixup_ppc_ha16", 16, 16, 0 },
- { "fixup_ppc_lo16_ds", 16, 14, 0 },
- { "fixup_ppc_tlsreg", 0, 0, 0 },
+ { "fixup_ppc_br24abs", 6, 24, 0 },
+ { "fixup_ppc_brcond14abs", 16, 14, 0 },
+ { "fixup_ppc_half16", 0, 16, 0 },
+ { "fixup_ppc_half16ds", 0, 14, 0 },
{ "fixup_ppc_nofixup", 0, 0, 0 }
};
@@ -98,12 +103,13 @@ public:
if (!Value) return; // Doesn't change encoding.
unsigned Offset = Fixup.getOffset();
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
// For each byte of the fragment that the fixup touches, mask in the bits
// from the fixup value. The Value has been "split up" into the appropriate
// bitfields above.
- for (unsigned i = 0; i != 4; ++i)
- Data[Offset + i] |= uint8_t((Value >> ((4 - i - 1)*8)) & 0xff);
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= uint8_t((Value >> ((NumBytes - i - 1)*8)) & 0xff);
}
bool mayNeedRelaxation(const MCInst &Inst) const {
@@ -126,16 +132,23 @@ public:
}
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const {
- // FIXME: Zero fill for now. That's not right, but at least will get the
- // section size right.
- for (uint64_t i = 0; i != Count; ++i)
- OW->Write8(0);
+ uint64_t NumNops = Count / 4;
+ for (uint64_t i = 0; i != NumNops; ++i)
+ OW->Write32(0x60000000);
+
+ switch (Count % 4) {
+ default: break; // No leftover bytes to write
+ case 1: OW->Write8(0); break;
+ case 2: OW->Write16(0); break;
+ case 3: OW->Write16(0); OW->Write8(0); break;
+ }
+
return true;
}
unsigned getPointerSize() const {
StringRef Name = TheTarget.getName();
- if (Name == "ppc64") return 8;
+ if (Name == "ppc64" || Name == "ppc64le") return 8;
assert(Name == "ppc32" && "Unknown target name!");
return 4;
}
@@ -151,12 +164,11 @@ namespace {
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
bool is64 = getPointerSize() == 8;
- return createMachObjectWriter(new PPCMachObjectWriter(
- /*Is64Bit=*/is64,
- (is64 ? object::mach::CTM_PowerPC64 :
- object::mach::CTM_PowerPC),
- object::mach::CSPPC_ALL),
- OS, /*IsLittleEndian=*/false);
+ return createPPCMachObjectWriter(
+ OS,
+ /*Is64Bit=*/is64,
+ (is64 ? MachO::CPU_TYPE_POWERPC64 : MachO::CPU_TYPE_POWERPC),
+ MachO::CPU_SUBTYPE_POWERPC_ALL);
}
virtual bool doesSectionRequireSymbols(const MCSection &Section) const {
@@ -183,10 +195,9 @@ namespace {
} // end anonymous namespace
-
-
-
-MCAsmBackend *llvm::createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU) {
+MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU) {
if (Triple(TT).isOSDarwin())
return new DarwinPPCAsmBackend(T);
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
index 7a84723..54de70e 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp
@@ -30,29 +30,17 @@ namespace {
virtual unsigned GetRelocType(const MCValue &Target, const MCFixup &Fixup,
bool IsPCRel, bool IsRelocWithSymbol,
int64_t Addend) const;
+ virtual const MCSymbol *ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const;
virtual const MCSymbol *undefinedExplicitRelSym(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const;
- virtual void adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset);
-
- virtual void sortRelocs(const MCAssembler &Asm,
- std::vector<ELFRelocationEntry> &Relocs);
- };
-
- class PPCELFRelocationEntry : public ELFRelocationEntry {
- public:
- PPCELFRelocationEntry(const ELFRelocationEntry &RE);
- bool operator<(const PPCELFRelocationEntry &RE) const {
- return (RE.r_offset < r_offset ||
- (RE.r_offset == r_offset && RE.Type > Type));
- }
};
}
-PPCELFRelocationEntry::PPCELFRelocationEntry(const ELFRelocationEntry &RE)
- : ELFRelocationEntry(RE.r_offset, RE.Index, RE.Type, RE.Symbol,
- RE.r_addend, *RE.Fixup) {}
-
PPCELFObjectWriter::PPCELFObjectWriter(bool Is64Bit, uint8_t OSABI)
: MCELFObjectTargetWriter(Is64Bit, OSABI,
Is64Bit ? ELF::EM_PPC64 : ELF::EM_PPC,
@@ -75,11 +63,30 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
default:
llvm_unreachable("Unimplemented");
case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
Type = ELF::R_PPC_REL24;
break;
case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_brcond14abs:
Type = ELF::R_PPC_REL14;
break;
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_REL16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = ELF::R_PPC_REL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = ELF::R_PPC_REL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = ELF::R_PPC_REL16_HA;
+ break;
+ }
+ break;
case FK_Data_4:
case FK_PCRel_4:
Type = ELF::R_PPC_REL32;
@@ -92,93 +99,216 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
} else {
switch ((unsigned)Fixup.getKind()) {
default: llvm_unreachable("invalid fixup kind!");
- case PPC::fixup_ppc_br24:
+ case PPC::fixup_ppc_br24abs:
Type = ELF::R_PPC_ADDR24;
break;
- case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_brcond14abs:
      Type = ELF::R_PPC_ADDR14; // XXX: or BRNTAKEN?
break;
- case PPC::fixup_ppc_ha16:
+ case PPC::fixup_ppc_half16:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
- case MCSymbolRefExpr::VK_PPC_TPREL16_HA:
- Type = ELF::R_PPC_TPREL16_HA;
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_PPC_ADDR16;
break;
- case MCSymbolRefExpr::VK_PPC_DTPREL16_HA:
- Type = ELF::R_PPC64_DTPREL16_HA;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = ELF::R_PPC_ADDR16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = ELF::R_PPC_ADDR16_HI;
break;
- case MCSymbolRefExpr::VK_PPC_GAS_HA16:
- case MCSymbolRefExpr::VK_PPC_DARWIN_HA16:
+ case MCSymbolRefExpr::VK_PPC_HA:
Type = ELF::R_PPC_ADDR16_HA;
- break;
- case MCSymbolRefExpr::VK_PPC_TOC16_HA:
- Type = ELF::R_PPC64_TOC16_HA;
break;
- case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA:
- Type = ELF::R_PPC64_GOT_TPREL16_HA;
+ case MCSymbolRefExpr::VK_PPC_HIGHER:
+ Type = ELF::R_PPC64_ADDR16_HIGHER;
break;
- case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA:
- Type = ELF::R_PPC64_GOT_TLSGD16_HA;
+ case MCSymbolRefExpr::VK_PPC_HIGHERA:
+ Type = ELF::R_PPC64_ADDR16_HIGHERA;
break;
- case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA:
- Type = ELF::R_PPC64_GOT_TLSLD16_HA;
+ case MCSymbolRefExpr::VK_PPC_HIGHEST:
+ Type = ELF::R_PPC64_ADDR16_HIGHEST;
break;
- }
- break;
- case PPC::fixup_ppc_lo16:
- switch (Modifier) {
- default: llvm_unreachable("Unsupported Modifier");
- case MCSymbolRefExpr::VK_PPC_TPREL16_LO:
- Type = ELF::R_PPC_TPREL16_LO;
+ case MCSymbolRefExpr::VK_PPC_HIGHESTA:
+ Type = ELF::R_PPC64_ADDR16_HIGHESTA;
break;
- case MCSymbolRefExpr::VK_PPC_DTPREL16_LO:
- Type = ELF::R_PPC64_DTPREL16_LO;
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_PPC_GOT16;
break;
- case MCSymbolRefExpr::VK_None:
- Type = ELF::R_PPC_ADDR16;
+ case MCSymbolRefExpr::VK_PPC_GOT_LO:
+ Type = ELF::R_PPC_GOT16_LO;
break;
- case MCSymbolRefExpr::VK_PPC_GAS_LO16:
- case MCSymbolRefExpr::VK_PPC_DARWIN_LO16:
- Type = ELF::R_PPC_ADDR16_LO;
- break;
- case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+ case MCSymbolRefExpr::VK_PPC_GOT_HI:
+ Type = ELF::R_PPC_GOT16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_HA:
+ Type = ELF::R_PPC_GOT16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC:
Type = ELF::R_PPC64_TOC16;
break;
- case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ case MCSymbolRefExpr::VK_PPC_TOC_LO:
Type = ELF::R_PPC64_TOC16_LO;
break;
- case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO:
+ case MCSymbolRefExpr::VK_PPC_TOC_HI:
+ Type = ELF::R_PPC64_TOC16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC_HA:
+ Type = ELF::R_PPC64_TOC16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL:
+ Type = ELF::R_PPC_TPREL16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_LO:
+ Type = ELF::R_PPC_TPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HI:
+ Type = ELF::R_PPC_TPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HA:
+ Type = ELF::R_PPC_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHER:
+ Type = ELF::R_PPC64_TPREL16_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHERA:
+ Type = ELF::R_PPC64_TPREL16_HIGHERA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHEST:
+ Type = ELF::R_PPC64_TPREL16_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_HIGHESTA:
+ Type = ELF::R_PPC64_TPREL16_HIGHESTA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL:
+ Type = ELF::R_PPC64_DTPREL16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HI:
+ Type = ELF::R_PPC64_DTPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HA:
+ Type = ELF::R_PPC64_DTPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHER:
+ Type = ELF::R_PPC64_DTPREL16_HIGHER;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHERA:
+ Type = ELF::R_PPC64_DTPREL16_HIGHERA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHEST:
+ Type = ELF::R_PPC64_DTPREL16_HIGHEST;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHESTA:
+ Type = ELF::R_PPC64_DTPREL16_HIGHESTA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD:
+ Type = ELF::R_PPC64_GOT_TLSGD16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO:
Type = ELF::R_PPC64_GOT_TLSGD16_LO;
break;
- case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO:
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HI:
+ Type = ELF::R_PPC64_GOT_TLSGD16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA:
+ Type = ELF::R_PPC64_GOT_TLSGD16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD:
+ Type = ELF::R_PPC64_GOT_TLSLD16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO:
Type = ELF::R_PPC64_GOT_TLSLD16_LO;
break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HI:
+ Type = ELF::R_PPC64_GOT_TLSLD16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA:
+ Type = ELF::R_PPC64_GOT_TLSLD16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL:
+ /* We don't have R_PPC64_GOT_TPREL16, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_TPREL16_DS. */
+ Type = ELF::R_PPC64_GOT_TPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO:
+ /* We don't have R_PPC64_GOT_TPREL16_LO, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_TPREL16_LO_DS. */
+ Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HI:
+ Type = ELF::R_PPC64_GOT_TPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL:
+ /* We don't have R_PPC64_GOT_DTPREL16, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_DTPREL16_DS. */
+ Type = ELF::R_PPC64_GOT_DTPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO:
+ /* We don't have R_PPC64_GOT_DTPREL16_LO, but since GOT offsets
+ are always 4-aligned, we can use R_PPC64_GOT_DTPREL16_LO_DS. */
+ Type = ELF::R_PPC64_GOT_DTPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA:
+ Type = ELF::R_PPC64_GOT_TPREL16_HA;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HI:
+ Type = ELF::R_PPC64_GOT_DTPREL16_HI;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HA:
+ Type = ELF::R_PPC64_GOT_DTPREL16_HA;
+ break;
}
break;
- case PPC::fixup_ppc_lo16_ds:
+ case PPC::fixup_ppc_half16ds:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC64_ADDR16_DS;
break;
- case MCSymbolRefExpr::VK_PPC_GAS_LO16:
- case MCSymbolRefExpr::VK_PPC_DARWIN_LO16:
+ case MCSymbolRefExpr::VK_PPC_LO:
Type = ELF::R_PPC64_ADDR16_LO_DS;
break;
- case MCSymbolRefExpr::VK_PPC_TOC_ENTRY:
+ case MCSymbolRefExpr::VK_GOT:
+ Type = ELF::R_PPC64_GOT16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_LO:
+ Type = ELF::R_PPC64_GOT16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TOC:
Type = ELF::R_PPC64_TOC16_DS;
break;
- case MCSymbolRefExpr::VK_PPC_TOC16_LO:
+ case MCSymbolRefExpr::VK_PPC_TOC_LO:
Type = ELF::R_PPC64_TOC16_LO_DS;
break;
- case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO:
+ case MCSymbolRefExpr::VK_PPC_TPREL:
+ Type = ELF::R_PPC64_TPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL_LO:
+ Type = ELF::R_PPC64_TPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL:
+ Type = ELF::R_PPC64_DTPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL_LO:
+ Type = ELF::R_PPC64_DTPREL16_LO_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL:
+ Type = ELF::R_PPC64_GOT_TPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO:
Type = ELF::R_PPC64_GOT_TPREL16_LO_DS;
break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL:
+ Type = ELF::R_PPC64_GOT_DTPREL16_DS;
+ break;
+ case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO:
+ Type = ELF::R_PPC64_GOT_DTPREL16_LO_DS;
+ break;
}
break;
- case PPC::fixup_ppc_tlsreg:
- Type = ELF::R_PPC64_TLS;
- break;
case PPC::fixup_ppc_nofixup:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
@@ -188,17 +318,29 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TLSLD:
Type = ELF::R_PPC64_TLSLD;
break;
+ case MCSymbolRefExpr::VK_PPC_TLS:
+ Type = ELF::R_PPC64_TLS;
+ break;
}
break;
case FK_Data_8:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");
- case MCSymbolRefExpr::VK_PPC_TOC:
+ case MCSymbolRefExpr::VK_PPC_TOCBASE:
Type = ELF::R_PPC64_TOC;
break;
case MCSymbolRefExpr::VK_None:
Type = ELF::R_PPC64_ADDR64;
break;
+ case MCSymbolRefExpr::VK_PPC_DTPMOD:
+ Type = ELF::R_PPC64_DTPMOD64;
+ break;
+ case MCSymbolRefExpr::VK_PPC_TPREL:
+ Type = ELF::R_PPC64_TPREL64;
+ break;
+ case MCSymbolRefExpr::VK_PPC_DTPREL:
+ Type = ELF::R_PPC64_DTPREL64;
+ break;
}
break;
case FK_Data_4:
@@ -220,6 +362,35 @@ unsigned PPCELFObjectWriter::GetRelocType(const MCValue &Target,
return getRelocTypeInner(Target, Fixup, IsPCRel);
}
+const MCSymbol *PPCELFObjectWriter::ExplicitRelSym(const MCAssembler &Asm,
+ const MCValue &Target,
+ const MCFragment &F,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ assert(Target.getSymA() && "SymA cannot be 0");
+ MCSymbolRefExpr::VariantKind Modifier = Target.isAbsolute() ?
+ MCSymbolRefExpr::VK_None : Target.getSymA()->getKind();
+
+ bool EmitThisSym;
+ switch (Modifier) {
+ // GOT references always need a relocation, even if the
+ // target symbol is local.
+ case MCSymbolRefExpr::VK_GOT:
+ case MCSymbolRefExpr::VK_PPC_GOT_LO:
+ case MCSymbolRefExpr::VK_PPC_GOT_HI:
+ case MCSymbolRefExpr::VK_PPC_GOT_HA:
+ EmitThisSym = true;
+ break;
+ default:
+ EmitThisSym = false;
+ break;
+ }
+
+ if (EmitThisSym)
+ return &Target.getSymA()->getSymbol().AliasedSymbol();
+ return NULL;
+}
+
const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
@@ -240,47 +411,6 @@ const MCSymbol *PPCELFObjectWriter::undefinedExplicitRelSym(const MCValue &Targe
return NULL;
}
-void PPCELFObjectWriter::
-adjustFixupOffset(const MCFixup &Fixup, uint64_t &RelocOffset) {
- switch ((unsigned)Fixup.getKind()) {
- case PPC::fixup_ppc_ha16:
- case PPC::fixup_ppc_lo16:
- case PPC::fixup_ppc_lo16_ds:
- RelocOffset += 2;
- break;
- default:
- break;
- }
-}
-
-// The standard sorter only sorts on the r_offset field, but PowerPC can
-// have multiple relocations at the same offset. Sort secondarily on the
-// relocation type to avoid nondeterminism.
-void PPCELFObjectWriter::sortRelocs(const MCAssembler &Asm,
- std::vector<ELFRelocationEntry> &Relocs) {
-
- // Copy to a temporary vector of relocation entries having a different
- // sort function.
- std::vector<PPCELFRelocationEntry> TmpRelocs;
-
- for (std::vector<ELFRelocationEntry>::iterator R = Relocs.begin();
- R != Relocs.end(); ++R) {
- TmpRelocs.push_back(PPCELFRelocationEntry(*R));
- }
-
- // Sort in place by ascending r_offset and descending r_type.
- array_pod_sort(TmpRelocs.begin(), TmpRelocs.end());
-
- // Copy back to the original vector.
- unsigned I = 0;
- for (std::vector<PPCELFRelocationEntry>::iterator R = TmpRelocs.begin();
- R != TmpRelocs.end(); ++R, ++I) {
- Relocs[I] = ELFRelocationEntry(R->r_offset, R->Index, R->Type,
- R->Symbol, R->r_addend, *R->Fixup);
- }
-}
-
-
MCObjectWriter *llvm::createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
uint8_t OSABI) {
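
The rewritten getRelocTypeInner above reduces relocation selection to a lookup keyed on (fixup kind, @-modifier, PC-relative). A toy sketch of that shape; the enum and string names here are illustrative only, where the real code uses the PPC::fixup_*, MCSymbolRefExpr::VK_* and ELF::R_* enums:

    #include <cstdio>

    // Toy model of the dispatch in getRelocTypeInner: the ELF relocation
    // type is a pure function of (fixup kind, @-modifier, PC-relative?).
    enum Fixup { HALF16 };
    enum Mod { NONE, LO, HI, HA };

    static const char *relocFor(Fixup F, Mod M, bool IsPCRel) {
      static const char *Abs[] = {"R_PPC_ADDR16", "R_PPC_ADDR16_LO",
                                  "R_PPC_ADDR16_HI", "R_PPC_ADDR16_HA"};
      static const char *Rel[] = {"R_PPC_REL16", "R_PPC_REL16_LO",
                                  "R_PPC_REL16_HI", "R_PPC_REL16_HA"};
      return F == HALF16 ? (IsPCRel ? Rel : Abs)[M] : "R_PPC_NONE";
    }

    int main() {
      std::printf("%s\n", relocFor(HALF16, HA, /*IsPCRel=*/false));
      // prints: R_PPC_ADDR16_HA
    }
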
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
index 86c44f5..68de8c1 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h
@@ -25,23 +25,25 @@ enum Fixups {
/// branches.
fixup_ppc_brcond14,
- /// fixup_ppc_lo16 - A 16-bit fixup corresponding to lo16(_foo) for instrs
- /// like 'li'.
- fixup_ppc_lo16,
-
- /// fixup_ppc_ha16 - A 16-bit fixup corresponding to ha16(_foo) for instrs
- /// like 'lis'.
- fixup_ppc_ha16,
+ /// fixup_ppc_br24abs - 24-bit absolute relocation for direct branches
+ /// like 'ba' and 'bla'.
+ fixup_ppc_br24abs,
+
+ /// fixup_ppc_brcond14abs - 14-bit absolute relocation for conditional
+ /// branches.
+ fixup_ppc_brcond14abs,
+
+ /// fixup_ppc_half16 - A 16-bit fixup corresponding to lo16(_foo)
+ /// or ha16(_foo) for instrs like 'li' or 'addis'.
+ fixup_ppc_half16,
- /// fixup_ppc_lo16_ds - A 14-bit fixup corresponding to lo16(_foo) with
+ /// fixup_ppc_half16ds - A 14-bit fixup corresponding to lo16(_foo) with
/// implied 2 zero bits for instrs like 'std'.
- fixup_ppc_lo16_ds,
-
- /// fixup_ppc_tlsreg - Insert thread-pointer register number.
- fixup_ppc_tlsreg,
+ fixup_ppc_half16ds,
/// fixup_ppc_nofixup - Not a true fixup, but ties a symbol to a call
- /// to __tls_get_addr for the TLS general and local dynamic models.
+ /// to __tls_get_addr for the TLS general and local dynamic models,
+ /// or inserts the thread-pointer register number.
fixup_ppc_nofixup,
// Marker
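
The half16ds kind above is described as 14-bit because DS-form instructions such as 'std' and 'ld' store the displacement shifted right by two: the low two bits are implied zero, which is also why its relocations all carry a _DS suffix and require 4-byte-aligned targets. A small illustrative round trip of that encoding (not LLVM code):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // DS-form instructions keep only bits 15..2 of the displacement in a
    // 14-bit field; the low two bits are implied zero.
    static uint32_t encodeDS(int32_t Disp) {
      assert((Disp & 3) == 0 && "DS-form displacement must be 4-byte aligned");
      return (uint32_t(Disp) >> 2) & 0x3FFF;
    }

    static int32_t decodeDS(uint32_t Field) {
      return int32_t(int16_t(Field << 2)); // restore and sign-extend
    }

    int main() {
      int32_t D = -32768;
      std::printf("field=0x%04x decoded=%d\n",
                  encodeDS(D), decodeDS(encodeDS(D)));
      // field=0x2000 decoded=-32768
    }
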
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index a25d7fe..f3dddce 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -22,7 +22,6 @@ PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
}
IsLittleEndian = false;
- PCSymbol = ".";
CommentString = ";";
ExceptionsType = ExceptionHandling::DwarfCFI;
@@ -47,24 +46,24 @@ PPCLinuxMCAsmInfo::PPCLinuxMCAsmInfo(bool is64Bit) {
CommentString = "#";
GlobalPrefix = "";
PrivateGlobalPrefix = ".L";
- WeakRefDirective = "\t.weak\t";
-
+
// Uses '.section' before '.bss' directive
UsesELFSectionDirectiveForBSS = true;
// Debug Information
SupportsDebugInformation = true;
- PCSymbol = ".";
+ DollarIsPC = true;
// Set up DWARF directives
HasLEB128 = true; // Target asm supports leb128 directives (little-endian)
+ MinInstAlignment = 4;
// Exceptions handling
ExceptionsType = ExceptionHandling::DwarfCFI;
ZeroDirective = "\t.space\t";
Data64bitsDirective = is64Bit ? "\t.quad\t" : 0;
- AssemblerDialect = 0; // Old-Style mnemonics.
+ AssemblerDialect = 1; // New-Style mnemonics.
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 7b4ed9f..1530e77 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
@@ -15,6 +15,7 @@
#define PPCTARGETASMINFO_H
#include "llvm/MC/MCAsmInfoDarwin.h"
+#include "llvm/MC/MCAsmInfoELF.h"
namespace llvm {
@@ -24,7 +25,7 @@ namespace llvm {
explicit PPCMCAsmInfoDarwin(bool is64Bit);
};
- class PPCLinuxMCAsmInfo : public MCAsmInfo {
+ class PPCLinuxMCAsmInfo : public MCAsmInfoELF {
virtual void anchor();
public:
explicit PPCLinuxMCAsmInfo(bool is64Bit);
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index 2223cd6..346a9be 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetOpcodes.h"
using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
@@ -48,16 +49,20 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getCondBrEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
- unsigned getHA16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
- unsigned getLO16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getImm16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getMemRIEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
+ unsigned getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const;
unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
@@ -72,13 +77,19 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
+ // For fast-isel, a float COPY_TO_REGCLASS can survive this long.
+ // It's just a nop to keep the register classes happy, so don't
+ // generate anything.
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode == TargetOpcode::COPY_TO_REGCLASS)
+ return;
+
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups);
// BL8_NOP etc. all have a size of 8 because of the following 'nop'.
unsigned Size = 4; // FIXME: Have Desc.getSize() return the correct value!
- unsigned Opcode = MI.getOpcode();
if (Opcode == PPC::BL8_NOP || Opcode == PPC::BLA8_NOP ||
- Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD)
+ Opcode == PPC::BL8_NOP_TLS)
Size = 8;
// Output the constant in big endian byte order.
@@ -111,17 +122,6 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
-
- // For special TLS calls, add another fixup for the symbol. Apparently
- // BL8_NOP, BL8_NOP_TLSGD, and BL8_NOP_TLSLD are sufficiently
- // similar that TblGen will not generate a separate case for the latter
- // two, so this is the only way to get the extra fixup generated.
- unsigned Opcode = MI.getOpcode();
- if (Opcode == PPC::BL8_NOP_TLSGD || Opcode == PPC::BL8_NOP_TLSLD) {
- const MCOperand &MO2 = MI.getOperand(OpNo+1);
- Fixups.push_back(MCFixup::Create(0, MO2.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_nofixup));
- }
return 0;
}
@@ -136,25 +136,38 @@ unsigned PPCMCCodeEmitter::getCondBrEncoding(const MCInst &MI, unsigned OpNo,
return 0;
}
-unsigned PPCMCCodeEmitter::getHA16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+unsigned PPCMCCodeEmitter::
+getAbsDirectBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
-
+
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_ha16));
+ (MCFixupKind)PPC::fixup_ppc_br24abs));
return 0;
}
-unsigned PPCMCCodeEmitter::getLO16Encoding(const MCInst &MI, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups) const {
+unsigned PPCMCCodeEmitter::
+getAbsCondBrEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
-
+
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo16));
+ (MCFixupKind)PPC::fixup_ppc_brcond14abs));
+ return 0;
+}
+
+unsigned PPCMCCodeEmitter::getImm16Encoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+ if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO, Fixups);
+
+ // Add a fixup for the immediate field.
+ Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16));
return 0;
}
@@ -170,8 +183,8 @@ unsigned PPCMCCodeEmitter::getMemRIEncoding(const MCInst &MI, unsigned OpNo,
return (getMachineOpValue(MI, MO, Fixups) & 0xFFFF) | RegBits;
// Add a fixup for the displacement field.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo16));
+ Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16));
return RegBits;
}
@@ -185,11 +198,11 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
- return (getMachineOpValue(MI, MO, Fixups) & 0x3FFF) | RegBits;
+ return ((getMachineOpValue(MI, MO, Fixups) >> 2) & 0x3FFF) | RegBits;
// Add a fixup for the displacement field.
- Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_lo16_ds));
+ Fixups.push_back(MCFixup::Create(2, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_half16ds));
return RegBits;
}
@@ -203,19 +216,29 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
// hint to the linker that this statement is part of a relocation sequence.
// Return the thread-pointer register's encoding.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
- (MCFixupKind)PPC::fixup_ppc_tlsreg));
- return CTX.getRegisterInfo().getEncodingValue(PPC::X13);
+ (MCFixupKind)PPC::fixup_ppc_nofixup));
+ return CTX.getRegisterInfo()->getEncodingValue(PPC::X13);
+}
+
+unsigned PPCMCCodeEmitter::getTLSCallEncoding(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups) const {
+ // For special TLS calls, we need two fixups; one for the branch target
+ // (__tls_get_addr), which we create via getDirectBrEncoding as usual,
+ // and one for the TLSGD or TLSLD symbol, which is emitted here.
+ const MCOperand &MO = MI.getOperand(OpNo+1);
+ Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
+ (MCFixupKind)PPC::fixup_ppc_nofixup));
+ return getDirectBrEncoding(MI, OpNo, Fixups);
}
unsigned PPCMCCodeEmitter::
get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
- assert((MI.getOpcode() == PPC::MTCRF ||
- MI.getOpcode() == PPC::MFOCRF ||
- MI.getOpcode() == PPC::MTCRF8) &&
+ assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 ||
+ MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
- return 0x80 >> CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
}
@@ -223,11 +246,12 @@ unsigned PPCMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups) const {
if (MO.isReg()) {
- // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
// The GPR operand should come through here though.
- assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MFOCRF) ||
+ assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
+ MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return CTX.getRegisterInfo().getEncodingValue(MO.getReg());
+ return CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
}
assert(MO.isImm() &&
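
The encoder changes above move half16 fixups from byte offset 0 to byte offset 2. The reason is endianness: PPC serializes each 4-byte instruction word big-endian, so a 16-bit immediate in the low half of the word lands in bytes 2-3, and the fixup must point at those bytes directly now that the ELF writer's adjustFixupOffset hook is gone. A short sketch of that layout:

    #include <cstdint>
    #include <cstdio>

    // A 16-bit immediate in the low half of a big-endian 4-byte word
    // occupies bytes 2 and 3, hence the MCFixup offset of 2 above.
    int main() {
      uint32_t Inst = 0x3C601234; // e.g. "lis 3, 0x1234"; imm in low half
      uint8_t Bytes[4];
      for (int i = 0; i != 4; ++i)
        Bytes[i] = uint8_t(Inst >> ((3 - i) * 8)); // big-endian order
      uint16_t Imm = uint16_t(Bytes[2]) << 8 | Bytes[3];
      std::printf("imm at byte offset 2 = 0x%04x\n", Imm); // 0x1234
    }
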
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
new file mode 100644
index 0000000..d7e8402
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
@@ -0,0 +1,155 @@
+//===-- PPCMCExpr.cpp - PPC specific MC expression classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppcmcexpr"
+#include "PPCMCExpr.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+using namespace llvm;
+
+const PPCMCExpr*
+PPCMCExpr::Create(VariantKind Kind, const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return new (Ctx) PPCMCExpr(Kind, Expr, isDarwin);
+}
+
+void PPCMCExpr::PrintImpl(raw_ostream &OS) const {
+ if (isDarwinSyntax()) {
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO: OS << "lo16"; break;
+ case VK_PPC_HI: OS << "hi16"; break;
+ case VK_PPC_HA: OS << "ha16"; break;
+ }
+
+ OS << '(';
+ getSubExpr()->print(OS);
+ OS << ')';
+ } else {
+ getSubExpr()->print(OS);
+
+ switch (Kind) {
+ default: llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO: OS << "@l"; break;
+ case VK_PPC_HI: OS << "@h"; break;
+ case VK_PPC_HA: OS << "@ha"; break;
+ case VK_PPC_HIGHER: OS << "@higher"; break;
+ case VK_PPC_HIGHERA: OS << "@highera"; break;
+ case VK_PPC_HIGHEST: OS << "@highest"; break;
+ case VK_PPC_HIGHESTA: OS << "@highesta"; break;
+ }
+ }
+}
+
+bool
+PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ MCValue Value;
+
+ if (!Layout || !getSubExpr()->EvaluateAsRelocatable(Value, *Layout))
+ return false;
+
+ if (Value.isAbsolute()) {
+ int64_t Result = Value.getConstant();
+ switch (Kind) {
+ default:
+ llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO:
+ Result = Result & 0xffff;
+ break;
+ case VK_PPC_HI:
+ Result = (Result >> 16) & 0xffff;
+ break;
+ case VK_PPC_HA:
+ Result = ((Result + 0x8000) >> 16) & 0xffff;
+ break;
+ case VK_PPC_HIGHER:
+ Result = (Result >> 32) & 0xffff;
+ break;
+ case VK_PPC_HIGHERA:
+ Result = ((Result + 0x8000) >> 32) & 0xffff;
+ break;
+ case VK_PPC_HIGHEST:
+ Result = (Result >> 48) & 0xffff;
+ break;
+ case VK_PPC_HIGHESTA:
+ Result = ((Result + 0x8000) >> 48) & 0xffff;
+ break;
+ }
+ Res = MCValue::get(Result);
+ } else {
+ MCContext &Context = Layout->getAssembler().getContext();
+ const MCSymbolRefExpr *Sym = Value.getSymA();
+ MCSymbolRefExpr::VariantKind Modifier = Sym->getKind();
+ if (Modifier != MCSymbolRefExpr::VK_None)
+ return false;
+ switch (Kind) {
+ default:
+ llvm_unreachable("Invalid kind!");
+ case VK_PPC_LO:
+ Modifier = MCSymbolRefExpr::VK_PPC_LO;
+ break;
+ case VK_PPC_HI:
+ Modifier = MCSymbolRefExpr::VK_PPC_HI;
+ break;
+ case VK_PPC_HA:
+ Modifier = MCSymbolRefExpr::VK_PPC_HA;
+ break;
+ case VK_PPC_HIGHERA:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHERA;
+ break;
+ case VK_PPC_HIGHER:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHER;
+ break;
+ case VK_PPC_HIGHEST:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHEST;
+ break;
+ case VK_PPC_HIGHESTA:
+ Modifier = MCSymbolRefExpr::VK_PPC_HIGHESTA;
+ break;
+ }
+ Sym = MCSymbolRefExpr::Create(&Sym->getSymbol(), Modifier, Context);
+ Res = MCValue::get(Sym, Value.getSymB(), Value.getConstant());
+ }
+
+ return true;
+}
+
+// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
+// that method should be made public?
+static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) {
+ switch (Value->getKind()) {
+ case MCExpr::Target:
+ llvm_unreachable("Can't handle nested target expr!");
+
+ case MCExpr::Constant:
+ break;
+
+ case MCExpr::Binary: {
+ const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value);
+ AddValueSymbols_(BE->getLHS(), Asm);
+ AddValueSymbols_(BE->getRHS(), Asm);
+ break;
+ }
+
+ case MCExpr::SymbolRef:
+ Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol());
+ break;
+
+ case MCExpr::Unary:
+ AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm);
+ break;
+ }
+}
+
+void PPCMCExpr::AddValueSymbols(MCAssembler *Asm) const {
+ AddValueSymbols_(getSubExpr(), Asm);
+}
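
The constant-folding cases above implement the standard PowerPC half-word selectors; in particular @ha pre-biases by 0x8000 so that an addis/addi (or addis/lwz) pair reconstructs the full value even though the second instruction sign-extends the low half. A worked example of the lo/ha rules as plain functions:

    #include <cstdint>
    #include <cstdio>

    // The @l/@ha folding rules from EvaluateAsRelocatableImpl above:
    // (ha << 16) + sign_extend(lo) equals the original value.
    static uint16_t lo(uint32_t V) { return V & 0xffff; }
    static uint16_t ha(uint32_t V) { return ((V + 0x8000) >> 16) & 0xffff; }

    int main() {
      uint32_t Addr = 0x1000FFF0; // low half sign-extends negative
      int32_t Rebuilt = (int32_t(ha(Addr)) << 16) + int16_t(lo(Addr));
      std::printf("ha=0x%04x lo=0x%04x rebuilt=0x%08x\n",
                  ha(Addr), lo(Addr), uint32_t(Rebuilt)); // rebuilt == Addr
    }
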
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
new file mode 100644
index 0000000..e44c7c1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
@@ -0,0 +1,96 @@
+//===-- PPCMCExpr.h - PPC specific MC expression classes --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCMCEXPR_H
+#define PPCMCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAsmLayout.h"
+
+namespace llvm {
+
+class PPCMCExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_PPC_None,
+ VK_PPC_LO,
+ VK_PPC_HI,
+ VK_PPC_HA,
+ VK_PPC_HIGHER,
+ VK_PPC_HIGHERA,
+ VK_PPC_HIGHEST,
+ VK_PPC_HIGHESTA
+ };
+
+private:
+ const VariantKind Kind;
+ const MCExpr *Expr;
+ bool IsDarwin;
+
+ explicit PPCMCExpr(VariantKind _Kind, const MCExpr *_Expr,
+ bool _IsDarwin)
+ : Kind(_Kind), Expr(_Expr), IsDarwin(_IsDarwin) {}
+
+public:
+ /// @name Construction
+ /// @{
+
+ static const PPCMCExpr *Create(VariantKind Kind, const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx);
+
+ static const PPCMCExpr *CreateLo(const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return Create(VK_PPC_LO, Expr, isDarwin, Ctx);
+ }
+
+ static const PPCMCExpr *CreateHi(const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return Create(VK_PPC_HI, Expr, isDarwin, Ctx);
+ }
+
+ static const PPCMCExpr *CreateHa(const MCExpr *Expr,
+ bool isDarwin, MCContext &Ctx) {
+ return Create(VK_PPC_HA, Expr, isDarwin, Ctx);
+ }
+
+ /// @}
+ /// @name Accessors
+ /// @{
+
+  /// getKind - Get the kind of this expression.
+ VariantKind getKind() const { return Kind; }
+
+ /// getSubExpr - Get the child of this expression.
+ const MCExpr *getSubExpr() const { return Expr; }
+
+ /// isDarwinSyntax - True if expression is to be printed using Darwin syntax.
+ bool isDarwinSyntax() const { return IsDarwin; }
+
+
+ /// @}
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const {
+ return getSubExpr()->FindAssociatedSection();
+ }
+
+ // There are no TLS PPCMCExprs at the moment.
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const {}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+};
+} // end namespace llvm
+
+#endif
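
A hedged usage sketch of this header: how a client holding an MCContext might build the operand expressions for an addis/addi pair referencing foo@ha and foo@l. The names below (buildHaLo, Foo) are illustrative, and the snippet is a fragment, not a complete program:

    // Assumes a live MCContext, a registered PPC target, and that this
    // TU links against the LLVM MC libraries.
    #include "PPCMCExpr.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"

    using namespace llvm;

    static const MCExpr *buildHaLo(MCContext &Ctx, const MCSymbol *Foo,
                                   const MCExpr *&LoOut) {
      const MCExpr *Ref = MCSymbolRefExpr::Create(Foo, Ctx);
      LoOut = PPCMCExpr::CreateLo(Ref, /*isDarwin=*/false, Ctx); // foo@l
      return PPCMCExpr::CreateHa(Ref, /*isDarwin=*/false, Ctx);  // foo@ha
    }
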
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index 2209f93..f18d095 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -14,13 +14,16 @@
#include "PPCMCTargetDesc.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "PPCMCAsmInfo.h"
+#include "PPCTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
@@ -34,6 +37,9 @@
using namespace llvm;
+// Pin the vtable to this file.
+PPCTargetStreamer::~PPCTargetStreamer() {}
+
static MCInstrInfo *createPPCMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitPPCMCInstrInfo(X);
@@ -42,7 +48,8 @@ static MCInstrInfo *createPPCMCInstrInfo() {
static MCRegisterInfo *createPPCMCRegisterInfo(StringRef TT) {
Triple TheTriple(TT);
- bool isPPC64 = (TheTriple.getArch() == Triple::ppc64);
+ bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
+ TheTriple.getArch() == Triple::ppc64le);
unsigned Flavour = isPPC64 ? 0 : 1;
unsigned RA = isPPC64 ? PPC::LR8 : PPC::LR;
@@ -58,9 +65,10 @@ static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
return X;
}
-static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
+static MCAsmInfo *createPPCMCAsmInfo(const MCRegisterInfo &MRI, StringRef TT) {
Triple TheTriple(TT);
- bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
+ bool isPPC64 = (TheTriple.getArch() == Triple::ppc64 ||
+ TheTriple.getArch() == Triple::ppc64le);
MCAsmInfo *MAI;
if (TheTriple.isOSDarwin())
@@ -69,9 +77,10 @@ static MCAsmInfo *createPPCMCAsmInfo(const Target &T, StringRef TT) {
MAI = new PPCLinuxMCAsmInfo(isPPC64);
// Initial state of the frame pointer is R1.
- MachineLocation Dst(MachineLocation::VirtualFP);
- MachineLocation Src(isPPC64? PPC::X1 : PPC::R1, 0);
- MAI->addInitialFrameState(0, Dst, Src);
+ unsigned Reg = isPPC64 ? PPC::X1 : PPC::R1;
+ MCCFIInstruction Inst =
+ MCCFIInstruction::createDefCfa(0, MRI.getDwarfRegNum(Reg, true), 0);
+ MAI->addInitialFrameState(Inst);
return MAI;
}
@@ -90,13 +99,37 @@ static MCCodeGenInfo *createPPCMCCodeGenInfo(StringRef TT, Reloc::Model RM,
}
if (CM == CodeModel::Default) {
Triple T(TT);
- if (!T.isOSDarwin() && T.getArch() == Triple::ppc64)
+ if (!T.isOSDarwin() &&
+ (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le))
CM = CodeModel::Medium;
}
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}
+namespace {
+class PPCTargetAsmStreamer : public PPCTargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ PPCTargetAsmStreamer(formatted_raw_ostream &OS) : OS(OS) {}
+ virtual void emitTCEntry(const MCSymbol &S) {
+ OS << "\t.tc ";
+ OS << S.getName();
+ OS << "[TC],";
+ OS << S.getName();
+ OS << '\n';
+ }
+};
+
+class PPCTargetELFStreamer : public PPCTargetStreamer {
+ virtual void emitTCEntry(const MCSymbol &S) {
+    // Creates an R_PPC64_TOC relocation.
+ Streamer->EmitSymbolValue(&S, 8);
+ }
+};
+}
+
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
MCContext &Ctx, MCAsmBackend &MAB,
@@ -107,7 +140,20 @@ static MCStreamer *createMCStreamer(const Target &T, StringRef TT,
if (Triple(TT).isOSDarwin())
return createMachOStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll, NoExecStack);
+ PPCTargetStreamer *S = new PPCTargetELFStreamer();
+ return createELFStreamer(Ctx, S, MAB, OS, Emitter, RelaxAll, NoExecStack);
+}
+
+static MCStreamer *
+createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
+ bool isVerboseAsm, bool useLoc, bool useCFI,
+ bool useDwarfDirectory, MCInstPrinter *InstPrint,
+ MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst) {
+ PPCTargetStreamer *S = new PPCTargetAsmStreamer(OS);
+
+ return llvm::createAsmStreamer(Ctx, S, OS, isVerboseAsm, useLoc, useCFI,
+ useDwarfDirectory, InstPrint, CE, TAB,
+ ShowInst);
}
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
@@ -116,45 +162,65 @@ static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) {
- return new PPCInstPrinter(MAI, MII, MRI, SyntaxVariant);
+ bool isDarwin = Triple(STI.getTargetTriple()).isOSDarwin();
+ return new PPCInstPrinter(MAI, MII, MRI, isDarwin);
}
extern "C" void LLVMInitializePowerPCTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfoFn C(ThePPC32Target, createPPCMCAsmInfo);
RegisterMCAsmInfoFn D(ThePPC64Target, createPPCMCAsmInfo);
+ RegisterMCAsmInfoFn E(ThePPC64LETarget, createPPCMCAsmInfo);
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(ThePPC32Target, createPPCMCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(ThePPC64Target, createPPCMCCodeGenInfo);
+ TargetRegistry::RegisterMCCodeGenInfo(ThePPC64LETarget,
+ createPPCMCCodeGenInfo);
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePPC64LETarget,
+ createPPCMCInstrInfo);
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(ThePPC32Target, createPPCMCRegisterInfo);
TargetRegistry::RegisterMCRegInfo(ThePPC64Target, createPPCMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(ThePPC64LETarget, createPPCMCRegisterInfo);
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
createPPCMCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
createPPCMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC64LETarget,
+ createPPCMCSubtargetInfo);
// Register the MC Code Emitter
TargetRegistry::RegisterMCCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
TargetRegistry::RegisterMCCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(ThePPC64LETarget,
+ createPPCMCCodeEmitter);
// Register the asm backend.
TargetRegistry::RegisterMCAsmBackend(ThePPC32Target, createPPCAsmBackend);
TargetRegistry::RegisterMCAsmBackend(ThePPC64Target, createPPCAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(ThePPC64LETarget, createPPCAsmBackend);
// Register the object streamer.
TargetRegistry::RegisterMCObjectStreamer(ThePPC32Target, createMCStreamer);
TargetRegistry::RegisterMCObjectStreamer(ThePPC64Target, createMCStreamer);
+ TargetRegistry::RegisterMCObjectStreamer(ThePPC64LETarget, createMCStreamer);
+
+ // Register the asm streamer.
+ TargetRegistry::RegisterAsmStreamer(ThePPC32Target, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePPC64Target, createMCAsmStreamer);
+ TargetRegistry::RegisterAsmStreamer(ThePPC64LETarget, createMCAsmStreamer);
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(ThePPC32Target, createPPCMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(ThePPC64Target, createPPCMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(ThePPC64LETarget,
+ createPPCMCInstPrinter);
}
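
The two streamer subclasses above emit the same logical TOC entry in different forms: the assembly streamer prints a textual .tc directive, while the ELF streamer emits an 8-byte symbol value that the object writer turns into an R_PPC64_TOC-relocated doubleword. A trivial sketch of the textual form for a symbol named foo (name illustrative):

    #include <cstdio>
    #include <string>

    // The directive PPCTargetAsmStreamer::emitTCEntry prints: a ".tc"
    // entry that creates a TOC slot aliasing the symbol.
    static std::string tcEntryFor(const std::string &Sym) {
      return "\t.tc " + Sym + "[TC]," + Sym + "\n";
    }

    int main() {
      std::fputs(tcEntryFor("foo").c_str(), stdout); // "\t.tc foo[TC],foo"
    }
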
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 38a7420..0b0ca24 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -33,18 +33,24 @@ class raw_ostream;
extern Target ThePPC32Target;
extern Target ThePPC64Target;
+extern Target ThePPC64LETarget;
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI,
MCContext &Ctx);
-MCAsmBackend *createPPCAsmBackend(const Target &T, StringRef TT, StringRef CPU);
+MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ StringRef TT, StringRef CPU);
/// createPPCELFObjectWriter - Construct a PPC ELF object writer.
MCObjectWriter *createPPCELFObjectWriter(raw_ostream &OS,
bool Is64Bit,
uint8_t OSABI);
+/// createPPCMachObjectWriter - Construct a PPC Mach-O object writer.
+MCObjectWriter *createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
} // End llvm namespace
// Generated files will use "namespace PPC". To avoid symbol clash,
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
new file mode 100644
index 0000000..bbafe2e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
@@ -0,0 +1,389 @@
+//===-- PPCMachObjectWriter.cpp - PPC Mach-O Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/PPCMCTargetDesc.h"
+#include "MCTargetDesc/PPCFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/MachO.h"
+
+using namespace llvm;
+
+namespace {
+class PPCMachObjectWriter : public MCMachObjectTargetWriter {
+ bool RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size, uint64_t &FixedValue);
+
+ void RecordPPCRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+public:
+ PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType, uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/Is64Bit) {}
+
+ void RecordRelocation(MachObjectWriter *Writer, const MCAssembler &Asm,
+ const MCAsmLayout &Layout, const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ if (Writer->is64Bit()) {
+ report_fatal_error("Relocation emission for MachO/PPC64 unimplemented.");
+ } else
+ RecordPPCRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ }
+};
+}
+
+/// Computes the log2 of the size of the relocation,
+/// used for relocation_info::r_length.
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default:
+ report_fatal_error("log2size(FixupKind): Unhandled fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1:
+ return 0;
+ case FK_PCRel_2:
+ case FK_Data_2:
+ return 1;
+ case FK_PCRel_4:
+ case PPC::fixup_ppc_brcond14:
+ case PPC::fixup_ppc_half16:
+ case PPC::fixup_ppc_br24:
+ case FK_Data_4:
+ return 2;
+ case FK_PCRel_8:
+ case FK_Data_8:
+ return 3;
+ }
+ return 0;
+}
+
+/// Translates generic PPC fixup kind to Mach-O/PPC relocation type enum.
+/// Outline based on PPCELFObjectWriter::getRelocTypeInner().
+static unsigned getRelocType(const MCValue &Target,
+ const MCFixupKind FixupKind, // from
+ // Fixup.getKind()
+ const bool IsPCRel) {
+ const MCSymbolRefExpr::VariantKind Modifier =
+ Target.isAbsolute() ? MCSymbolRefExpr::VK_None
+ : Target.getSymA()->getKind();
+ // determine the type of the relocation
+ unsigned Type = MachO::GENERIC_RELOC_VANILLA;
+ if (IsPCRel) { // relative to PC
+ switch ((unsigned)FixupKind) {
+ default:
+ report_fatal_error("Unimplemented fixup kind (relative)");
+ case PPC::fixup_ppc_br24:
+ Type = MachO::PPC_RELOC_BR24; // R_PPC_REL24
+ break;
+ case PPC::fixup_ppc_brcond14:
+ Type = MachO::PPC_RELOC_BR14;
+ break;
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported modifier for half16 fixup");
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = MachO::PPC_RELOC_HA16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = MachO::PPC_RELOC_LO16;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = MachO::PPC_RELOC_HI16;
+ break;
+ }
+ break;
+ }
+ } else {
+ switch ((unsigned)FixupKind) {
+ default:
+ report_fatal_error("Unimplemented fixup kind (absolute)!");
+ case PPC::fixup_ppc_half16:
+ switch (Modifier) {
+ default:
+ llvm_unreachable("Unsupported modifier for half16 fixup");
+ case MCSymbolRefExpr::VK_PPC_HA:
+ Type = MachO::PPC_RELOC_HA16_SECTDIFF;
+ break;
+ case MCSymbolRefExpr::VK_PPC_LO:
+ Type = MachO::PPC_RELOC_LO16_SECTDIFF;
+ break;
+ case MCSymbolRefExpr::VK_PPC_HI:
+ Type = MachO::PPC_RELOC_HI16_SECTDIFF;
+ break;
+ }
+ break;
+ case FK_Data_4:
+ break;
+ case FK_Data_2:
+ break;
+ }
+ }
+ return Type;
+}
+
+static void makeRelocationInfo(MachO::any_relocation_info &MRE,
+ const uint32_t FixupOffset, const uint32_t Index,
+ const unsigned IsPCRel, const unsigned Log2Size,
+ const unsigned IsExtern, const unsigned Type) {
+ MRE.r_word0 = FixupOffset;
+ // The bitfield offsets that work (as determined by trial-and-error)
+ // are different than what is documented in the mach-o manuals.
+ // This appears to be an endianness issue; reversing the order of the
+ // documented bitfields in <llvm/Support/MachO.h> fixes this (but
+ // breaks x86/ARM assembly).
+ MRE.r_word1 = ((Index << 8) | // was << 0
+ (IsPCRel << 7) | // was << 24
+ (Log2Size << 5) | // was << 25
+ (IsExtern << 4) | // was << 27
+ (Type << 0)); // was << 28
+}
+
+static void
+makeScatteredRelocationInfo(MachO::any_relocation_info &MRE,
+ const uint32_t Addr, const unsigned Type,
+ const unsigned Log2Size, const unsigned IsPCRel,
+ const uint32_t Value2) {
+ // For notes on bitfield positions and endianness, see:
+ // https://developer.apple.com/library/mac/documentation/developertools/conceptual/MachORuntime/Reference/reference.html#//apple_ref/doc/uid/20001298-scattered_relocation_entry
+ MRE.r_word0 = ((Addr << 0) | (Type << 24) | (Log2Size << 28) |
+ (IsPCRel << 30) | MachO::R_SCATTERED);
+ MRE.r_word1 = Value2;
+}
+
+/// Compute fixup offset (address).
+static uint32_t getFixupOffset(const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+  // On Mach-O, fixup_ppc_half16 relocations must refer to the start of
+  // the instruction, not to the second halfword as they do on ELF.
+ if (unsigned(Fixup.getKind()) == PPC::fixup_ppc_half16)
+ FixupOffset &= ~uint32_t(3);
+ return FixupOffset;
+}
+
+/// \return false if falling back to using non-scattered relocation,
+/// otherwise true for normal scattered relocation.
+/// based on X86MachObjectWriter::RecordScatteredRelocation
+/// and ARMMachObjectWriter::RecordScatteredRelocation
+bool PPCMachObjectWriter::RecordScatteredRelocation(
+ MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size, uint64_t &FixedValue) {
+ // caller already computes these, can we just pass and reuse?
+ const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
+ const MCFixupKind FK = Fixup.getKind();
+ const unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
+ const unsigned Type = getRelocType(Target, FK, IsPCRel);
+
+ // Is this a local or SECTDIFF relocation entry?
+ // SECTDIFF relocation entries have symbol subtractions,
+ // and require two entries, the first for the add-symbol value,
+ // the second for the subtract-symbol value.
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr =
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // FIXME: is Type correct? see include/llvm/Support/MachO.h
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+ // FIXME: does FixedValue get used??
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == MachO::PPC_RELOC_SECTDIFF ||
+ Type == MachO::PPC_RELOC_HI16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LO16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_HA16_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LO14_SECTDIFF ||
+ Type == MachO::PPC_RELOC_LOCAL_SECTDIFF) {
+ // X86 had this piece, but ARM does not
+ // If the offset is too large to fit in a scattered relocation,
+ // we're hosed. It's an unfortunate limitation of the MachO format.
+ if (FixupOffset > 0xffffff) {
+ char Buffer[32];
+ format("0x%x", FixupOffset).print(Buffer, sizeof(Buffer));
+ Asm.getContext().FatalError(Fixup.getLoc(),
+ Twine("Section too large, can't encode "
+ "r_address (") +
+ Buffer + ") into 24 bits of scattered "
+ "relocation entry.");
+ llvm_unreachable("fatal error returned?!");
+ }
+
+ // Is this supposed to follow MCTarget/PPCAsmBackend.cpp:adjustFixupValue()?
+ // see PPCMCExpr::EvaluateAsRelocatableImpl()
+ uint32_t other_half = 0;
+ switch (Type) {
+ case MachO::PPC_RELOC_LO16_SECTDIFF:
+ other_half = (FixedValue >> 16) & 0xffff;
+      // applyFixupOffset no longer extracts the high part because it now
+      // assumes this was already done.
+ // It looks like this is not true for the FixedValue needed with Mach-O
+ // relocs.
+ // So we need to adjust FixedValue again here.
+ FixedValue &= 0xffff;
+ break;
+ case MachO::PPC_RELOC_HA16_SECTDIFF:
+ other_half = FixedValue & 0xffff;
+ FixedValue =
+ ((FixedValue >> 16) + ((FixedValue & 0x8000) ? 1 : 0)) & 0xffff;
+ break;
+ case MachO::PPC_RELOC_HI16_SECTDIFF:
+ other_half = FixedValue & 0xffff;
+ FixedValue = (FixedValue >> 16) & 0xffff;
+ break;
+ default:
+ llvm_unreachable("Invalid PPC scattered relocation type.");
+ break;
+ }
+
+ MachO::any_relocation_info MRE;
+ makeScatteredRelocationInfo(MRE, other_half, MachO::GENERIC_RELOC_PAIR,
+ Log2Size, IsPCRel, Value2);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ } else {
+ // If the offset is more than 24-bits, it won't fit in a scattered
+ // relocation offset field, so we fall back to using a non-scattered
+ // relocation. This is a bit risky, as if the offset reaches out of
+ // the block and the linker is doing scattered loading on this
+ // symbol, things can go badly.
+ //
+ // Required for 'as' compatibility.
+ if (FixupOffset > 0xffffff)
+ return false;
+ }
+ MachO::any_relocation_info MRE;
+ makeScatteredRelocationInfo(MRE, FixupOffset, Type, Log2Size, IsPCRel, Value);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ return true;
+}
+
+// see PPCELFObjectWriter for a general outline of cases
+void PPCMachObjectWriter::RecordPPCRelocation(
+ MachObjectWriter *Writer, const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue) {
+ const MCFixupKind FK = Fixup.getKind(); // unsigned
+ const unsigned Log2Size = getFixupKindLog2Size(FK);
+ const bool IsPCRel = Writer->isFixupKindPCRel(Asm, FK);
+ const unsigned RelocType = getRelocType(Target, FK, IsPCRel);
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB() &&
+ // Q: are branch targets ever scattered?
+ RelocType != MachO::PPC_RELOC_BR24 &&
+ RelocType != MachO::PPC_RELOC_BR14) {
+ RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ Log2Size, FixedValue);
+ return;
+ }
+
+ // this doesn't seem right for RIT_PPC_BR24
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // See <reloc.h>.
+ const uint32_t FixupOffset = getFixupOffset(Layout, Fragment, Fixup);
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = RelocType;
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ //
+ // FIXME: Currently, these are never generated (see code below). I cannot
+ // find a case where they are actually emitted.
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+ // the above line stolen from ARM, not sure
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD =
+ Asm.getSectionData(SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+ }
+
+ // struct relocation_info (8 bytes)
+ MachO::any_relocation_info MRE;
+ makeRelocationInfo(MRE, FixupOffset, Index, IsPCRel, Log2Size, IsExtern,
+ Type);
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createPPCMachObjectWriter(raw_ostream &OS, bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(
+ new PPCMachObjectWriter(Is64Bit, CPUType, CPUSubtype), OS,
+ /*IsLittleEndian=*/false);
+}
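
makeRelocationInfo above packs the classic Mach-O relocation_info bitfields by hand, with field positions chosen empirically because the words are byte-swapped later for this big-endian target. A round-trip sketch of the r_word1 packing (values illustrative):

    #include <cstdint>
    #include <cstdio>

    // Field positions (index<<8, pcrel<<7, length<<5, extern<<4, type<<0)
    // follow the trial-and-error layout noted in the code above.
    static uint32_t packWord1(uint32_t Index, unsigned PCRel,
                              unsigned Log2Size, unsigned Extern,
                              unsigned Type) {
      return (Index << 8) | (PCRel << 7) | (Log2Size << 5) |
             (Extern << 4) | (Type << 0);
    }

    int main() {
      uint32_t W = packWord1(5, /*PCRel=*/1, /*Log2Size=*/2,
                             /*Extern=*/1, /*Type=*/3);
      std::printf("index=%u pcrel=%u len=%u extern=%u type=%u\n",
                  W >> 8, (W >> 7) & 1, (W >> 5) & 3, (W >> 4) & 1, W & 0xF);
      // index=5 pcrel=1 len=2 extern=1 type=3
    }
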
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
index 853e505..63facc5 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp
@@ -26,6 +26,22 @@ PPC::Predicate PPC::InvertPredicate(PPC::Predicate Opcode) {
case PPC::PRED_LE: return PPC::PRED_GT;
case PPC::PRED_NU: return PPC::PRED_UN;
case PPC::PRED_UN: return PPC::PRED_NU;
+ case PPC::PRED_EQ_MINUS: return PPC::PRED_NE_PLUS;
+ case PPC::PRED_NE_MINUS: return PPC::PRED_EQ_PLUS;
+ case PPC::PRED_LT_MINUS: return PPC::PRED_GE_PLUS;
+ case PPC::PRED_GE_MINUS: return PPC::PRED_LT_PLUS;
+ case PPC::PRED_GT_MINUS: return PPC::PRED_LE_PLUS;
+ case PPC::PRED_LE_MINUS: return PPC::PRED_GT_PLUS;
+ case PPC::PRED_NU_MINUS: return PPC::PRED_UN_PLUS;
+ case PPC::PRED_UN_MINUS: return PPC::PRED_NU_PLUS;
+ case PPC::PRED_EQ_PLUS: return PPC::PRED_NE_MINUS;
+ case PPC::PRED_NE_PLUS: return PPC::PRED_EQ_MINUS;
+ case PPC::PRED_LT_PLUS: return PPC::PRED_GE_MINUS;
+ case PPC::PRED_GE_PLUS: return PPC::PRED_LT_MINUS;
+ case PPC::PRED_GT_PLUS: return PPC::PRED_LE_MINUS;
+ case PPC::PRED_LE_PLUS: return PPC::PRED_GT_MINUS;
+ case PPC::PRED_NU_PLUS: return PPC::PRED_UN_MINUS;
+ case PPC::PRED_UN_PLUS: return PPC::PRED_NU_MINUS;
}
llvm_unreachable("Unknown PPC branch opcode!");
}
@@ -40,6 +56,22 @@ PPC::Predicate PPC::getSwappedPredicate(PPC::Predicate Opcode) {
case PPC::PRED_LE: return PPC::PRED_GE;
case PPC::PRED_NU: return PPC::PRED_NU;
case PPC::PRED_UN: return PPC::PRED_UN;
+ case PPC::PRED_EQ_MINUS: return PPC::PRED_EQ_MINUS;
+ case PPC::PRED_NE_MINUS: return PPC::PRED_NE_MINUS;
+ case PPC::PRED_LT_MINUS: return PPC::PRED_GT_MINUS;
+ case PPC::PRED_GE_MINUS: return PPC::PRED_LE_MINUS;
+ case PPC::PRED_GT_MINUS: return PPC::PRED_LT_MINUS;
+ case PPC::PRED_LE_MINUS: return PPC::PRED_GE_MINUS;
+ case PPC::PRED_NU_MINUS: return PPC::PRED_NU_MINUS;
+ case PPC::PRED_UN_MINUS: return PPC::PRED_UN_MINUS;
+ case PPC::PRED_EQ_PLUS: return PPC::PRED_EQ_PLUS;
+ case PPC::PRED_NE_PLUS: return PPC::PRED_NE_PLUS;
+ case PPC::PRED_LT_PLUS: return PPC::PRED_GT_PLUS;
+ case PPC::PRED_GE_PLUS: return PPC::PRED_LE_PLUS;
+ case PPC::PRED_GT_PLUS: return PPC::PRED_LT_PLUS;
+ case PPC::PRED_LE_PLUS: return PPC::PRED_GE_PLUS;
+ case PPC::PRED_NU_PLUS: return PPC::PRED_NU_PLUS;
+ case PPC::PRED_UN_PLUS: return PPC::PRED_UN_PLUS;
}
llvm_unreachable("Unknown PPC branch opcode!");
}
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
index 444758c..d498c2f 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h
@@ -25,14 +25,30 @@ namespace llvm {
namespace PPC {
/// Predicate - These are "(BI << 5) | BO" for various predicates.
enum Predicate {
- PRED_LT = (0 << 5) | 12,
- PRED_LE = (1 << 5) | 4,
- PRED_EQ = (2 << 5) | 12,
- PRED_GE = (0 << 5) | 4,
- PRED_GT = (1 << 5) | 12,
- PRED_NE = (2 << 5) | 4,
- PRED_UN = (3 << 5) | 12,
- PRED_NU = (3 << 5) | 4
+ PRED_LT = (0 << 5) | 12,
+ PRED_LE = (1 << 5) | 4,
+ PRED_EQ = (2 << 5) | 12,
+ PRED_GE = (0 << 5) | 4,
+ PRED_GT = (1 << 5) | 12,
+ PRED_NE = (2 << 5) | 4,
+ PRED_UN = (3 << 5) | 12,
+ PRED_NU = (3 << 5) | 4,
+ PRED_LT_MINUS = (0 << 5) | 14,
+ PRED_LE_MINUS = (1 << 5) | 6,
+ PRED_EQ_MINUS = (2 << 5) | 14,
+ PRED_GE_MINUS = (0 << 5) | 6,
+ PRED_GT_MINUS = (1 << 5) | 14,
+ PRED_NE_MINUS = (2 << 5) | 6,
+ PRED_UN_MINUS = (3 << 5) | 14,
+ PRED_NU_MINUS = (3 << 5) | 6,
+ PRED_LT_PLUS = (0 << 5) | 15,
+ PRED_LE_PLUS = (1 << 5) | 7,
+ PRED_EQ_PLUS = (2 << 5) | 15,
+ PRED_GE_PLUS = (0 << 5) | 7,
+ PRED_GT_PLUS = (1 << 5) | 15,
+ PRED_NE_PLUS = (2 << 5) | 7,
+ PRED_UN_PLUS = (3 << 5) | 15,
+ PRED_NU_PLUS = (3 << 5) | 7
};
/// Invert the specified predicate. != -> ==, < -> >=.
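
Each enumerator packs the CR bit index (BI) and the branch-options field (BO) as the comment above describes; the new _MINUS/_PLUS variants differ from the base predicates only in the low BO bits that encode the static prediction hint (12 becomes 14/15, 4 becomes 6/7). Hypothetical decoding helpers, not part of this header:

    // Hypothetical helpers; the field layout follows "(BI << 5) | BO".
    unsigned getBI(PPC::Predicate P) { return unsigned(P) >> 5; }   // CR bit index
    unsigned getBO(PPC::Predicate P) { return unsigned(P) & 0x1F; } // branch options
    // e.g. PRED_GT_PLUS = (1 << 5) | 15: BI = 1, BO = 15 ("branch if
    // condition true" with the '+' taken hint in the low BO bits).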
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.h b/contrib/llvm/lib/Target/PowerPC/PPC.h
index b4be51a..f0d5af2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.h
@@ -30,7 +30,10 @@ namespace llvm {
class AsmPrinter;
class MCInst;
- FunctionPass *createPPCCTRLoops();
+ FunctionPass *createPPCCTRLoops(PPCTargetMachine &TM);
+#ifndef NDEBUG
+ FunctionPass *createPPCCTRLoopsVerify();
+#endif
FunctionPass *createPPCEarlyReturnPass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
@@ -71,18 +74,21 @@ namespace llvm {
/// The next are not flags but distinct values.
MO_ACCESS_MASK = 0xf0,
- /// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
- MO_LO16 = 1 << 4,
- MO_HA16 = 2 << 4,
+ /// MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
+ MO_LO = 1 << 4,
+ MO_HA = 2 << 4,
- MO_TPREL16_HA = 3 << 4,
- MO_TPREL16_LO = 4 << 4,
+ MO_TPREL_LO = 4 << 4,
+ MO_TPREL_HA = 3 << 4,
/// These values identify relocations on immediates folded
/// into memory operations.
- MO_DTPREL16_LO = 5 << 4,
- MO_TLSLD16_LO = 6 << 4,
- MO_TOC16_LO = 7 << 4
+ MO_DTPREL_LO = 5 << 4,
+ MO_TLSLD_LO = 6 << 4,
+ MO_TOC_LO = 7 << 4,
+
+ // Symbol for VK_PPC_TLS fixup attached to an ADD instruction
+ MO_TLS = 8 << 4
};
} // end namespace PPCII
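
MO_ACCESS_MASK covers the high nibble of an operand's target flags, so the renamed MO_* access values (1 << 4 up through the new MO_TLS = 8 << 4) coexist with the flag bits in the low nibble. A hypothetical extraction helper; only the masking scheme is taken from the header above:

    static unsigned getAccessKind(unsigned TargetFlags) {
      // Yields one of the MO_* access values, e.g. PPCII::MO_TOC_LO.
      return TargetFlags & PPCII::MO_ACCESS_MASK;
    }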
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index eb73c67..54e3d40 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -57,6 +57,8 @@ def FeatureMFOCRF : SubtargetFeature<"mfocrf","HasMFOCRF", "true",
"Enable the MFOCRF instruction">;
def FeatureFSqrt : SubtargetFeature<"fsqrt","HasFSQRT", "true",
"Enable the fsqrt instruction">;
+def FeatureFCPSGN : SubtargetFeature<"fcpsgn", "HasFCPSGN", "true",
+ "Enable the fcpsgn instruction">;
def FeatureFRE : SubtargetFeature<"fre", "HasFRE", "true",
"Enable the fre instruction">;
def FeatureFRES : SubtargetFeature<"fres", "HasFRES", "true",
@@ -85,6 +87,13 @@ def FeatureBookE : SubtargetFeature<"booke", "IsBookE", "true",
"Enable Book E instructions">;
def FeatureQPX : SubtargetFeature<"qpx","HasQPX", "true",
"Enable QPX instructions">;
+def FeatureVSX : SubtargetFeature<"vsx","HasVSX", "true",
+ "Enable VSX instructions">;
+
+def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
+ "Treat mftb as deprecated">;
+def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
+ "Treat vector data stream cache control instructions as deprecated">;
// Note: Future features to add when support is extended to more
// recent ISA levels:
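
DeprecatedMFTB and DeprecatedDST are given an empty feature string, so they cannot be toggled from the command line; they are only attached to the processor definitions below. A sketch of how the backend side might consume them, assuming the usual TableGen-generated subtarget accessors (the getter name here is an assumption):

    // Sketch, assuming PPCSubtarget exposes a generated predicate for the
    // DeprecatedMFTB field.
    bool preferMFSPRTimeBase(const PPCSubtarget &ST) {
      // On processors tagged DeprecatedMFTB, read the time base via mfspr
      // rather than the deprecated mftb encoding.
      return ST.isDeprecatedMFTB();
    }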
@@ -146,10 +155,10 @@ include "PPCInstrInfo.td"
def : Processor<"generic", G3Itineraries, [Directive32]>;
def : Processor<"440", PPC440Itineraries, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE]>;
+ FeatureBookE, DeprecatedMFTB]>;
def : Processor<"450", PPC440Itineraries, [Directive440, FeatureISEL,
FeatureFRES, FeatureFRSQRTE,
- FeatureBookE]>;
+ FeatureBookE, DeprecatedMFTB]>;
def : Processor<"601", G3Itineraries, [Directive601]>;
def : Processor<"602", G3Itineraries, [Directive602]>;
def : Processor<"603", G3Itineraries, [Directive603,
@@ -185,29 +194,32 @@ def : ProcessorModel<"g5", G5Model,
[Directive970, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureSTFIWX,
FeatureFRES, FeatureFRSQRTE,
- Feature64Bit /*, Feature64BitRegs */]>;
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"e500mc", PPCE500mcModel,
[DirectiveE500mc, FeatureMFOCRF,
- FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
+ FeatureSTFIWX, FeatureBookE, FeatureISEL,
+ DeprecatedMFTB]>;
def : ProcessorModel<"e5500", PPCE5500Model,
[DirectiveE5500, FeatureMFOCRF, Feature64Bit,
- FeatureSTFIWX, FeatureBookE, FeatureISEL]>;
+ FeatureSTFIWX, FeatureBookE, FeatureISEL,
+ DeprecatedMFTB]>;
def : ProcessorModel<"a2", PPCA2Model,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
- FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
- /*, Feature64BitRegs */]>;
+ /*, Feature64BitRegs */, DeprecatedMFTB]>;
def : ProcessorModel<"a2q", PPCA2Model,
[DirectiveA2, FeatureBookE, FeatureMFOCRF,
- FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX, Feature64Bit
- /*, Feature64BitRegs */, FeatureQPX]>;
+ /*, Feature64BitRegs */, FeatureQPX, DeprecatedMFTB]>;
def : ProcessorModel<"pwr3", G5Model,
[DirectivePwr3, FeatureAltivec,
FeatureFRES, FeatureFRSQRTE, FeatureMFOCRF,
@@ -220,38 +232,48 @@ def : ProcessorModel<"pwr5", G5Model,
[DirectivePwr5, FeatureAltivec, FeatureMFOCRF,
FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureSTFIWX, Feature64Bit]>;
+ FeatureSTFIWX, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr5x", G5Model,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES,
- FeatureSTFIWX, FeatureFPRND, Feature64Bit]>;
+ FeatureSTFIWX, FeatureFPRND, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr6", G5Model,
[DirectivePwr6, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, Feature64Bit /*, Feature64BitRegs */]>;
+ FeatureFPRND, Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr6x", G5Model,
[DirectivePwr5x, FeatureAltivec, FeatureMFOCRF,
- FeatureFSqrt, FeatureFRE, FeatureFRES,
+ FeatureFCPSGN, FeatureFSqrt, FeatureFRE, FeatureFRES,
FeatureFRSQRTE, FeatureFRSQRTES, FeatureRecipPrec,
FeatureSTFIWX, FeatureLFIWAX,
- FeatureFPRND, Feature64Bit]>;
+ FeatureFPRND, Feature64Bit,
+ DeprecatedMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr7", G5Model,
[DirectivePwr7, FeatureAltivec,
- FeatureMFOCRF, FeatureFSqrt, FeatureFRE,
+ FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE,
FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES,
FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX,
FeatureFPRND, FeatureFPCVT, FeatureISEL,
FeaturePOPCNTD, FeatureLDBRX,
- Feature64Bit /*, Feature64BitRegs */]>;
+ Feature64Bit /*, Feature64BitRegs */,
+ DeprecatedMFTB, DeprecatedDST]>;
def : Processor<"ppc", G3Itineraries, [Directive32]>;
def : ProcessorModel<"ppc64", G5Model,
[Directive64, FeatureAltivec,
FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
FeatureFRSQRTE, FeatureSTFIWX,
Feature64Bit /*, Feature64BitRegs */]>;
+def : ProcessorModel<"ppc64le", G5Model,
+ [Directive64, FeatureAltivec,
+ FeatureMFOCRF, FeatureFSqrt, FeatureFRES,
+ FeatureFRSQRTE, FeatureSTFIWX,
+ Feature64Bit /*, Feature64BitRegs */]>;
//===----------------------------------------------------------------------===//
// Calling Conventions
@@ -272,10 +294,20 @@ def PPCAsmParser : AsmParser {
let ShouldEmitMatchRegisterName = 0;
}
+def PPCAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+
+ // We do not use hard-coded registers in asm strings. However, some
+ // InstAlias definitions use immediate literals. Set RegisterPrefix
+ // so that those are not misinterpreted as registers.
+ string RegisterPrefix = "%";
+}
+
def PPC : Target {
// Information about the instructions.
let InstructionSet = PPCInstrInfo;
let AssemblyWriters = [PPCAsmWriter];
let AssemblyParsers = [PPCAsmParser];
+ let AssemblyParserVariants = [PPCAsmParserVariant];
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 3c7cc4e..ada34ed 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -20,8 +20,10 @@
#include "PPC.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCPredicates.h"
+#include "MCTargetDesc/PPCMCExpr.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
+#include "PPCTargetStreamer.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -85,18 +87,6 @@ namespace {
bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O);
-
- MachineLocation getDebugValueLocation(const MachineInstr *MI) const {
- MachineLocation Location;
- assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
- // Frame address. Currently handles register +- offset only.
- if (MI->getOperand(0).isReg() && MI->getOperand(2).isImm())
- Location.set(MI->getOperand(0).getReg(), MI->getOperand(2).getImm());
- else {
- DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n");
- }
- return Location;
- }
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
@@ -213,7 +203,7 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
.getGVStubEntry(SymToPrint);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else if (GV->isDeclaration() || GV->hasCommonLinkage() ||
GV->hasAvailableExternallyLinkage()) {
SymToPrint = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr");
@@ -223,12 +213,12 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
getHiddenGVStubEntry(SymToPrint);
if (StubSym.getPointer() == 0)
StubSym = MachineModuleInfoImpl::
- StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage());
+ StubValueTy(getSymbol(GV), !GV->hasInternalLinkage());
} else {
- SymToPrint = Mang->getSymbol(GV);
+ SymToPrint = getSymbol(GV);
}
} else {
- SymToPrint = Mang->getSymbol(GV);
+ SymToPrint = getSymbol(GV);
}
O << *SymToPrint;
@@ -339,28 +329,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Lower multi-instruction pseudo operations.
switch (MI->getOpcode()) {
default: break;
- case TargetOpcode::DBG_VALUE: {
- if (!isVerbose() || !OutStreamer.hasRawTextSupport()) return;
-
- SmallString<32> Str;
- raw_svector_ostream O(Str);
- unsigned NOps = MI->getNumOperands();
- assert(NOps==4);
- O << '\t' << MAI->getCommentString() << "DEBUG_VALUE: ";
- // cast away const; DIetc do not take const operands for some reason.
- DIVariable V(const_cast<MDNode *>(MI->getOperand(NOps-1).getMetadata()));
- O << V.getName();
- O << " <- ";
- // Frame address. Currently handles register +- offset only.
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm());
- O << '['; printOperand(MI, 0, O); O << '+'; printOperand(MI, 1, O);
- O << ']';
- O << "+";
- printOperand(MI, NOps-2, O);
- OutStreamer.EmitRawText(O.str());
- return;
- }
-
+ case TargetOpcode::DBG_VALUE:
+ llvm_unreachable("Should be handled target independently");
case PPC::MovePCtoLR:
case PPC::MovePCtoLR8: {
// Transform %LR = MovePCtoLR
@@ -394,7 +364,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(MO.isGlobal() || MO.isCPI() || MO.isJTI());
MCSymbol *MOSymbol = 0;
if (MO.isGlobal())
- MOSymbol = Mang->getSymbol(MO.getGlobal());
+ MOSymbol = getSymbol(MO.getGlobal());
else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
else if (MO.isJTI())
@@ -403,7 +373,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *TOCEntry = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
- MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC_ENTRY,
+ MCSymbolRefExpr::Create(TOCEntry, MCSymbolRefExpr::VK_PPC_TOC,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
@@ -433,7 +403,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = Mang->getSymbol(RealGValue);
+ MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
IsCommon = GVar && RealGValue->hasCommonLinkage();
@@ -444,11 +414,12 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
else if (MO.isJTI())
MOSymbol = GetJTISymbol(MO.getIndex());
- if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI())
+ if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_HA,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
OutContext);
TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
@@ -469,23 +440,27 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (MO.isJTI())
MOSymbol = lookUpOrCreateTOCEntry(GetJTISymbol(MO.getIndex()));
- else if (MO.isCPI())
+ else if (MO.isCPI()) {
MOSymbol = GetCPISymbol(MO.getIndex());
+ if (TM.getCodeModel() == CodeModel::Large)
+ MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
+ }
else if (MO.isGlobal()) {
const GlobalValue *GValue = MO.getGlobal();
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = Mang->getSymbol(RealGValue);
+ MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
if (!GVar || !GVar->hasInitializer() || RealGValue->hasCommonLinkage() ||
- RealGValue->hasAvailableExternallyLinkage())
+ RealGValue->hasAvailableExternallyLinkage() ||
+ TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
}
const MCExpr *Exp =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
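
The added CodeModel::Large checks force constant-pool and global references through a TOC entry, so the 16-bit @toc@ha/@toc@l pair always addresses an in-range TOC slot rather than the symbol itself. Illustrative output for a large-code-model reference (assembly shown in C++ comments; the label name is made up):

    // addis r3, r2, .LC0@toc@ha   // high-adjusted part of the TOC offset
    // ld    r3, .LC0@toc@l(r3)    // load the address from the TOC slot
    // ...where .LC0 is the entry created by lookUpOrCreateTOCEntry().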
@@ -510,18 +485,18 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const GlobalAlias *GAlias = dyn_cast<GlobalAlias>(GValue);
const GlobalValue *RealGValue = GAlias ?
GAlias->resolveAliasedGlobal(false) : GValue;
- MOSymbol = Mang->getSymbol(RealGValue);
+ MOSymbol = getSymbol(RealGValue);
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(RealGValue);
IsExternal = GVar && !GVar->hasInitializer();
IsFunction = !GVar;
} else if (MO.isCPI())
MOSymbol = GetCPISymbol(MO.getIndex());
- if (IsFunction || IsExternal)
+ if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large)
MOSymbol = lookUpOrCreateTOCEntry(MOSymbol);
const MCExpr *Exp =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC16_LO,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_LO,
OutContext);
TmpInst.getOperand(2) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
@@ -533,9 +508,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTprel =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
@@ -551,9 +526,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
TmpInst.setOpcode(PPC::LD);
const MachineOperand &MO = MI->getOperand(1);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *Exp =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
@@ -565,9 +540,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
@@ -581,9 +556,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsGD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
@@ -593,7 +568,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::GETtlsADDR: {
// Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
- // Into: BL8_NOP_TLSGD __tls_get_addr(sym@tlsgd)
+ // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd)
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
StringRef Name = "__tls_get_addr";
@@ -602,11 +577,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSGD)
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
return;
@@ -617,9 +592,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
@@ -633,9 +608,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymGotTlsLD =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
@@ -645,7 +620,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
case PPC::GETtlsldADDR: {
// Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
- // Into: BL8_NOP_TLSLD __tls_get_addr(sym@tlsld)
+ // Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld)
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
StringRef Name = "__tls_get_addr";
@@ -654,11 +629,11 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSLD,
OutContext);
- OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLSLD)
+ OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
return;
@@ -669,9 +644,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_HA,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_HA,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
@@ -685,9 +660,9 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
- MCSymbol *MOSymbol = Mang->getSymbol(GValue);
+ MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymDtprel =
- MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL16_LO,
+ MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_DTPREL_LO,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8)
.addReg(MI->getOperand(0).getReg())
@@ -695,21 +670,63 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(SymDtprel));
return;
}
- case PPC::MFCRpseud:
- case PPC::MFCR8pseud:
- // Transform: %R3 = MFCRpseud %CR7
- // Into: %R3 = MFCR ;; cr7
- OutStreamer.AddComment(PPCInstPrinter::
- getRegisterName(MI->getOperand(1).getReg()));
- OutStreamer.EmitInstruction(MCInstBuilder(Subtarget.isPPC64() ? PPC::MFCR8 : PPC::MFCR)
- .addReg(MI->getOperand(0).getReg()));
- return;
+ case PPC::MFOCRF:
+ case PPC::MFOCRF8:
+ if (!Subtarget.hasMFOCRF()) {
+ // Transform: %R3 = MFOCRF %CR7
+ // Into: %R3 = MFCR ;; cr7
+ unsigned NewOpcode =
+ MI->getOpcode() == PPC::MFOCRF ? PPC::MFCR : PPC::MFCR8;
+ OutStreamer.AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(1).getReg()));
+ OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode)
+ .addReg(MI->getOperand(0).getReg()));
+ return;
+ }
+ break;
+ case PPC::MTOCRF:
+ case PPC::MTOCRF8:
+ if (!Subtarget.hasMFOCRF()) {
+ // Transform: %CR7 = MTOCRF %R3
+ // Into: MTCRF mask, %R3 ;; cr7
+ unsigned NewOpcode =
+ MI->getOpcode() == PPC::MTOCRF ? PPC::MTCRF : PPC::MTCRF8;
+ unsigned Mask = 0x80 >> OutContext.getRegisterInfo()
+ ->getEncodingValue(MI->getOperand(0).getReg());
+ OutStreamer.AddComment(PPCInstPrinter::
+ getRegisterName(MI->getOperand(0).getReg()));
+ OutStreamer.EmitInstruction(MCInstBuilder(NewOpcode)
+ .addImm(Mask)
+ .addReg(MI->getOperand(1).getReg()));
+ return;
+ }
+ break;
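    // Worked example (illustrative helper, not part of this patch): the
    // MTCRF FXM field is one-hot with cr0 in the most significant bit, so
    // %CR7 (encoding value 7) gives Mask = 0x80 >> 7 = 0x01.
    static unsigned crFieldMask(unsigned CREncoding /* 0..7, cr0..cr7 */) {
      return 0x80u >> CREncoding;
    }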
case PPC::SYNC:
// In Book E, sync is called msync; handle this special case here...
if (Subtarget.isBookE()) {
OutStreamer.EmitRawText(StringRef("\tmsync"));
return;
}
+ break;
+ case PPC::LD:
+ case PPC::STD:
+ case PPC::LWA_32:
+ case PPC::LWA: {
+ // Verify alignment is legal, so we don't create relocations
+ // that can't be supported.
+ // FIXME: This test is currently disabled for Darwin. The test
+ // suite shows a handful of test cases that fail this check for
+ // Darwin. Those need to be investigated before this sanity test
+ // can be enabled for those subtargets.
+ if (!Subtarget.isDarwin()) {
+ unsigned OpNum = (MI->getOpcode() == PPC::STD) ? 2 : 1;
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ if (MO.isGlobal() && MO.getGlobal()->getAlignment() < 4)
+ llvm_unreachable("Global must be word-aligned for LD, STD, LWA!");
+ }
+ // Now process the instruction normally.
+ break;
+ }
}
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
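
The new LD/STD/LWA alignment check exists because DS-form instructions reuse the low two bits of the 16-bit displacement as opcode bits, so only multiples of 4 are encodable and any symbol folded into the displacement must be at least word-aligned. An illustrative validity test (not an LLVM API):

    bool isValidDSFormOffset(int64_t Imm) {
      // DS-form: a 14-bit signed word offset, i.e. a multiple of 4
      // that fits in 16 bits.
      return (Imm & 3) == 0 && Imm >= -32768 && Imm <= 32767;
    }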
@@ -737,7 +754,7 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
MCSymbol *Symbol2 = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
// Generates a R_PPC64_TOC relocation for TOC base insertion.
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Symbol2,
- MCSymbolRefExpr::VK_PPC_TOC, OutContext),
+ MCSymbolRefExpr::VK_PPC_TOCBASE, OutContext),
8/*size*/);
// Emit a null environment pointer.
OutStreamer.EmitIntValue(0, 8 /* size */);
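
This sequence fills in the 64-bit ELFv1 function descriptor: the function's code address, the TOC base (resolved through the R_PPC64_TOC relocation against .TOC.), and a null environment pointer. Shown as a struct for illustration only:

    #include <cstdint>

    // Illustration of the 24-byte descriptor being laid out above.
    struct FunctionDescriptor {
      uint64_t EntryPoint;  // address of the function body
      uint64_t TOCBase;     // value of .TOC., via R_PPC64_TOC
      uint64_t Environment; // emitted as zero
    };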
@@ -755,6 +772,9 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
bool isPPC64 = TD->getPointerSizeInBits() == 64;
+ PPCTargetStreamer &TS =
+ static_cast<PPCTargetStreamer &>(OutStreamer.getTargetStreamer());
+
if (isPPC64 && !TOC.empty()) {
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".toc",
ELF::SHT_PROGBITS, ELF::SHF_WRITE | ELF::SHF_ALLOC,
@@ -765,7 +785,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
E = TOC.end(); I != E; ++I) {
OutStreamer.EmitLabel(I->second);
MCSymbol *S = OutContext.GetOrCreateSymbol(I->first->getName());
- OutStreamer.EmitTCEntry(*S);
+ TS.emitTCEntry(*S);
}
}
@@ -781,7 +801,7 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
// .long _foo
OutStreamer.EmitValue(MCSymbolRefExpr::Create(Stubs[i].second.getPointer(),
OutContext),
- isPPC64 ? 8 : 4/*size*/, 0/*addrspace*/);
+ isPPC64 ? 8 : 4/*size*/);
}
Stubs.clear();
@@ -829,7 +849,8 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
"power6",
"power6x",
"power7",
- "ppc64"
+ "ppc64",
+ "ppc64le"
};
unsigned Directive = Subtarget.getDarwinDirective();
@@ -843,7 +864,7 @@ void PPCDarwinAsmPrinter::EmitStartOfAsmFile(Module &M) {
// FIXME: This is a total hack, finish mc'izing the PPC backend.
if (OutStreamer.hasRawTextSupport()) {
- assert(Directive < sizeof(CPUDirectives) / sizeof(*CPUDirectives) &&
+ assert(Directive < array_lengthof(CPUDirectives) &&
"CPUDirectives[] might not be up-to-date!");
OutStreamer.EmitRawText("\t.machine " + Twine(CPUDirectives[Directive]));
}
@@ -883,6 +904,7 @@ static MCSymbol *GetAnonSym(MCSymbol *Sym, MCContext &Ctx) {
void PPCDarwinAsmPrinter::
EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
bool isPPC64 = TM.getDataLayout()->getPointerSizeInBits() == 64;
+ bool isDarwin = Subtarget.isDarwin();
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
@@ -910,6 +932,9 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
const MCExpr *Anon = MCSymbolRefExpr::Create(AnonSymbol, OutContext);
+ const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
+ const MCExpr *Sub =
+ MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext);
// mflr r0
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R0));
@@ -919,21 +944,20 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
// mflr r11
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MFLR).addReg(PPC::R11));
// addis r11, r11, ha16(LazyPtr - AnonSymbol)
- const MCExpr *Sub =
- MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(LazyPtr, OutContext),
- Anon, OutContext);
+ const MCExpr *SubHa16 = PPCMCExpr::CreateHa(Sub, isDarwin, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS)
.addReg(PPC::R11)
.addReg(PPC::R11)
- .addExpr(Sub));
+ .addExpr(SubHa16));
// mtlr r0
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTLR).addReg(PPC::R0));
// ldu r12, lo16(LazyPtr - AnonSymbol)(r11)
// lwzu r12, lo16(LazyPtr - AnonSymbol)(r11)
+ const MCExpr *SubLo16 = PPCMCExpr::CreateLo(Sub, isDarwin, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
- .addExpr(Sub).addExpr(Sub)
+ .addExpr(SubLo16).addExpr(SubLo16)
.addReg(PPC::R11));
// mtctr r12
OutStreamer.EmitInstruction(MCInstBuilder(PPC::MTCTR).addReg(PPC::R12));
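
The Darwin ha16()/lo16() modifiers are now modeled as PPCMCExpr target expressions wrapping an arbitrary MCExpr, which is what lets the stub code above apply them to the (LazyPtr - AnonSymbol) difference. The pattern, as used in this hunk:

    // Build the difference once, then wrap it for each relocation half.
    const MCExpr *Sub  = MCBinaryExpr::CreateSub(LazyPtrExpr, Anon, OutContext);
    const MCExpr *Ha16 = PPCMCExpr::CreateHa(Sub, /*isDarwin=*/true, OutContext);
    const MCExpr *Lo16 = PPCMCExpr::CreateLo(Sub, /*isDarwin=*/true, OutContext);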
@@ -967,24 +991,24 @@ EmitFunctionStubs(const MachineModuleInfoMachO::SymbolListTy &Stubs) {
MCSymbol *Stub = Stubs[i].first;
MCSymbol *RawSym = Stubs[i].second.getPointer();
MCSymbol *LazyPtr = GetLazyPtr(Stub, OutContext);
+ const MCExpr *LazyPtrExpr = MCSymbolRefExpr::Create(LazyPtr, OutContext);
OutStreamer.SwitchSection(StubSection);
EmitAlignment(4);
OutStreamer.EmitLabel(Stub);
OutStreamer.EmitSymbolAttribute(RawSym, MCSA_IndirectSymbol);
+
// lis r11, ha16(LazyPtr)
const MCExpr *LazyPtrHa16 =
- MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_HA16,
- OutContext);
+ PPCMCExpr::CreateHa(LazyPtrExpr, isDarwin, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::LIS)
.addReg(PPC::R11)
.addExpr(LazyPtrHa16));
- const MCExpr *LazyPtrLo16 =
- MCSymbolRefExpr::Create(LazyPtr, MCSymbolRefExpr::VK_PPC_DARWIN_LO16,
- OutContext);
// ldu r12, lo16(LazyPtr)(r11)
// lwzu r12, lo16(LazyPtr)(r11)
+ const MCExpr *LazyPtrLo16 =
+ PPCMCExpr::CreateLo(LazyPtrExpr, isDarwin, OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(isPPC64 ? PPC::LDU : PPC::LWZU)
.addReg(PPC::R12)
.addExpr(LazyPtrLo16).addExpr(LazyPtrLo16)
@@ -1037,7 +1061,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
MCSymbol *NLPSym = GetSymbolWithGlobalValueBase(*I, "$non_lazy_ptr");
MachineModuleInfoImpl::StubValueTy &StubSym =
MMIMacho.getGVStubEntry(NLPSym);
- StubSym = MachineModuleInfoImpl::StubValueTy(Mang->getSymbol(*I), true);
+ StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(*I), true);
}
}
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 81a54d7..4224ae2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -9,767 +9,641 @@
//
// This pass identifies loops where we can generate the PPC branch instructions
// that decrement and test the count register (CTR) (bdnz and friends).
-// This pass is based on the HexagonHardwareLoops pass.
//
// The pattern that defines the induction variable can change depending on
// prior optimizations. For example, the IndVarSimplify phase run by 'opt'
// normalizes induction variables, and the Loop Strength Reduction pass
// run by 'llc' may also make changes to the induction variable.
-// The pattern detected by this phase is due to running Strength Reduction.
//
// Criteria for CTR loops:
// - Countable loops (w/ ind. var for a trip count)
-// - Assumes loops are normalized by IndVarSimplify
// - Try inner-most loops first
// - No nested CTR loops.
// - No function calls in loops.
//
-// Note: As with unconverted loops, PPCBranchSelector must be run after this
-// pass in order to convert long-displacement jumps into jump pairs.
-//
//===----------------------------------------------------------------------===//
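
The rewritten pass runs on the IR rather than on MachineInstrs, so it can use ScalarEvolution to find the trip count. The kind of loop it targets, as illustrative C++ source (any countable, call-free loop qualifies):

    // Expected to lower to mtctr <n> ... bdnz <header> on PowerPC.
    void scale(float *a, int n) {
      for (int i = 0; i < n; ++i)
        a[i] *= 2.0f; // no calls; trip count computable by ScalarEvolution
    }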
#define DEBUG_TYPE "ctrloops"
-#include "PPC.h"
-#include "MCTargetDesc/PPCPredicates.h"
-#include "PPCTargetMachine.h"
-#include "llvm/ADT/DenseMap.h"
+
+#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Target/TargetLibraryInfo.h"
+#include "PPCTargetMachine.h"
+#include "PPC.h"
+
+#ifndef NDEBUG
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#endif
+
#include <algorithm>
+#include <vector>
using namespace llvm;
+#ifndef NDEBUG
+static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
+#endif
+
STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");
namespace llvm {
void initializePPCCTRLoopsPass(PassRegistry&);
+#ifndef NDEBUG
+ void initializePPCCTRLoopsVerifyPass(PassRegistry&);
+#endif
}
namespace {
- class CountValue;
- struct PPCCTRLoops : public MachineFunctionPass {
- MachineLoopInfo *MLI;
- MachineRegisterInfo *MRI;
- const TargetInstrInfo *TII;
+ struct PPCCTRLoops : public FunctionPass {
+
+#ifndef NDEBUG
+ static int Counter;
+#endif
public:
- static char ID; // Pass identification, replacement for typeid
+ static char ID;
- PPCCTRLoops() : MachineFunctionPass(ID) {
+ PPCCTRLoops() : FunctionPass(ID), TM(0) {
+ initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
+ }
+ PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- const char *getPassName() const { return "PPC CTR Loops"; }
+ virtual bool runOnFunction(Function &F);
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<LoopInfo>();
+ AU.addPreserved<LoopInfo>();
+ AU.addRequired<DominatorTree>();
+ AU.addPreserved<DominatorTree>();
+ AU.addRequired<ScalarEvolution>();
}
private:
- /// getCanonicalInductionVariable - Check to see if the loop has a canonical
- /// induction variable.
- /// Should be defined in MachineLoop. Based upon version in class Loop.
- void getCanonicalInductionVariable(MachineLoop *L,
- SmallVector<MachineInstr *, 4> &IVars,
- SmallVector<MachineInstr *, 4> &IOps) const;
-
- /// getTripCount - Return a loop-invariant LLVM register indicating the
- /// number of times the loop will be executed. If the trip-count cannot
- /// be determined, this returns null.
- CountValue *getTripCount(MachineLoop *L,
- SmallVector<MachineInstr *, 2> &OldInsts) const;
-
- /// isInductionOperation - Return true if the instruction matches the
- /// pattern for an operation that defines an induction variable.
- bool isInductionOperation(const MachineInstr *MI, unsigned IVReg) const;
-
- /// isInvalidOperation - Return true if the instruction is not valid within
- /// a CTR loop.
- bool isInvalidLoopOperation(const MachineInstr *MI) const;
-
- /// containsInvalidInstruction - Return true if the loop contains an
- /// instruction that inhibits using the CTR loop.
- bool containsInvalidInstruction(MachineLoop *L) const;
-
- /// convertToCTRLoop - Given a loop, check if we can convert it to a
- /// CTR loop. If so, then perform the conversion and return true.
- bool convertToCTRLoop(MachineLoop *L);
-
- /// isDead - Return true if the instruction is now dead.
- bool isDead(const MachineInstr *MI,
- SmallVector<MachineInstr *, 1> &DeadPhis) const;
-
- /// removeIfDead - Remove the instruction if it is now dead.
- void removeIfDead(MachineInstr *MI);
+ bool mightUseCTR(const Triple &TT, BasicBlock *BB);
+ bool convertToCTRLoop(Loop *L);
+
+ private:
+ PPCTargetMachine *TM;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ DataLayout *TD;
+ DominatorTree *DT;
+ const TargetLibraryInfo *LibInfo;
};
char PPCCTRLoops::ID = 0;
+#ifndef NDEBUG
+ int PPCCTRLoops::Counter = 0;
+#endif
-
- // CountValue class - Abstraction for a trip count of a loop. A
- // smaller version of the MachineOperand class without the concerns
- // of changing the operand representation.
- class CountValue {
+#ifndef NDEBUG
+ struct PPCCTRLoopsVerify : public MachineFunctionPass {
public:
- enum CountValueType {
- CV_Register,
- CV_Immediate
- };
- private:
- CountValueType Kind;
- union Values {
- unsigned RegNum;
- int64_t ImmVal;
- Values(unsigned r) : RegNum(r) {}
- Values(int64_t i) : ImmVal(i) {}
- } Contents;
- bool isNegative;
+ static char ID;
- public:
- CountValue(unsigned r, bool neg) : Kind(CV_Register), Contents(r),
- isNegative(neg) {}
- explicit CountValue(int64_t i) : Kind(CV_Immediate), Contents(i),
- isNegative(i < 0) {}
- CountValueType getType() const { return Kind; }
- bool isReg() const { return Kind == CV_Register; }
- bool isImm() const { return Kind == CV_Immediate; }
- bool isNeg() const { return isNegative; }
-
- unsigned getReg() const {
- assert(isReg() && "Wrong CountValue accessor");
- return Contents.RegNum;
- }
- void setReg(unsigned Val) {
- Contents.RegNum = Val;
- }
- int64_t getImm() const {
- assert(isImm() && "Wrong CountValue accessor");
- if (isNegative) {
- return -Contents.ImmVal;
- }
- return Contents.ImmVal;
- }
- void setImm(int64_t Val) {
- Contents.ImmVal = Val;
+ PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
+ initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
}
- void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
- if (isReg()) { OS << PrintReg(getReg()); }
- if (isImm()) { OS << getImm(); }
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ private:
+ MachineDominatorTree *MDT;
};
+
+ char PPCCTRLoopsVerify::ID = 0;
+#endif // NDEBUG
} // end anonymous namespace
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
false, false)
-/// isCompareEqualsImm - Returns true if the instruction is a compare-equals
-/// instruction with an immediate operand.
-static bool isCompareEqualsImm(const MachineInstr *MI, bool &SignedCmp,
- bool &Int64Cmp) {
- if (MI->getOpcode() == PPC::CMPWI) {
- SignedCmp = true;
- Int64Cmp = false;
- return true;
- } else if (MI->getOpcode() == PPC::CMPDI) {
- SignedCmp = true;
- Int64Cmp = true;
- return true;
- } else if (MI->getOpcode() == PPC::CMPLWI) {
- SignedCmp = false;
- Int64Cmp = false;
- return true;
- } else if (MI->getOpcode() == PPC::CMPLDI) {
- SignedCmp = false;
- Int64Cmp = true;
- return true;
- }
-
- return false;
+FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
+ return new PPCCTRLoops(TM);
}
+#ifndef NDEBUG
+INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
+ "PowerPC CTR Loops Verify", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
+ "PowerPC CTR Loops Verify", false, false)
-/// createPPCCTRLoops - Factory for creating
-/// the CTR loop phase.
-FunctionPass *llvm::createPPCCTRLoops() {
- return new PPCCTRLoops();
+FunctionPass *llvm::createPPCCTRLoopsVerify() {
+ return new PPCCTRLoopsVerify();
}
+#endif // NDEBUG
+bool PPCCTRLoops::runOnFunction(Function &F) {
+ LI = &getAnalysis<LoopInfo>();
+ SE = &getAnalysis<ScalarEvolution>();
+ DT = &getAnalysis<DominatorTree>();
+ TD = getAnalysisIfAvailable<DataLayout>();
+ LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
-bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
- DEBUG(dbgs() << "********* PPC CTR Loops *********\n");
-
- bool Changed = false;
+ bool MadeChange = false;
- // get the loop information
- MLI = &getAnalysis<MachineLoopInfo>();
- // get the register information
- MRI = &MF.getRegInfo();
- // the target-specific instruction info.
- TII = MF.getTarget().getInstrInfo();
-
- for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end();
I != E; ++I) {
- MachineLoop *L = *I;
- if (!L->getParentLoop()) {
- Changed |= convertToCTRLoop(L);
- }
+ Loop *L = *I;
+ if (!L->getParentLoop())
+ MadeChange |= convertToCTRLoop(L);
}
- return Changed;
+ return MadeChange;
}
-/// getCanonicalInductionVariable - Check to see if the loop has a canonical
-/// induction variable. We check for a simple recurrence pattern - an
-/// integer recurrence that decrements by one each time through the loop and
-/// ends at zero. If so, return the phi node that corresponds to it.
-///
-/// Based upon the similar code in LoopInfo except this code is specific to
-/// the machine.
-/// This method assumes that the IndVarSimplify pass has been run by 'opt'.
-///
-void
-PPCCTRLoops::getCanonicalInductionVariable(MachineLoop *L,
- SmallVector<MachineInstr *, 4> &IVars,
- SmallVector<MachineInstr *, 4> &IOps) const {
- MachineBasicBlock *TopMBB = L->getTopBlock();
- MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
- assert(PI != TopMBB->pred_end() &&
- "Loop must have more than one incoming edge!");
- MachineBasicBlock *Backedge = *PI++;
- if (PI == TopMBB->pred_end()) return; // dead loop
- MachineBasicBlock *Incoming = *PI++;
- if (PI != TopMBB->pred_end()) return; // multiple backedges?
-
- // make sure there is one incoming and one backedge and determine which
- // is which.
- if (L->contains(Incoming)) {
- if (L->contains(Backedge))
- return;
- std::swap(Incoming, Backedge);
- } else if (!L->contains(Backedge))
- return;
-
- // Loop over all of the PHI nodes, looking for a canonical induction variable:
- // - The PHI node is "reg1 = PHI reg2, BB1, reg3, BB2".
- // - The recurrence comes from the backedge.
- // - The definition is an induction operation.
- for (MachineBasicBlock::iterator I = TopMBB->begin(), E = TopMBB->end();
- I != E && I->isPHI(); ++I) {
- MachineInstr *MPhi = &*I;
- unsigned DefReg = MPhi->getOperand(0).getReg();
- for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) {
- // Check each operand for the value from the backedge.
- MachineBasicBlock *MBB = MPhi->getOperand(i+1).getMBB();
- if (L->contains(MBB)) { // operand comes from the backedge
- // Check if the definition is an induction operation.
- MachineInstr *DI = MRI->getVRegDef(MPhi->getOperand(i).getReg());
- if (isInductionOperation(DI, DefReg)) {
- IOps.push_back(DI);
- IVars.push_back(MPhi);
+bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
+ for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
+ J != JE; ++J) {
+ if (CallInst *CI = dyn_cast<CallInst>(J)) {
+ if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
+ // Inline ASM is okay, unless it clobbers the ctr register.
+ InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
+ for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
+ InlineAsm::ConstraintInfo &C = CIV[i];
+ if (C.Type != InlineAsm::isInput)
+ for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
+ if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
+ return true;
}
- }
- }
- }
- return;
-}
-/// getTripCount - Return a loop-invariant LLVM value indicating the
-/// number of times the loop will be executed. The trip count can
-/// be either a register or a constant value. If the trip-count
-/// cannot be determined, this returns null.
-///
-/// We find the trip count from the phi instruction that defines the
-/// induction variable. We follow the links to the CMP instruction
-/// to get the trip count.
-///
-/// Based upon getTripCount in LoopInfo.
-///
-CountValue *PPCCTRLoops::getTripCount(MachineLoop *L,
- SmallVector<MachineInstr *, 2> &OldInsts) const {
- MachineBasicBlock *LastMBB = L->getExitingBlock();
- // Don't generate a CTR loop if the loop has more than one exit.
- if (LastMBB == 0)
- return 0;
-
- MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
- if (LastI->getOpcode() != PPC::BCC)
- return 0;
-
- // We need to make sure that this compare is defining the condition
- // register actually used by the terminating branch.
-
- unsigned PredReg = LastI->getOperand(1).getReg();
- DEBUG(dbgs() << "Examining loop with first terminator: " << *LastI);
-
- unsigned PredCond = LastI->getOperand(0).getImm();
- if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)
- return 0;
-
- // Check that the loop has an induction variable.
- SmallVector<MachineInstr *, 4> IVars, IOps;
- getCanonicalInductionVariable(L, IVars, IOps);
- for (unsigned i = 0; i < IVars.size(); ++i) {
- MachineInstr *IOp = IOps[i];
- MachineInstr *IV_Inst = IVars[i];
-
- // Canonical loops will end with a 'cmpwi/cmpdi cr, IV, Imm',
- // if Imm is 0, get the count from the PHI opnd
- // if Imm is -M, than M is the count
- // Otherwise, Imm is the count
- MachineOperand *IV_Opnd;
- const MachineOperand *InitialValue;
- if (!L->contains(IV_Inst->getOperand(2).getMBB())) {
- InitialValue = &IV_Inst->getOperand(1);
- IV_Opnd = &IV_Inst->getOperand(3);
- } else {
- InitialValue = &IV_Inst->getOperand(3);
- IV_Opnd = &IV_Inst->getOperand(1);
- }
+ continue;
+ }
- DEBUG(dbgs() << "Considering:\n");
- DEBUG(dbgs() << " induction operation: " << *IOp);
- DEBUG(dbgs() << " induction variable: " << *IV_Inst);
- DEBUG(dbgs() << " initial value: " << *InitialValue << "\n");
-
- // Look for the cmp instruction to determine if we
- // can get a useful trip count. The trip count can
- // be either a register or an immediate. The location
- // of the value depends upon the type (reg or imm).
- for (MachineRegisterInfo::reg_iterator
- RI = MRI->reg_begin(IV_Opnd->getReg()), RE = MRI->reg_end();
- RI != RE; ++RI) {
- IV_Opnd = &RI.getOperand();
- bool SignedCmp, Int64Cmp;
- MachineInstr *MI = IV_Opnd->getParent();
- if (L->contains(MI) && isCompareEqualsImm(MI, SignedCmp, Int64Cmp) &&
- MI->getOperand(0).getReg() == PredReg) {
-
- OldInsts.push_back(MI);
- OldInsts.push_back(IOp);
-
- DEBUG(dbgs() << " compare: " << *MI);
-
- const MachineOperand &MO = MI->getOperand(2);
- assert(MO.isImm() && "IV Cmp Operand should be an immediate");
-
- int64_t ImmVal;
- if (SignedCmp)
- ImmVal = (short) MO.getImm();
- else
- ImmVal = MO.getImm();
-
- const MachineInstr *IV_DefInstr = MRI->getVRegDef(IV_Opnd->getReg());
- assert(L->contains(IV_DefInstr->getParent()) &&
- "IV definition should occurs in loop");
- int64_t iv_value = (short) IV_DefInstr->getOperand(2).getImm();
-
- assert(InitialValue->isReg() && "Expecting register for init value");
- unsigned InitialValueReg = InitialValue->getReg();
-
- MachineInstr *DefInstr = MRI->getVRegDef(InitialValueReg);
-
- // Here we need to look for an immediate load (an li or lis/ori pair).
- if (DefInstr && (DefInstr->getOpcode() == PPC::ORI8 ||
- DefInstr->getOpcode() == PPC::ORI)) {
- int64_t start = DefInstr->getOperand(2).getImm();
- MachineInstr *DefInstr2 =
- MRI->getVRegDef(DefInstr->getOperand(1).getReg());
- if (DefInstr2 && (DefInstr2->getOpcode() == PPC::LIS8 ||
- DefInstr2->getOpcode() == PPC::LIS)) {
- DEBUG(dbgs() << " initial constant: " << *DefInstr);
- DEBUG(dbgs() << " initial constant: " << *DefInstr2);
-
- start |= int64_t(short(DefInstr2->getOperand(1).getImm())) << 16;
-
- int64_t count = ImmVal - start;
- if ((count % iv_value) != 0) {
- return 0;
- }
-
- OldInsts.push_back(DefInstr);
- OldInsts.push_back(DefInstr2);
-
- // count/iv_value, the trip count, should be positive here. If it
- // is negative, that indicates that the counter will wrap.
- if (Int64Cmp)
- return new CountValue(count/iv_value);
+ if (!TM)
+ return true;
+ const TargetLowering *TLI = TM->getTargetLowering();
+
+ if (Function *F = CI->getCalledFunction()) {
+ // Most intrinsics don't become function calls, but some might.
+ // sin, cos, exp and log are always calls.
+ unsigned Opcode;
+ if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
+ switch (F->getIntrinsicID()) {
+ default: continue;
+
+// VisualStudio defines setjmp as _setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::setjmp:
+
+#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
+ // let's return it to _setjmp state
+# pragma pop_macro("setjmp")
+# undef setjmp_undefined_for_msvc
+#endif
+
+ case Intrinsic::longjmp:
+
+ // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
+ // because, although it does clobber the counter register, control
+ // can't then return to inside the loop unless there is also
+ // an eh_sjlj_setjmp.
+ case Intrinsic::eh_sjlj_setjmp:
+
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ case Intrinsic::memset:
+ case Intrinsic::powi:
+ case Intrinsic::log:
+ case Intrinsic::log2:
+ case Intrinsic::log10:
+ case Intrinsic::exp:
+ case Intrinsic::exp2:
+ case Intrinsic::pow:
+ case Intrinsic::sin:
+ case Intrinsic::cos:
+ return true;
+ case Intrinsic::copysign:
+ if (CI->getArgOperand(0)->getType()->getScalarType()->
+ isPPC_FP128Ty())
+ return true;
else
- return new CountValue(uint32_t(count/iv_value));
- }
- } else if (DefInstr && (DefInstr->getOpcode() == PPC::LI8 ||
- DefInstr->getOpcode() == PPC::LI)) {
- DEBUG(dbgs() << " initial constant: " << *DefInstr);
-
- int64_t count = ImmVal -
- int64_t(short(DefInstr->getOperand(1).getImm()));
- if ((count % iv_value) != 0) {
- return 0;
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
+ case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
+ case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
+ case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
+ case Intrinsic::rint: Opcode = ISD::FRINT; break;
+ case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
+ case Intrinsic::round: Opcode = ISD::FROUND; break;
}
+ }
- OldInsts.push_back(DefInstr);
-
- if (Int64Cmp)
- return new CountValue(count/iv_value);
- else
- return new CountValue(uint32_t(count/iv_value));
- } else if (iv_value == 1 || iv_value == -1) {
- // We can't determine a constant starting value.
- if (ImmVal == 0) {
- return new CountValue(InitialValueReg, iv_value > 0);
+ // PowerPC does not use [US]DIVREM or other library calls for
+ // operations on regular types which are not otherwise library calls
+ // (i.e. soft float or atomics). If adapting for targets that do,
+ // additional care is required here.
+
+ LibFunc::Func Func;
+ if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
+ LibInfo->getLibFunc(F->getName(), Func) &&
+ LibInfo->hasOptimizedCodeGen(Func)) {
+ // Non-read-only functions are never treated as intrinsics.
+ if (!CI->onlyReadsMemory())
+ return true;
+
+ // Conversion happens only for FP calls.
+ if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
+ return true;
+
+ switch (Func) {
+ default: return true;
+ case LibFunc::copysign:
+ case LibFunc::copysignf:
+ continue; // ISD::FCOPYSIGN is never a library call.
+ case LibFunc::copysignl:
+ return true;
+ case LibFunc::fabs:
+ case LibFunc::fabsf:
+ case LibFunc::fabsl:
+ continue; // ISD::FABS is never a library call.
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ case LibFunc::sqrtl:
+ Opcode = ISD::FSQRT; break;
+ case LibFunc::floor:
+ case LibFunc::floorf:
+ case LibFunc::floorl:
+ Opcode = ISD::FFLOOR; break;
+ case LibFunc::nearbyint:
+ case LibFunc::nearbyintf:
+ case LibFunc::nearbyintl:
+ Opcode = ISD::FNEARBYINT; break;
+ case LibFunc::ceil:
+ case LibFunc::ceilf:
+ case LibFunc::ceill:
+ Opcode = ISD::FCEIL; break;
+ case LibFunc::rint:
+ case LibFunc::rintf:
+ case LibFunc::rintl:
+ Opcode = ISD::FRINT; break;
+ case LibFunc::round:
+ case LibFunc::roundf:
+ case LibFunc::roundl:
+ Opcode = ISD::FROUND; break;
+ case LibFunc::trunc:
+ case LibFunc::truncf:
+ case LibFunc::truncl:
+ Opcode = ISD::FTRUNC; break;
}
- // FIXME: handle non-zero end value.
+
+ MVT VTy =
+ TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
+ if (VTy == MVT::Other)
+ return true;
+
+ if (TLI->isOperationLegalOrCustom(Opcode, VTy))
+ continue;
+ else if (VTy.isVector() &&
+ TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
+ continue;
+
+ return true;
}
- // FIXME: handle non-unit increments (we might not want to introduce
- // division but we can handle some 2^n cases with shifts).
-
}
- }
- }
- return 0;
-}
-
-/// isInductionOperation - Return true if the operation matches the
-/// pattern that defines an induction variable:
-/// addi iv, c
-///
-bool
-PPCCTRLoops::isInductionOperation(const MachineInstr *MI,
- unsigned IVReg) const {
- return ((MI->getOpcode() == PPC::ADDI || MI->getOpcode() == PPC::ADDI8) &&
- MI->getOperand(1).isReg() && // could be a frame index instead
- MI->getOperand(1).getReg() == IVReg);
-}
-/// isInvalidLoopOperation - Return true if the operation is invalid within
-/// a CTR loop.
-bool
-PPCCTRLoops::isInvalidLoopOperation(const MachineInstr *MI) const {
-
- // calls are not allowed because the callee may use a CTR loop
- if (MI->getDesc().isCall()) {
- return true;
- }
- // check if the instruction defines a CTR loop register
- // (this will also catch nested CTR loops)
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef() &&
- (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8)) {
return true;
- }
- }
- return false;
-}
+ } else if (isa<BinaryOperator>(J) &&
+ J->getType()->getScalarType()->isPPC_FP128Ty()) {
+ // Most operations on ppc_f128 values become calls.
+ return true;
+ } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
+ isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
+ CastInst *CI = cast<CastInst>(J);
+ if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
+ CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
+ (TT.isArch32Bit() &&
+ (CI->getSrcTy()->getScalarType()->isIntegerTy(64) ||
+ CI->getDestTy()->getScalarType()->isIntegerTy(64))
+ ))
+ return true;
+ } else if (TT.isArch32Bit() &&
+ J->getType()->getScalarType()->isIntegerTy(64) &&
+ (J->getOpcode() == Instruction::UDiv ||
+ J->getOpcode() == Instruction::SDiv ||
+ J->getOpcode() == Instruction::URem ||
+ J->getOpcode() == Instruction::SRem)) {
+ return true;
+ } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
+ // On PowerPC, indirect jumps use the counter register.
+ return true;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
+ if (!TM)
+ return true;
+ const TargetLowering *TLI = TM->getTargetLowering();
-/// containsInvalidInstruction - Return true if the loop contains
-/// an instruction that inhibits the use of the CTR loop function.
-///
-bool PPCCTRLoops::containsInvalidInstruction(MachineLoop *L) const {
- const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Blocks[i];
- for (MachineBasicBlock::iterator
- MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
- const MachineInstr *MI = &*MII;
- if (isInvalidLoopOperation(MI)) {
+ if (TLI->supportJumpTables() &&
+ SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
return true;
- }
}
}
+
return false;
}
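
mightUseCTR() conservatively returns true for anything in the loop body that could become a call or otherwise touch the counter register. Illustrative loop bodies it would reject (GCC/Clang extended asm assumed for the second):

    #include <cmath>

    void rejected1(double *x, long n) {
      for (long i = 0; i < n; ++i)
        x[i] = std::sin(x[i]); // sin lowers to a real library call
    }

    void rejected2(long n) {
      for (long i = 0; i < n; ++i)
        asm volatile("" ::: "ctr"); // inline asm clobbering the CTR
    }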
-/// isDead returns true if the instruction is dead
-/// (this was essentially copied from DeadMachineInstructionElim::isDead, but
-/// with special cases for inline asm, physical registers and instructions with
-/// side effects removed)
-bool PPCCTRLoops::isDead(const MachineInstr *MI,
- SmallVector<MachineInstr *, 1> &DeadPhis) const {
- // Examine each operand.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (!MRI->use_nodbg_empty(Reg)) {
- // This instruction has users, but if the only user is the phi node for
- // the parent block, and the only use of that phi node is this
- // instruction, then this instruction is dead: both it (and the phi
- // node) can be removed.
- MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg);
- if (llvm::next(I) == MRI->use_end() &&
- I.getOperand().getParent()->isPHI()) {
- MachineInstr *OnePhi = I.getOperand().getParent();
-
- for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
- const MachineOperand &OPO = OnePhi->getOperand(j);
- if (OPO.isReg() && OPO.isDef()) {
- unsigned OPReg = OPO.getReg();
-
- MachineRegisterInfo::use_iterator nextJ;
- for (MachineRegisterInfo::use_iterator J = MRI->use_begin(OPReg),
- E = MRI->use_end(); J!=E; J=nextJ) {
- nextJ = llvm::next(J);
- MachineOperand& Use = J.getOperand();
- MachineInstr *UseMI = Use.getParent();
-
- if (MI != UseMI) {
- // The phi node has a user that is not MI, bail...
- return false;
- }
- }
- }
- }
+bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
+ bool MadeChange = false;
- DeadPhis.push_back(OnePhi);
- } else {
- // This def has a non-debug use. Don't delete the instruction!
- return false;
- }
- }
- }
+ Triple TT = Triple(L->getHeader()->getParent()->getParent()->
+ getTargetTriple());
+ if (!TT.isArch32Bit() && !TT.isArch64Bit())
+ return MadeChange; // Unknown arch. type.
+
+ // Process nested loops first.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
+ MadeChange |= convertToCTRLoop(*I);
}
- // If there are no defs with uses, the instruction is dead.
- return true;
-}
+ // If a nested loop has been converted, then we can't convert this loop.
+ if (MadeChange)
+ return MadeChange;
+
+#ifndef NDEBUG
+ // Stop trying after reaching the limit (if any).
+ int Limit = CTRLoopLimit;
+ if (Limit >= 0) {
+ if (Counter >= CTRLoopLimit)
+ return false;
+ Counter++;
+ }
+#endif
+
+ // We don't want to spill/restore the counter register, and so we don't
+ // want to use the counter register if the loop contains calls (or anything
+ // else that mightUseCTR flags as using or clobbering it).
+ for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
+ I != IE; ++I)
+ if (mightUseCTR(TT, *I))
+ return MadeChange;
+
+ SmallVector<BasicBlock*, 4> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+
+ BasicBlock *CountedExitBlock = 0;
+ const SCEV *ExitCount = 0;
+ BranchInst *CountedExitBranch = 0;
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ IE = ExitingBlocks.end(); I != IE; ++I) {
+ const SCEV *EC = SE->getExitCount(L, *I);
+ DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
+ (*I)->getName() << ": " << *EC << "\n");
+ if (isa<SCEVCouldNotCompute>(EC))
+ continue;
+ if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
+ if (ConstEC->getValue()->isZero())
+ continue;
+ } else if (!SE->isLoopInvariant(EC, L))
+ continue;
+
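+ // Reject trip counts wider than the CTR register (32 or 64 bits).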
+ if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
+ continue;
+
+ // We now have a loop-invariant count of loop iterations (which is not the
+ // constant zero) for which we know that this loop will not exit via this
+ // exiting block.
+
+ // We need to make sure that this block will run on every loop iteration.
+ // For this to be true, we must dominate all blocks with backedges. Such
+ // blocks are in-loop predecessors to the header block.
+ bool NotAlways = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
+ if (!L->contains(*PI))
+ continue;
-void PPCCTRLoops::removeIfDead(MachineInstr *MI) {
- // This procedure was essentially copied from DeadMachineInstructionElim
+ if (!DT->dominates(*I, *PI)) {
+ NotAlways = true;
+ break;
+ }
+ }
- SmallVector<MachineInstr *, 1> DeadPhis;
- if (isDead(MI, DeadPhis)) {
- DEBUG(dbgs() << "CTR looping will remove: " << *MI);
+ if (NotAlways)
+ continue;
- // It is possible that some DBG_VALUE instructions refer to this
- // instruction. Examine each def operand for such references;
- // if found, mark the DBG_VALUE as undef (but don't delete it).
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isDef())
+ // Make sure this block ends with a conditional branch.
+ Instruction *TI = (*I)->getTerminator();
+ if (!TI)
+ continue;
+
+ if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (!BI->isConditional())
continue;
- unsigned Reg = MO.getReg();
- MachineRegisterInfo::use_iterator nextI;
- for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
- E = MRI->use_end(); I!=E; I=nextI) {
- nextI = llvm::next(I); // I is invalidated by the setReg
- MachineOperand& Use = I.getOperand();
- MachineInstr *UseMI = Use.getParent();
- if (UseMI==MI)
- continue;
- if (Use.isDebug()) // this might also be a instr -> phi -> instr case
- // which can also be removed.
- UseMI->getOperand(0).setReg(0U);
- }
- }
- MI->eraseFromParent();
- for (unsigned i = 0; i < DeadPhis.size(); ++i) {
- DeadPhis[i]->eraseFromParent();
- }
+ CountedExitBranch = BI;
+ } else
+ continue;
+
+ // Note that this block may not be the loop latch block, even if the loop
+ // has a latch block.
+ CountedExitBlock = *I;
+ ExitCount = EC;
+ break;
}
+
+ if (!CountedExitBlock)
+ return MadeChange;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+
+ // If we don't have a preheader, then insert one. If we already have a
+ // preheader, then we can use it (except if the preheader contains a use of
+ // the CTR register because some such uses might be reordered by the
+ // selection DAG after the mtctr instruction).
+ if (!Preheader || mightUseCTR(TT, Preheader))
+ Preheader = InsertPreheaderForLoop(L, this);
+ if (!Preheader)
+ return MadeChange;
+
+ DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");
+
+ // Insert the count into the preheader and replace the condition used by the
+ // selected branch.
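+ // As a sketch (all names below are illustrative), for a computed trip
+ // count %tc the preheader gains:
+ // %cnt = add i64 %tc, 1
+ // call void @llvm.ppc.mtctr.i64(i64 %cnt)
+ // and the counted exit branch is rewritten to:
+ // %dec = call i1 @llvm.ppc.is.decremented.ctr.nonzero()
+ // br i1 %dec, label %body, label %exit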
+ MadeChange = true;
+
+ SCEVExpander SCEVE(*SE, "loopcnt");
+ LLVMContext &C = SE->getContext();
+ Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
+ Type::getInt32Ty(C);
+ if (!ExitCount->getType()->isPointerTy() &&
+ ExitCount->getType() != CountType)
+ ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
+ ExitCount = SE->getAddExpr(ExitCount,
+ SE->getConstant(CountType, 1));
+ Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
+ Preheader->getTerminator());
+
+ IRBuilder<> CountBuilder(Preheader->getTerminator());
+ Module *M = Preheader->getParent()->getParent();
+ Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
+ CountType);
+ CountBuilder.CreateCall(MTCTRFunc, ECValue);
+
+ IRBuilder<> CondBuilder(CountedExitBranch);
+ Value *DecFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
+ Value *NewCond = CondBuilder.CreateCall(DecFunc);
+ Value *OldCond = CountedExitBranch->getCondition();
+ CountedExitBranch->setCondition(NewCond);
+
+ // The false branch must exit the loop.
+ if (!L->contains(CountedExitBranch->getSuccessor(0)))
+ CountedExitBranch->swapSuccessors();
+
+ // The old condition may be dead now, and may have even created a dead PHI
+ // (the original induction variable).
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+ DeleteDeadPHIs(CountedExitBlock);
+
+ ++NumCTRLoops;
+ return MadeChange;
}
-/// converToCTRLoop - check if the loop is a candidate for
-/// converting to a CTR loop. If so, then perform the
-/// transformation.
-///
-/// This function works on innermost loops first. A loop can
-/// be converted if it is a counting loop; either a register
-/// value or an immediate.
-///
-/// The code makes several assumptions about the representation
-/// of the loop in llvm.
-bool PPCCTRLoops::convertToCTRLoop(MachineLoop *L) {
- bool Changed = false;
- // Process nested loops first.
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
- Changed |= convertToCTRLoop(*I);
- }
- // If a nested loop has been converted, then we can't convert this loop.
- if (Changed) {
- return Changed;
+#ifndef NDEBUG
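+// Return true if MI writes CTR/CTR8 directly, or clobbers it via a register
+// mask (e.g. across a call).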
+static bool clobbersCTR(const MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg()) {
+ if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
+ return true;
+ } else if (MO.isRegMask()) {
+ if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
+ return true;
+ }
}
- SmallVector<MachineInstr *, 2> OldInsts;
- // Are we able to determine the trip count for the loop?
- CountValue *TripCount = getTripCount(L, OldInsts);
- if (TripCount == 0) {
- DEBUG(dbgs() << "failed to get trip count!\n");
- return false;
- }
+ return false;
+}
- if (TripCount->isImm()) {
- DEBUG(dbgs() << "constant trip count: " << TripCount->getImm() << "\n");
+static bool verifyCTRBranch(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator I) {
+ MachineBasicBlock::iterator BI = I;
+ SmallSet<MachineBasicBlock *, 16> Visited;
+ SmallVector<MachineBasicBlock *, 8> Preds;
+ bool CheckPreds;
+
+ if (I == MBB->begin()) {
+ Visited.insert(MBB);
+ goto queue_preds;
+ } else
+ --I;
+
+check_block:
+ Visited.insert(MBB);
+ if (I == MBB->end())
+ goto queue_preds;
+
+ CheckPreds = true;
+ for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
+ unsigned Opc = I->getOpcode();
+ if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
+ CheckPreds = false;
+ break;
+ }
- // FIXME: We currently can't form 64-bit constants
- // (including 32-bit unsigned constants)
- if (!isInt<32>(TripCount->getImm()))
+ if (I != BI && clobbersCTR(I)) {
+ DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" <<
+ MBB->getFullName() << ") instruction " << *I <<
+ " clobbers CTR, invalidating " << "BB#" <<
+ BI->getParent()->getNumber() << " (" <<
+ BI->getParent()->getFullName() << ") instruction " <<
+ *BI << "\n");
return false;
- }
+ }
- // Does the loop contain any invalid instructions?
- if (containsInvalidInstruction(L)) {
- return false;
+ if (I == IE)
+ break;
}
- MachineBasicBlock *Preheader = L->getLoopPreheader();
- // No preheader means there's not place for the loop instr.
- if (Preheader == 0) {
- return false;
- }
- MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
- DebugLoc dl;
- if (InsertPos != Preheader->end())
- dl = InsertPos->getDebugLoc();
+ if (!CheckPreds && Preds.empty())
+ return true;
- MachineBasicBlock *LastMBB = L->getExitingBlock();
- // Don't generate CTR loop if the loop has more than one exit.
- if (LastMBB == 0) {
- return false;
- }
- MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
-
- // Determine the loop start.
- MachineBasicBlock *LoopStart = L->getTopBlock();
- if (L->getLoopLatch() != LastMBB) {
- // When the exit and latch are not the same, use the latch block as the
- // start.
- // The loop start address is used only after the 1st iteration, and the loop
- // latch may contains instrs. that need to be executed after the 1st iter.
- LoopStart = L->getLoopLatch();
- // Make sure the latch is a successor of the exit, otherwise it won't work.
- if (!LastMBB->isSuccessor(LoopStart)) {
+ if (CheckPreds) {
+queue_preds:
+ if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
+ DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" <<
+ BI->getParent()->getNumber() << " (" <<
+ BI->getParent()->getFullName() << ") instruction " <<
+ *BI << "\n");
return false;
}
+
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PIE = MBB->pred_end(); PI != PIE; ++PI)
+ Preds.push_back(*PI);
}
- // Convert the loop to a CTR loop
- DEBUG(dbgs() << "Change to CTR loop at "; L->dump());
-
- MachineFunction *MF = LastMBB->getParent();
- const PPCSubtarget &Subtarget = MF->getTarget().getSubtarget<PPCSubtarget>();
- bool isPPC64 = Subtarget.isPPC64();
-
- const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
- const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
- const TargetRegisterClass *RC = isPPC64 ? G8RC : GPRC;
-
- unsigned CountReg;
- if (TripCount->isReg()) {
- // Create a copy of the loop count register.
- const TargetRegisterClass *SrcRC =
- MF->getRegInfo().getRegClass(TripCount->getReg());
- CountReg = MF->getRegInfo().createVirtualRegister(RC);
- unsigned CopyOp = (isPPC64 && GPRC->hasSubClassEq(SrcRC)) ?
- (unsigned) PPC::EXTSW_32_64 :
- (unsigned) TargetOpcode::COPY;
- BuildMI(*Preheader, InsertPos, dl,
- TII->get(CopyOp), CountReg).addReg(TripCount->getReg());
- if (TripCount->isNeg()) {
- unsigned CountReg1 = CountReg;
- CountReg = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*Preheader, InsertPos, dl,
- TII->get(isPPC64 ? PPC::NEG8 : PPC::NEG),
- CountReg).addReg(CountReg1);
+ do {
+ MBB = Preds.pop_back_val();
+ if (!Visited.count(MBB)) {
+ I = MBB->getLastNonDebugInstr();
+ goto check_block;
}
- } else {
- assert(TripCount->isImm() && "Expecting immedate vaule for trip count");
- // Put the trip count in a register for transfer into the count register.
-
- int64_t CountImm = TripCount->getImm();
- if (TripCount->isNeg())
- CountImm = -CountImm;
-
- CountReg = MF->getRegInfo().createVirtualRegister(RC);
- if (abs64(CountImm) > 0x7FFF) {
- BuildMI(*Preheader, InsertPos, dl,
- TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS),
- CountReg).addImm((CountImm >> 16) & 0xFFFF);
- unsigned CountReg1 = CountReg;
- CountReg = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*Preheader, InsertPos, dl,
- TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
- CountReg).addReg(CountReg1).addImm(CountImm & 0xFFFF);
- } else {
- BuildMI(*Preheader, InsertPos, dl,
- TII->get(isPPC64 ? PPC::LI8 : PPC::LI),
- CountReg).addImm(CountImm);
- }
- }
+ } while (!Preds.empty());
- // Add the mtctr instruction to the beginning of the loop.
- BuildMI(*Preheader, InsertPos, dl,
- TII->get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(CountReg,
- TripCount->isImm() ? RegState::Kill : 0);
-
- // Make sure the loop start always has a reference in the CFG. We need to
- // create a BlockAddress operand to get this mechanism to work both the
- // MachineBasicBlock and BasicBlock objects need the flag set.
- LoopStart->setHasAddressTaken();
- // This line is needed to set the hasAddressTaken flag on the BasicBlock
- // object
- BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
-
- // Replace the loop branch with a bdnz instruction.
- dl = LastI->getDebugLoc();
- const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *MBB = Blocks[i];
- if (MBB != Preheader)
- MBB->addLiveIn(isPPC64 ? PPC::CTR8 : PPC::CTR);
- }
+ return true;
+}
- // The loop ends with either:
- // - a conditional branch followed by an unconditional branch, or
- // - a conditional branch to the loop start.
- assert(LastI->getOpcode() == PPC::BCC &&
- "loop end must start with a BCC instruction");
- // Either the BCC branches to the beginning of the loop, or it
- // branches out of the loop and there is an unconditional branch
- // to the start of the loop.
- MachineBasicBlock *BranchTarget = LastI->getOperand(2).getMBB();
- BuildMI(*LastMBB, LastI, dl,
- TII->get((BranchTarget == LoopStart) ?
- (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
- (isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(BranchTarget);
-
- // Conditional branch; just delete it.
- DEBUG(dbgs() << "Removing old branch: " << *LastI);
- LastMBB->erase(LastI);
-
- delete TripCount;
-
- // The induction operation (add) and the comparison (cmpwi) may now be
- // unneeded. If these are unneeded, then remove them.
- for (unsigned i = 0; i < OldInsts.size(); ++i)
- removeIfDead(OldInsts[i]);
+bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
+ MDT = &getAnalysis<MachineDominatorTree>();
+
+ // Verify that every bdnz/bdz instruction is dominated by a loop mtctr, with
+ // no intervening instructions that might clobber the ctr register.
+ for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
+ I != IE; ++I) {
+ MachineBasicBlock *MBB = I;
+ if (!MDT->isReachableFromEntry(MBB))
+ continue;
+
+ for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
+ MIIE = MBB->end(); MII != MIIE; ++MII) {
+ unsigned Opc = MII->getOpcode();
+ if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
+ Opc == PPC::BDZ8 || Opc == PPC::BDZ)
+ if (!verifyCTRBranch(MBB, MII))
+ llvm_unreachable("Invalid PPC CTR loop!");
+ }
+ }
- ++NumCTRLoops;
- return true;
+ return false;
}
+#endif // NDEBUG
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
index c8a29a3..e8e7f4c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -37,6 +37,37 @@ def RetCC_PPC : CallingConv<[
]>;
+// Note that we don't currently have calling conventions for 64-bit
+// PowerPC, but handle all the complexities of the ABI in the lowering
+// logic. FIXME: See if the logic can be simplified with use of CCs.
+// This may require some extensions to current table generation.
+
+// Simple calling convention for 64-bit ELF PowerPC fast isel.
+// Only handle ints and floats. All ints are promoted to i64.
+// Vector types and quadword ints are not handled.
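+// For example (illustrative only): for a call f(i8 42, double 1.0), this
+// convention passes the i8 promoted to i64 in X3 and the double in F1.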
+def CC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i8], CCPromoteToType<i64>>,
+ CCIfType<[i16], CCPromoteToType<i64>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>,
+ CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>
+]>;
+
+// Simple return-value convention for 64-bit ELF PowerPC fast isel.
+// All small ints are promoted to i64. Vector types, quadword ints,
+// and multiple register returns are "supported" to avoid compile
+// errors, but none are handled by the fast selector.
+def RetCC_PPC64_ELF_FIS : CallingConv<[
+ CCIfType<[i8], CCPromoteToType<i64>>,
+ CCIfType<[i16], CCPromoteToType<i64>>,
+ CCIfType<[i32], CCPromoteToType<i64>>,
+ CCIfType<[i64], CCAssignToReg<[X3, X4]>>,
+ CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>,
+ CCIfType<[f32], CCAssignToReg<[F1, F2]>>,
+ CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4]>>,
+ CCIfType<[v16i8, v8i16, v4i32, v4f32], CCAssignToReg<[V2]>>
+]>;
+
//===----------------------------------------------------------------------===//
// PowerPC System V Release 4 32-bit ABI
//===----------------------------------------------------------------------===//
@@ -105,40 +136,45 @@ def CC_PPC32_SVR4_ByVal : CallingConv<[
CCCustom<"CC_PPC32_SVR4_Custom_Dummy">
]>;
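+// The Altivec registers are factored into a separate callee-saved list,
+// presumably so that subtargets without Altivec need not preserve V20-V31.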
+def CSR_Altivec : CalleeSavedRegs<(add V20, V21, V22, V23, V24, V25, V26, V27,
+ V28, V29, V30, V31)>;
+
def CSR_Darwin32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
R21, R22, R23, R24, R25, R26, R27, R28,
R29, R30, R31, F14, F15, F16, F17, F18,
F19, F20, F21, F22, F23, F24, F25, F26,
- F27, F28, F29, F30, F31, CR2, CR3, CR4,
- V20, V21, V22, V23, V24, V25, V26, V27,
- V28, V29, V30, V31)>;
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
+def CSR_Darwin32_Altivec : CalleeSavedRegs<(add CSR_Darwin32, CSR_Altivec)>;
-def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20, VRSAVE,
+def CSR_SVR432 : CalleeSavedRegs<(add R14, R15, R16, R17, R18, R19, R20,
R21, R22, R23, R24, R25, R26, R27, R28,
R29, R30, R31, F14, F15, F16, F17, F18,
F19, F20, F21, F22, F23, F24, F25, F26,
- F27, F28, F29, F30, F31, CR2, CR3, CR4,
- V20, V21, V22, V23, V24, V25, V26, V27,
- V28, V29, V30, V31)>;
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
+def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>;
def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
X21, X22, X23, X24, X25, X26, X27, X28,
X29, X30, X31, F14, F15, F16, F17, F18,
F19, F20, F21, F22, F23, F24, F25, F26,
- F27, F28, F29, F30, F31, CR2, CR3, CR4,
- V20, V21, V22, V23, V24, V25, V26, V27,
- V28, V29, V30, V31)>;
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
-def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20, VRSAVE,
+def CSR_Darwin64_Altivec : CalleeSavedRegs<(add CSR_Darwin64, CSR_Altivec)>;
+
+def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
X21, X22, X23, X24, X25, X26, X27, X28,
X29, X30, X31, F14, F15, F16, F17, F18,
F19, F20, F21, F22, F23, F24, F25, F26,
- F27, F28, F29, F30, F31, CR2, CR3, CR4,
- V20, V21, V22, V23, V24, V25, V26, V27,
- V28, V29, V30, V31)>;
+ F27, F28, F29, F30, F31, CR2, CR3, CR4
+ )>;
+
-def CSR_NoRegs : CalleeSavedRegs<(add VRSAVE)>;
-def CSR_NoRegs_Darwin : CalleeSavedRegs<(add)>;
+def CSR_SVR464_Altivec : CalleeSavedRegs<(add CSR_SVR464, CSR_Altivec)>;
-def CSR_NoRegs_Altivec : CalleeSavedRegs<(add (sequence "V%u", 0, 31), VRSAVE)>;
+def CSR_NoRegs : CalleeSavedRegs<(add)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
index 6478718..418736e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp
@@ -63,12 +63,15 @@ namespace {
unsigned get_crbitm_encoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getDirectBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getAbsDirectBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const;
+ unsigned getAbsCondBrEncoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getHA16Encoding(const MachineInstr &MI, unsigned OpNo) const;
- unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getImm16Encoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
+ unsigned getTLSCallEncoding(const MachineInstr &MI, unsigned OpNo) const;
const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
@@ -139,8 +142,8 @@ void PPCCodeEmitter::emitBasicBlock(MachineBasicBlock &MBB) {
unsigned PPCCodeEmitter::get_crbitm_encoding(const MachineInstr &MI,
unsigned OpNo) const {
const MachineOperand &MO = MI.getOperand(OpNo);
- assert((MI.getOpcode() == PPC::MTCRF || MI.getOpcode() == PPC::MTCRF8 ||
- MI.getOpcode() == PPC::MFOCRF) &&
+ assert((MI.getOpcode() == PPC::MTOCRF || MI.getOpcode() == PPC::MTOCRF8 ||
+ MI.getOpcode() == PPC::MFOCRF || MI.getOpcode() == PPC::MFOCRF8) &&
(MO.getReg() >= PPC::CR0 && MO.getReg() <= PPC::CR7));
return 0x80 >> TM.getRegisterInfo()->getEncodingValue(MO.getReg());
}
@@ -194,21 +197,32 @@ unsigned PPCCodeEmitter::getCondBrEncoding(const MachineInstr &MI,
return 0;
}
-unsigned PPCCodeEmitter::getHA16Encoding(const MachineInstr &MI,
- unsigned OpNo) const {
+unsigned PPCCodeEmitter::getAbsDirectBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
const MachineOperand &MO = MI.getOperand(OpNo);
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_high));
- return 0;
+ llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
+}
+
+unsigned PPCCodeEmitter::getAbsCondBrEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("Absolute branch relocations unsupported on the old JIT.");
}
-unsigned PPCCodeEmitter::getLO16Encoding(const MachineInstr &MI,
- unsigned OpNo) const {
+unsigned PPCCodeEmitter::getImm16Encoding(const MachineInstr &MI,
+ unsigned OpNo) const {
const MachineOperand &MO = MI.getOperand(OpNo);
if (MO.isReg() || MO.isImm()) return getMachineOpValue(MI, MO);
-
- MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low));
+
+ unsigned RelocID;
+ switch (MO.getTargetFlags() & PPCII::MO_ACCESS_MASK) {
+ default: llvm_unreachable("Unsupported target operand flags!");
+ case PPCII::MO_LO: RelocID = PPC::reloc_absolute_low; break;
+ case PPCII::MO_HA: RelocID = PPC::reloc_absolute_high; break;
+ }
+
+ MCE.addRelocation(GetRelocation(MO, RelocID));
return 0;
}
@@ -237,7 +251,7 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(OpNo);
if (MO.isImm())
- return (getMachineOpValue(MI, MO) & 0x3FFF) | RegBits;
+ return ((getMachineOpValue(MI, MO) >> 2) & 0x3FFF) | RegBits;
MCE.addRelocation(GetRelocation(MO, PPC::reloc_absolute_low_ix));
return RegBits;
@@ -250,15 +264,20 @@ unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
return 0;
}
+unsigned PPCCodeEmitter::getTLSCallEncoding(const MachineInstr &MI,
+ unsigned OpNo) const {
+ llvm_unreachable("TLS not supported on the old JIT.");
+ return 0;
+}
unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {
if (MO.isReg()) {
- // MTCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
+ // MTOCRF/MFOCRF should go through get_crbitm_encoding for the CR operand.
// The GPR operand should come through here though.
- assert((MI.getOpcode() != PPC::MTCRF && MI.getOpcode() != PPC::MTCRF8 &&
- MI.getOpcode() != PPC::MFOCRF) ||
+ assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
+ MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
return TM.getRegisterInfo()->getEncodingValue(MO.getReg());
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
new file mode 100644
index 0000000..09117e7
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -0,0 +1,2236 @@
+//===-- PPCFastISel.cpp - PowerPC FastISel implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the PowerPC-specific support for the FastISel class. Some
+// of the target-specific code is generated by tablegen in the file
+// PPCGenFastISel.inc, which is #included here.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "ppcfastisel"
+#include "PPC.h"
+#include "PPCISelLowering.h"
+#include "PPCSubtarget.h"
+#include "PPCTargetMachine.h"
+#include "MCTargetDesc/PPCPredicates.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/FastISel.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+
+//===----------------------------------------------------------------------===//
+//
+// TBD:
+// FastLowerArguments: Handle simple cases.
+// PPCMaterializeGV: Handle TLS.
+// SelectCall: Handle function pointers.
+// SelectCall: Handle multi-register return values.
+// SelectCall: Optimize away nops for local calls.
+// processCallArgs: Handle bit-converted arguments.
+// finishCall: Handle multi-register return values.
+// PPCComputeAddress: Handle parameter references as FrameIndex's.
+// PPCEmitCmp: Handle immediate as operand 1.
+// SelectCall: Handle small byval arguments.
+// SelectIntrinsicCall: Implement.
+// SelectSelect: Implement.
+// Consider factoring isTypeLegal into the base class.
+// Implement switches and jump tables.
+//
+//===----------------------------------------------------------------------===//
+using namespace llvm;
+
+namespace {
+
+typedef struct Address {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ union {
+ unsigned Reg;
+ int FI;
+ } Base;
+
+ long Offset;
+
+ // Innocuous defaults for our address.
+ Address()
+ : BaseType(RegBase), Offset(0) {
+ Base.Reg = 0;
+ }
+} Address;
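+// An Address is either a (base register, offset) pair or a (frame index,
+// offset) pair; PPCComputeAddress below fills one in from an IR pointer.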
+
+class PPCFastISel : public FastISel {
+
+ const TargetMachine &TM;
+ const TargetInstrInfo &TII;
+ const TargetLowering &TLI;
+ const PPCSubtarget &PPCSubTarget;
+ LLVMContext *Context;
+
+ public:
+ explicit PPCFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo)
+ : FastISel(FuncInfo, LibInfo),
+ TM(FuncInfo.MF->getTarget()),
+ TII(*TM.getInstrInfo()),
+ TLI(*TM.getTargetLowering()),
+ PPCSubTarget(
+ *((static_cast<const PPCTargetMachine *>(&TM))->getSubtargetImpl())
+ ),
+ Context(&FuncInfo.Fn->getContext()) { }
+
+ // Backend specific FastISel code.
+ private:
+ virtual bool TargetSelectInstruction(const Instruction *I);
+ virtual unsigned TargetMaterializeConstant(const Constant *C);
+ virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);
+ virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI);
+ virtual bool FastLowerArguments();
+ virtual unsigned FastEmit_i(MVT Ty, MVT RetTy, unsigned Opc, uint64_t Imm);
+ virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm);
+ virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill);
+ virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill);
+
+ // Instruction selection routines.
+ private:
+ bool SelectLoad(const Instruction *I);
+ bool SelectStore(const Instruction *I);
+ bool SelectBranch(const Instruction *I);
+ bool SelectIndirectBr(const Instruction *I);
+ bool SelectCmp(const Instruction *I);
+ bool SelectFPExt(const Instruction *I);
+ bool SelectFPTrunc(const Instruction *I);
+ bool SelectIToFP(const Instruction *I, bool IsSigned);
+ bool SelectFPToI(const Instruction *I, bool IsSigned);
+ bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
+ bool SelectCall(const Instruction *I);
+ bool SelectRet(const Instruction *I);
+ bool SelectTrunc(const Instruction *I);
+ bool SelectIntExt(const Instruction *I);
+
+ // Utility routines.
+ private:
+ bool isTypeLegal(Type *Ty, MVT &VT);
+ bool isLoadTypeLegal(Type *Ty, MVT &VT);
+ bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
+ bool isZExt, unsigned DestReg);
+ bool PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ const TargetRegisterClass *RC, bool IsZExt = true,
+ unsigned FP64LoadOpc = PPC::LFD);
+ bool PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr);
+ bool PPCComputeAddress(const Value *Obj, Address &Addr);
+ void PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+ unsigned &IndexReg);
+ bool PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt);
+ unsigned PPCMaterializeFP(const ConstantFP *CFP, MVT VT);
+ unsigned PPCMaterializeGV(const GlobalValue *GV, MVT VT);
+ unsigned PPCMaterializeInt(const Constant *C, MVT VT);
+ unsigned PPCMaterialize32BitInt(int64_t Imm,
+ const TargetRegisterClass *RC);
+ unsigned PPCMaterialize64BitInt(int64_t Imm,
+ const TargetRegisterClass *RC);
+ unsigned PPCMoveToIntReg(const Instruction *I, MVT VT,
+ unsigned SrcReg, bool IsSigned);
+ unsigned PPCMoveToFPReg(MVT VT, unsigned SrcReg, bool IsSigned);
+
+ // Call handling routines.
+ private:
+ bool processCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool IsVarArg);
+ void finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool IsVarArg);
+ CCAssignFn *usePPC32CCs(unsigned Flag);
+
+ private:
+ #include "PPCGenFastISel.inc"
+
+};
+
+} // end anonymous namespace
+
+#include "PPCGenCallingConv.inc"
+
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCFastISel::usePPC32CCs(unsigned Flag) {
+ if (Flag == 1)
+ return CC_PPC32_SVR4;
+ else if (Flag == 2)
+ return CC_PPC32_SVR4_ByVal;
+ else if (Flag == 3)
+ return CC_PPC32_SVR4_VarArg;
+ else
+ return RetCC_PPC;
+}
+
+static Optional<PPC::Predicate> getComparePred(CmpInst::Predicate Pred) {
+ switch (Pred) {
+ // These are not representable with any single compare.
+ case CmpInst::FCMP_FALSE:
+ case CmpInst::FCMP_UEQ:
+ case CmpInst::FCMP_UGT:
+ case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_ULT:
+ case CmpInst::FCMP_ULE:
+ case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ default:
+ return Optional<PPC::Predicate>();
+
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::ICMP_EQ:
+ return PPC::PRED_EQ;
+
+ case CmpInst::FCMP_OGT:
+ case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_SGT:
+ return PPC::PRED_GT;
+
+ case CmpInst::FCMP_OGE:
+ case CmpInst::ICMP_UGE:
+ case CmpInst::ICMP_SGE:
+ return PPC::PRED_GE;
+
+ case CmpInst::FCMP_OLT:
+ case CmpInst::ICMP_ULT:
+ case CmpInst::ICMP_SLT:
+ return PPC::PRED_LT;
+
+ case CmpInst::FCMP_OLE:
+ case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SLE:
+ return PPC::PRED_LE;
+
+ case CmpInst::FCMP_ONE:
+ case CmpInst::ICMP_NE:
+ return PPC::PRED_NE;
+
+ case CmpInst::FCMP_ORD:
+ return PPC::PRED_NU;
+
+ case CmpInst::FCMP_UNO:
+ return PPC::PRED_UN;
+ }
+}
+
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel, and return its equivalent machine type in VT.
+// FIXME: Copied directly from ARM -- factor into base class?
+bool PPCFastISel::isTypeLegal(Type *Ty, MVT &VT) {
+ EVT Evt = TLI.getValueType(Ty, true);
+
+ // Only handle simple types.
+ if (Evt == MVT::Other || !Evt.isSimple()) return false;
+ VT = Evt.getSimpleVT();
+
+ // Handle all legal types, i.e. a register that will directly hold this
+ // value.
+ return TLI.isTypeLegal(VT);
+}
+
+// Determine whether the type Ty is simple enough to be handled by
+// fast-isel as a load target, and return its equivalent machine type in VT.
+bool PPCFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
+ if (isTypeLegal(Ty, VT)) return true;
+
+ // If this is a type that can be sign- or zero-extended to a basic operation,
+ // go ahead and accept it now.
+ if (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) {
+ return true;
+ }
+
+ return false;
+}
+
+// Given a value Obj, create an Address object Addr that represents its
+// address. Return false if we can't handle it.
+bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
+ const User *U = NULL;
+ unsigned Opcode = Instruction::UserOp1;
+ if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
+ // Don't walk into other basic blocks unless the object is an alloca from
+ // another block, otherwise it may not have a virtual register assigned.
+ if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
+ FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
+ Opcode = I->getOpcode();
+ U = I;
+ }
+ } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
+ Opcode = C->getOpcode();
+ U = C;
+ }
+
+ switch (Opcode) {
+ default:
+ break;
+ case Instruction::BitCast:
+ // Look through bitcasts.
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ case Instruction::IntToPtr:
+ // Look past no-op inttoptrs.
+ if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::PtrToInt:
+ // Look past no-op ptrtoints.
+ if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
+ return PPCComputeAddress(U->getOperand(0), Addr);
+ break;
+ case Instruction::GetElementPtr: {
+ Address SavedAddr = Addr;
+ long TmpOffset = Addr.Offset;
+
+ // Iterate through the GEP folding the constants into offsets where
+ // we can.
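+ // Sketch (hypothetical GEP): for the equivalent of &base[3].f, with an
+ // array element size of 16 and field f at offset 8 in the struct layout,
+ // TmpOffset accumulates 3*16 + 8 = 56, leaving only the base to resolve.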
+ gep_type_iterator GTI = gep_type_begin(U);
+ for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
+ II != IE; ++II, ++GTI) {
+ const Value *Op = *II;
+ if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ const StructLayout *SL = TD.getStructLayout(STy);
+ unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
+ TmpOffset += SL->getElementOffset(Idx);
+ } else {
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ TmpOffset += CI->getSExtValue() * S;
+ break;
+ }
+ if (canFoldAddIntoGEP(U, Op)) {
+ // A compatible add with a constant operand. Fold the constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ TmpOffset += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ // Unsupported
+ goto unsupported_gep;
+ }
+ }
+ }
+
+ // Try to grab the base operand now.
+ Addr.Offset = TmpOffset;
+ if (PPCComputeAddress(U->getOperand(0), Addr)) return true;
+
+ // We failed, restore everything and try the other options.
+ Addr = SavedAddr;
+
+ unsupported_gep:
+ break;
+ }
+ case Instruction::Alloca: {
+ const AllocaInst *AI = cast<AllocaInst>(Obj);
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = SI->second;
+ return true;
+ }
+ break;
+ }
+ }
+
+ // FIXME: References to parameters fall through to the behavior
+ // below. They should be able to reference a frame index since
+ // they are stored to the stack, so we can get "ld rx, offset(r1)"
+ // instead of "addi ry, r1, offset / ld rx, 0(ry)". Obj will
+ // just contain the parameter. Try to handle this with a FI.
+
+ // Try to get this in a register if nothing else has worked.
+ if (Addr.Base.Reg == 0)
+ Addr.Base.Reg = getRegForValue(Obj);
+
+ // Prevent assignment of base register to X0, which is inappropriate
+ // for loads and stores alike.
+ if (Addr.Base.Reg != 0)
+ MRI.setRegClass(Addr.Base.Reg, &PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ return Addr.Base.Reg != 0;
+}
+
+// Fix up some addresses that can't be used directly. For example, if
+// an offset won't fit in an instruction field, we may need to move it
+// into an index register.
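+// Concretely: an offset such as 0x12345 does not fit the signed 16-bit
+// displacement field of a D-form memory instruction, so it is materialized
+// into a register and the X-form (indexed) variant is used instead.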
+void PPCFastISel::PPCSimplifyAddress(Address &Addr, MVT VT, bool &UseOffset,
+ unsigned &IndexReg) {
+
+ // Check whether the offset fits in the instruction field.
+ if (!isInt<16>(Addr.Offset))
+ UseOffset = false;
+
+ // If this is a frame-index base and the offset needs to be simplified, then
+ // put the alloca address into a register, set the base type back to
+ // register and continue. This should almost never happen.
+ if (!UseOffset && Addr.BaseType == Address::FrameIndexBase) {
+ unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
+ ResultReg).addFrameIndex(Addr.Base.FI).addImm(0);
+ Addr.Base.Reg = ResultReg;
+ Addr.BaseType = Address::RegBase;
+ }
+
+ if (!UseOffset) {
+ IntegerType *OffsetTy = ((VT == MVT::i32) ? Type::getInt32Ty(*Context)
+ : Type::getInt64Ty(*Context));
+ const ConstantInt *Offset =
+ ConstantInt::getSigned(OffsetTy, (int64_t)(Addr.Offset));
+ IndexReg = PPCMaterializeInt(Offset, MVT::i64);
+ assert(IndexReg && "Unexpected error in PPCMaterializeInt!");
+ }
+}
+
+// Emit a load instruction if possible, returning true if we succeeded,
+// otherwise false. See commentary below for how the register class of
+// the load is determined.
+bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
+ const TargetRegisterClass *RC,
+ bool IsZExt, unsigned FP64LoadOpc) {
+ unsigned Opc;
+ bool UseOffset = true;
+
+ // If ResultReg is given, it determines the register class of the load.
+ // Otherwise, RC is the register class to use. If the result of the
+ // load isn't anticipated in this block, both may be zero, in which
+ // case we must make a conservative guess. In particular, don't assign
+ // R0 or X0 to the result register, as the result may be used in a load,
+ // store, add-immediate, or isel that won't permit this. (Though
+ // perhaps the spill and reload of live-exit values would handle this?)
+ const TargetRegisterClass *UseRC =
+ (ResultReg ? MRI.getRegClass(ResultReg) :
+ (RC ? RC :
+ (VT == MVT::f64 ? &PPC::F8RCRegClass :
+ (VT == MVT::f32 ? &PPC::F4RCRegClass :
+ (VT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+ &PPC::GPRC_and_GPRC_NOR0RegClass)))));
+
+ bool Is32BitInt = UseRC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ switch (VT.SimpleTy) {
+ default: // e.g., vector types not handled
+ return false;
+ case MVT::i8:
+ Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
+ break;
+ case MVT::i16:
+ Opc = (IsZExt ?
+ (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
+ (Is32BitInt ? PPC::LHA : PPC::LHA8));
+ break;
+ case MVT::i32:
+ Opc = (IsZExt ?
+ (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
+ (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
+ if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
+ UseOffset = false;
+ break;
+ case MVT::i64:
+ Opc = PPC::LD;
+ assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
+ "64-bit load with 32-bit target??");
+ UseOffset = ((Addr.Offset & 3) == 0);
+ break;
+ case MVT::f32:
+ Opc = PPC::LFS;
+ break;
+ case MVT::f64:
+ Opc = FP64LoadOpc;
+ break;
+ }
+
+ // If necessary, materialize the offset into a register and use
+ // the indexed form. Also handle stack pointers with special needs.
+ unsigned IndexReg = 0;
+ PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+ if (ResultReg == 0)
+ ResultReg = createResultReg(UseRC);
+
+ // Note: If we still have a frame index here, we know the offset is
+ // in range, as otherwise PPCSimplifyAddress would have converted it
+ // into a RegBase.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand::MOLoad, MFI.getObjectSize(Addr.Base.FI),
+ MFI.getObjectAlignment(Addr.Base.FI));
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+
+ // Base reg with offset in range.
+ } else if (UseOffset) {
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+ // Indexed form.
+ } else {
+ // Get the RR opcode corresponding to the RI one. FIXME: It would be
+ // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+ // is hard to get at.
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case PPC::LBZ: Opc = PPC::LBZX; break;
+ case PPC::LBZ8: Opc = PPC::LBZX8; break;
+ case PPC::LHZ: Opc = PPC::LHZX; break;
+ case PPC::LHZ8: Opc = PPC::LHZX8; break;
+ case PPC::LHA: Opc = PPC::LHAX; break;
+ case PPC::LHA8: Opc = PPC::LHAX8; break;
+ case PPC::LWZ: Opc = PPC::LWZX; break;
+ case PPC::LWZ8: Opc = PPC::LWZX8; break;
+ case PPC::LWA: Opc = PPC::LWAX; break;
+ case PPC::LWA_32: Opc = PPC::LWAX_32; break;
+ case PPC::LD: Opc = PPC::LDX; break;
+ case PPC::LFS: Opc = PPC::LFSX; break;
+ case PPC::LFD: Opc = PPC::LFDX; break;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(Addr.Base.Reg).addReg(IndexReg);
+ }
+
+ return true;
+}
+
+// Attempt to fast-select a load instruction.
+bool PPCFastISel::SelectLoad(const Instruction *I) {
+ // FIXME: No atomic loads are supported.
+ if (cast<LoadInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(I->getType(), VT))
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(I->getOperand(0), Addr))
+ return false;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class. This is necessary
+ // to constrain RA from using R0/X0 when this is not legal.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(VT, ResultReg, Addr, RC))
+ return false;
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Emit a store instruction to store SrcReg at Addr.
+bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
+ assert(SrcReg && "Nothing to store!");
+ unsigned Opc;
+ bool UseOffset = true;
+
+ const TargetRegisterClass *RC = MRI.getRegClass(SrcReg);
+ bool Is32BitInt = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ switch (VT.SimpleTy) {
+ default: // e.g., vector types not handled
+ return false;
+ case MVT::i8:
+ Opc = Is32BitInt ? PPC::STB : PPC::STB8;
+ break;
+ case MVT::i16:
+ Opc = Is32BitInt ? PPC::STH : PPC::STH8;
+ break;
+ case MVT::i32:
+ assert(Is32BitInt && "Not GPRC for i32??");
+ Opc = PPC::STW;
+ break;
+ case MVT::i64:
+ Opc = PPC::STD;
+ UseOffset = ((Addr.Offset & 3) == 0);
+ break;
+ case MVT::f32:
+ Opc = PPC::STFS;
+ break;
+ case MVT::f64:
+ Opc = PPC::STFD;
+ break;
+ }
+
+ // If necessary, materialize the offset into a register and use
+ // the indexed form. Also handle stack pointers with special needs.
+ unsigned IndexReg = 0;
+ PPCSimplifyAddress(Addr, VT, UseOffset, IndexReg);
+
+ // Note: If we still have a frame index here, we know the offset is
+ // in range, as otherwise PPCSimplifyAddress would have converted it
+ // into a RegBase.
+ if (Addr.BaseType == Address::FrameIndexBase) {
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(Addr.Base.FI, Addr.Offset),
+ MachineMemOperand::MOStore, MFI.getObjectSize(Addr.Base.FI),
+ MFI.getObjectAlignment(Addr.Base.FI));
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc)).addReg(SrcReg)
+ .addImm(Addr.Offset).addFrameIndex(Addr.Base.FI).addMemOperand(MMO);
+
+ // Base reg with offset in range.
+ } else if (UseOffset)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ .addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
+
+ // Indexed form.
+ else {
+ // Get the RR opcode corresponding to the RI one. FIXME: It would be
+ // preferable to use the ImmToIdxMap from PPCRegisterInfo.cpp, but it
+ // is hard to get at.
+ switch (Opc) {
+ default: llvm_unreachable("Unexpected opcode!");
+ case PPC::STB: Opc = PPC::STBX; break;
+ case PPC::STH : Opc = PPC::STHX; break;
+ case PPC::STW : Opc = PPC::STWX; break;
+ case PPC::STB8: Opc = PPC::STBX8; break;
+ case PPC::STH8: Opc = PPC::STHX8; break;
+ case PPC::STW8: Opc = PPC::STWX8; break;
+ case PPC::STD: Opc = PPC::STDX; break;
+ case PPC::STFS: Opc = PPC::STFSX; break;
+ case PPC::STFD: Opc = PPC::STFDX; break;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc))
+ .addReg(SrcReg).addReg(Addr.Base.Reg).addReg(IndexReg);
+ }
+
+ return true;
+}
+
+// Attempt to fast-select a store instruction.
+bool PPCFastISel::SelectStore(const Instruction *I) {
+ Value *Op0 = I->getOperand(0);
+ unsigned SrcReg = 0;
+
+ // FIXME: No atomic stores are supported.
+ if (cast<StoreInst>(I)->isAtomic())
+ return false;
+
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(Op0->getType(), VT))
+ return false;
+
+ // Get the value to be stored into a register.
+ SrcReg = getRegForValue(Op0);
+ if (SrcReg == 0)
+ return false;
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(I->getOperand(1), Addr))
+ return false;
+
+ if (!PPCEmitStore(VT, SrcReg, Addr))
+ return false;
+
+ return true;
+}
+
+// Attempt to fast-select a branch instruction.
+bool PPCFastISel::SelectBranch(const Instruction *I) {
+ const BranchInst *BI = cast<BranchInst>(I);
+ MachineBasicBlock *BrBB = FuncInfo.MBB;
+ MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
+ MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+
+ // For now, just try the simplest case where it's fed by a compare.
+ if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
+ Optional<PPC::Predicate> OptPPCPred = getComparePred(CI->getPredicate());
+ if (!OptPPCPred)
+ return false;
+
+ PPC::Predicate PPCPred = OptPPCPred.getValue();
+
+ // Take advantage of fall-through opportunities.
+ if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
+ std::swap(TBB, FBB);
+ PPCPred = PPC::InvertPredicate(PPCPred);
+ }
+
+ unsigned CondReg = createResultReg(&PPC::CRRCRegClass);
+
+ if (!PPCEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned(),
+ CondReg))
+ return false;
+
+ BuildMI(*BrBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCC))
+ .addImm(PPCPred).addReg(CondReg).addMBB(TBB);
+ FastEmitBranch(FBB, DL);
+ FuncInfo.MBB->addSuccessor(TBB);
+ return true;
+
+ } else if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(BI->getCondition())) {
+ uint64_t Imm = CI->getZExtValue();
+ MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
+ FastEmitBranch(Target, DL);
+ return true;
+ }
+
+ // FIXME: ARM looks for a case where the block containing the compare
+ // has been split from the block containing the branch. If this happens,
+ // there is a vreg available containing the result of the compare. I'm
+ // not sure we can do much, as we've lost the predicate information with
+ // the compare instruction -- we have a 4-bit CR but don't know which bit
+ // to test here.
+ return false;
+}
+
+// Attempt to emit a compare of the two source values. Signed and unsigned
+// comparisons are supported. Return false if we can't handle it.
+bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
+ bool IsZExt, unsigned DestReg) {
+ Type *Ty = SrcValue1->getType();
+ EVT SrcEVT = TLI.getValueType(Ty, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ // See if operand 2 is an immediate encodeable in the compare.
+ // FIXME: Operands are not in canonical order at -O0, so an immediate
+ // operand in position 1 is a lost opportunity for now. We are
+ // similar to ARM in this regard.
+ long Imm = 0;
+ bool UseImm = false;
+
+ // Only 16-bit integer constants can be represented in compares for
+ // PowerPC. Others will be materialized into a register.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
+ if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
+ SrcVT == MVT::i8 || SrcVT == MVT::i1) {
+ const APInt &CIVal = ConstInt->getValue();
+ Imm = (IsZExt) ? (long)CIVal.getZExtValue() : (long)CIVal.getSExtValue();
+ if ((IsZExt && isUInt<16>(Imm)) || (!IsZExt && isInt<16>(Imm)))
+ UseImm = true;
+ }
+ }
+
+ unsigned CmpOpc;
+ bool NeedsExt = false;
+ switch (SrcVT.SimpleTy) {
+ default: return false;
+ case MVT::f32:
+ CmpOpc = PPC::FCMPUS;
+ break;
+ case MVT::f64:
+ CmpOpc = PPC::FCMPUD;
+ break;
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ NeedsExt = true;
+ // Intentional fall-through.
+ case MVT::i32:
+ if (!UseImm)
+ CmpOpc = IsZExt ? PPC::CMPLW : PPC::CMPW;
+ else
+ CmpOpc = IsZExt ? PPC::CMPLWI : PPC::CMPWI;
+ break;
+ case MVT::i64:
+ if (!UseImm)
+ CmpOpc = IsZExt ? PPC::CMPLD : PPC::CMPD;
+ else
+ CmpOpc = IsZExt ? PPC::CMPLDI : PPC::CMPDI;
+ break;
+ }
+
+ unsigned SrcReg1 = getRegForValue(SrcValue1);
+ if (SrcReg1 == 0)
+ return false;
+
+ unsigned SrcReg2 = 0;
+ if (!UseImm) {
+ SrcReg2 = getRegForValue(SrcValue2);
+ if (SrcReg2 == 0)
+ return false;
+ }
+
+ if (NeedsExt) {
+ unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg1, MVT::i32, ExtReg, IsZExt))
+ return false;
+ SrcReg1 = ExtReg;
+
+ if (!UseImm) {
+ unsigned ExtReg = createResultReg(&PPC::GPRCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg2, MVT::i32, ExtReg, IsZExt))
+ return false;
+ SrcReg2 = ExtReg;
+ }
+ }
+
+ if (!UseImm)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+ .addReg(SrcReg1).addReg(SrcReg2);
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc), DestReg)
+ .addReg(SrcReg1).addImm(Imm);
+
+ return true;
+}
+
+// Attempt to fast-select a floating-point extend instruction.
+bool PPCFastISel::SelectFPExt(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f32 || DestVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // No code is generated for an FP extend.
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+// Attempt to fast-select a floating-point truncate instruction.
+bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::f64 || DestVT != MVT::f32)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // Round the result to single precision.
+ unsigned DestReg = createResultReg(&PPC::F4RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP), DestReg)
+ .addReg(SrcReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+// Move an i32 or i64 value in a GPR to an f64 value in an FPR.
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+// FIXME: The code here is sloppy for the 4-byte case. Can use a 4-byte
+// stack slot and 4-byte store/load sequence. Or just sext the 4-byte
+// case to 8 bytes which produces tighter code but wastes stack space.
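+// Rough shape of the sequence for a signed i32 source (a sketch): sign-extend
+// to 64 bits in a GPR, store the doubleword to the stack slot, then reload
+// the low word (offset 4, big-endian) into an FPR with an FP integer-word
+// load such as LFIWAX.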
+unsigned PPCFastISel::PPCMoveToFPReg(MVT SrcVT, unsigned SrcReg,
+ bool IsSigned) {
+
+ // If necessary, extend 32-bit int to 64-bit.
+ if (SrcVT == MVT::i32) {
+ unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ if (!PPCEmitIntExt(MVT::i32, SrcReg, MVT::i64, TmpReg, !IsSigned))
+ return 0;
+ SrcReg = TmpReg;
+ }
+
+ // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+ Address Addr;
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+ // Store the value from the GPR.
+ if (!PPCEmitStore(MVT::i64, SrcReg, Addr))
+ return 0;
+
+ // Load the integer value into an FPR. The kind of load used depends
+ // on a number of conditions.
+ unsigned LoadOpc = PPC::LFD;
+
+ if (SrcVT == MVT::i32) {
+ Addr.Offset = 4;
+ if (!IsSigned)
+ LoadOpc = PPC::LFIWZX;
+ else if (PPCSubTarget.hasLFIWAX())
+ LoadOpc = PPC::LFIWAX;
+ }
+
+ const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(MVT::f64, ResultReg, Addr, RC, !IsSigned, LoadOpc))
+ return 0;
+
+ return ResultReg;
+}
+
+// Attempt to fast-select an integer-to-floating-point conversion.
+bool PPCFastISel::SelectIToFP(const Instruction *I, bool IsSigned) {
+ MVT DstVT;
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::f32 && DstVT != MVT::f64)
+ return false;
+
+ Value *Src = I->getOperand(0);
+ EVT SrcEVT = TLI.getValueType(Src->getType(), true);
+ if (!SrcEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+
+ if (SrcVT != MVT::i8 && SrcVT != MVT::i16 &&
+ SrcVT != MVT::i32 && SrcVT != MVT::i64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // We can only lower an unsigned convert if we have the newer
+ // floating-point conversion operations.
+ if (!IsSigned && !PPCSubTarget.hasFPCVT())
+ return false;
+
+ // FIXME: For now we require the newer floating-point conversion operations
+ // (which are present only on P7 and A2 server models) when converting
+ // to single-precision float. Otherwise we have to generate a lot of
+ // fiddly code to avoid double rounding. If necessary, the fiddly code
+ // can be found in PPCTargetLowering::LowerINT_TO_FP().
+ if (DstVT == MVT::f32 && !PPCSubTarget.hasFPCVT())
+ return false;
+
+ // Extend the input if necessary.
+ if (SrcVT == MVT::i8 || SrcVT == MVT::i16) {
+ unsigned TmpReg = createResultReg(&PPC::G8RCRegClass);
+ if (!PPCEmitIntExt(SrcVT, SrcReg, MVT::i64, TmpReg, !IsSigned))
+ return false;
+ SrcVT = MVT::i64;
+ SrcReg = TmpReg;
+ }
+
+ // Move the integer value to an FPR.
+ unsigned FPReg = PPCMoveToFPReg(SrcVT, SrcReg, IsSigned);
+ if (FPReg == 0)
+ return false;
+
+ // Determine the opcode for the conversion.
+ const TargetRegisterClass *RC = &PPC::F8RCRegClass;
+ unsigned DestReg = createResultReg(RC);
+ unsigned Opc;
+
+ if (DstVT == MVT::f32)
+ Opc = IsSigned ? PPC::FCFIDS : PPC::FCFIDUS;
+ else
+ Opc = IsSigned ? PPC::FCFID : PPC::FCFIDU;
+
+ // Generate the convert.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(FPReg);
+
+ UpdateValueMap(I, DestReg);
+ return true;
+}
+
+// Move the floating-point value in SrcReg into an integer destination
+// register, and return the register (or zero if we can't handle it).
+// FIXME: When direct register moves are implemented (see PowerISA 2.08),
+// those should be used instead of moving via a stack slot when the
+// subtarget permits.
+unsigned PPCFastISel::PPCMoveToIntReg(const Instruction *I, MVT VT,
+ unsigned SrcReg, bool IsSigned) {
+ // Get a stack slot 8 bytes wide, aligned on an 8-byte boundary.
+  // Note that if we have STFIWX available, we could use a 4-byte stack
+ // slot for i32, but this being fast-isel we'll just go with the
+ // easiest code gen possible.
+ Address Addr;
+ Addr.BaseType = Address::FrameIndexBase;
+ Addr.Base.FI = MFI.CreateStackObject(8, 8, false);
+
+ // Store the value from the FPR.
+ if (!PPCEmitStore(MVT::f64, SrcReg, Addr))
+ return 0;
+
+ // Reload it into a GPR. If we want an i32, modify the address
+ // to have a 4-byte offset so we load from the right place.
+ if (VT == MVT::i32)
+ Addr.Offset = 4;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ AssignedReg ? MRI.getRegClass(AssignedReg) : 0;
+
+ unsigned ResultReg = 0;
+ if (!PPCEmitLoad(VT, ResultReg, Addr, RC, !IsSigned))
+ return 0;
+
+ return ResultReg;
+}
+
+// Attempt to fast-select a floating-point-to-integer conversion.
+bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
+ MVT DstVT, SrcVT;
+ Type *DstTy = I->getType();
+ if (!isTypeLegal(DstTy, DstVT))
+ return false;
+
+ if (DstVT != MVT::i32 && DstVT != MVT::i64)
+ return false;
+
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+ if (!isTypeLegal(SrcTy, SrcVT))
+ return false;
+
+ if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (SrcReg == 0)
+ return false;
+
+ // Convert f32 to f64 if necessary. This is just a meaningless copy
+ // to get the register class right. COPY_TO_REGCLASS is needed since
+ // a COPY from F4RC to F8RC is converted to a F4RC-F4RC copy downstream.
+ const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
+ if (InRC == &PPC::F4RCRegClass) {
+ unsigned TmpReg = createResultReg(&PPC::F8RCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY_TO_REGCLASS), TmpReg)
+ .addReg(SrcReg).addImm(PPC::F8RCRegClassID);
+ SrcReg = TmpReg;
+ }
+
+ // Determine the opcode for the conversion, which takes place
+ // entirely within FPRs.
+ unsigned DestReg = createResultReg(&PPC::F8RCRegClass);
+ unsigned Opc;
+
+ if (DstVT == MVT::i32)
+ if (IsSigned)
+ Opc = PPC::FCTIWZ;
+ else
+ Opc = PPCSubTarget.hasFPCVT() ? PPC::FCTIWUZ : PPC::FCTIDZ;
+ else
+ Opc = IsSigned ? PPC::FCTIDZ : PPC::FCTIDUZ;
+
+ // Generate the convert.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+
+ // Now move the integer value from a float register to an integer register.
+ unsigned IntReg = PPCMoveToIntReg(I, DstVT, DestReg, IsSigned);
+ if (IntReg == 0)
+ return false;
+
+ UpdateValueMap(I, IntReg);
+ return true;
+}
+
+// Attempt to fast-select a binary integer operation that isn't already
+// handled automatically.
+bool PPCFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ // We can get here in the case when we have a binary operation on a non-legal
+ // type and the target independent selector doesn't know how to handle it.
+ if (DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ // Look at the currently assigned register for this instruction
+ // to determine the required register class. If there is no register,
+ // make a conservative choice (don't assign R0).
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ (AssignedReg ? MRI.getRegClass(AssignedReg) :
+ &PPC::GPRC_and_GPRC_NOR0RegClass);
+ bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ unsigned Opc;
+ switch (ISDOpcode) {
+ default: return false;
+ case ISD::ADD:
+ Opc = IsGPRC ? PPC::ADD4 : PPC::ADD8;
+ break;
+ case ISD::OR:
+ Opc = IsGPRC ? PPC::OR : PPC::OR8;
+ break;
+ case ISD::SUB:
+ Opc = IsGPRC ? PPC::SUBF : PPC::SUBF8;
+ break;
+ }
+
+ unsigned ResultReg = createResultReg(RC ? RC : &PPC::G8RCRegClass);
+ unsigned SrcReg1 = getRegForValue(I->getOperand(0));
+ if (SrcReg1 == 0) return false;
+
+ // Handle case of small immediate operand.
+ if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(1))) {
+ const APInt &CIVal = ConstInt->getValue();
+ int Imm = (int)CIVal.getSExtValue();
+ bool UseImm = true;
+ if (isInt<16>(Imm)) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("Missing case!");
+ case PPC::ADD4:
+ Opc = PPC::ADDI;
+ MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ break;
+ case PPC::ADD8:
+ Opc = PPC::ADDI8;
+ MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+ break;
+ case PPC::OR:
+ Opc = PPC::ORI;
+ break;
+ case PPC::OR8:
+ Opc = PPC::ORI8;
+ break;
+ case PPC::SUBF:
+ if (Imm == -32768)
+ UseImm = false;
+ else {
+ Opc = PPC::ADDI;
+ MRI.setRegClass(SrcReg1, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ Imm = -Imm;
+ }
+ break;
+ case PPC::SUBF8:
+ if (Imm == -32768)
+ UseImm = false;
+ else {
+ Opc = PPC::ADDI8;
+ MRI.setRegClass(SrcReg1, &PPC::G8RC_and_G8RC_NOX0RegClass);
+ Imm = -Imm;
+ }
+ break;
+ }
+
+ if (UseImm) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addImm(Imm);
+ UpdateValueMap(I, ResultReg);
+ return true;
+ }
+ }
+ }
+
+ // Reg-reg case.
+ unsigned SrcReg2 = getRegForValue(I->getOperand(1));
+ if (SrcReg2 == 0) return false;
+
+ // Reverse operands for subtract-from.
+ if (ISDOpcode == ISD::SUB)
+ std::swap(SrcReg1, SrcReg2);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
+ .addReg(SrcReg1).addReg(SrcReg2);
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
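
Editor's note: a small sketch (not part of this patch) of the subtract-from-immediate rewrite above. PPC has no subtract-immediate instruction, so "sub x, C" is rewritten as "addi x, -C"; the only 16-bit value whose negation does not fit the signed addi field is -32768, hence the UseImm = false escape to the register-register SUBF form.

    #include <cassert>

    int main() {
      int Imm = 100;
      assert(-Imm == -100 && -Imm >= -32768); // addi x, -100 works
      Imm = -32768;
      assert(-Imm == 32768 && -Imm > 32767);  // 32768 doesn't fit int16: use SUBF
    }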
+
+// Handle arguments to a call that we're attempting to fast-select.
+// Return false if the arguments are too complex for us at the moment.
+bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args,
+ SmallVectorImpl<unsigned> &ArgRegs,
+ SmallVectorImpl<MVT> &ArgVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
+ SmallVectorImpl<unsigned> &RegArgs,
+ CallingConv::ID CC,
+ unsigned &NumBytes,
+ bool IsVarArg) {
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
+ CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS);
+
+ // Bail out if we can't handle any of the arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Skip vector arguments for now, as well as long double and
+ // uint128_t, and anything that isn't passed in a register.
+ if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64 ||
+ !VA.isRegLoc() || VA.needsCustom())
+ return false;
+
+ // Skip bit-converted arguments for now.
+ if (VA.getLocInfo() == CCValAssign::BCvt)
+ return false;
+ }
+
+ // Get a count of how many bytes are to be pushed onto the stack.
+ NumBytes = CCInfo.getNextStackOffset();
+
+ // Issue CALLSEQ_START.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TII.getCallFrameSetupOpcode()))
+ .addImm(NumBytes);
+
+ // Prepare to assign register arguments. Every argument uses up a
+ // GPR protocol register even if it's passed in a floating-point
+ // register.
+ unsigned NextGPR = PPC::X3;
+ unsigned NextFPR = PPC::F1;
+
+ // Process arguments.
+ for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
+ CCValAssign &VA = ArgLocs[I];
+ unsigned Arg = ArgRegs[VA.getValNo()];
+ MVT ArgVT = ArgVTs[VA.getValNo()];
+
+ // Handle argument promotion and bitcasts.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/false))
+ llvm_unreachable("Failed to emit a sext!");
+ ArgVT = DestVT;
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt: {
+ MVT DestVT = VA.getLocVT();
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(ArgVT, Arg, DestVT, TmpReg, /*IsZExt*/true))
+ llvm_unreachable("Failed to emit a zext!");
+ ArgVT = DestVT;
+ Arg = TmpReg;
+ break;
+ }
+ case CCValAssign::BCvt: {
+ // FIXME: Not yet handled.
+ llvm_unreachable("Should have bailed before getting here!");
+ break;
+ }
+ }
+
+ // Copy this argument to the appropriate register.
+ unsigned ArgReg;
+ if (ArgVT == MVT::f32 || ArgVT == MVT::f64) {
+ ArgReg = NextFPR++;
+ ++NextGPR;
+ } else
+ ArgReg = NextGPR++;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ArgReg).addReg(Arg);
+ RegArgs.push_back(ArgReg);
+ }
+
+ return true;
+}
+
+// For a call that we've determined we can fast-select, finish the
+// call sequence and generate a copy to obtain the return value (if any).
+void PPCFastISel::finishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
+ const Instruction *I, CallingConv::ID CC,
+ unsigned &NumBytes, bool IsVarArg) {
+  // Issue CALLSEQ_END.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TII.getCallFrameDestroyOpcode()))
+ .addImm(NumBytes).addImm(0);
+
+ // Next, generate a copy to obtain the return value.
+ // FIXME: No multi-register return values yet, though I don't foresee
+ // any real difficulties there.
+ if (RetVT != MVT::isVoid) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+    assert(RVLocs.size() == 1 && "No support for multi-reg return values!");
+    CCValAssign &VA = RVLocs[0];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ MVT DestVT = VA.getValVT();
+ MVT CopyVT = DestVT;
+
+ // Ints smaller than a register still arrive in a full 64-bit
+ // register, so make sure we recognize this.
+ if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32)
+ CopyVT = MVT::i64;
+
+ unsigned SourcePhysReg = VA.getLocReg();
+ unsigned ResultReg = 0;
+
+ if (RetVT == CopyVT) {
+ const TargetRegisterClass *CpyRC = TLI.getRegClassFor(CopyVT);
+ ResultReg = createResultReg(CpyRC);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SourcePhysReg);
+
+ // If necessary, round the floating result to single precision.
+ } else if (CopyVT == MVT::f64) {
+ ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::FRSP),
+ ResultReg).addReg(SourcePhysReg);
+
+ // If only the low half of a general register is needed, generate
+ // a GPRC copy instead of a G8RC copy. (EXTRACT_SUBREG can't be
+ // used along the fast-isel path (not lowered), and downstream logic
+ // also doesn't like a direct subreg copy on a physical reg.)
+ } else if (RetVT == MVT::i8 || RetVT == MVT::i16 || RetVT == MVT::i32) {
+ ResultReg = createResultReg(&PPC::GPRCRegClass);
+ // Convert physical register from G8RC to GPRC.
+ SourcePhysReg -= PPC::X0 - PPC::R0;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(SourcePhysReg);
+ }
+
+ assert(ResultReg && "ResultReg unset!");
+ UsedRegs.push_back(SourcePhysReg);
+ UpdateValueMap(I, ResultReg);
+ }
+}
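
Editor's note: the "SourcePhysReg -= PPC::X0 - PPC::R0" conversion above relies on the TableGen-erated register enum laying out R0..R31 and X0..X31 as two parallel, equally spaced runs, so one constant delta maps between them. A sketch with purely hypothetical enum values:

    // Hypothetical enum values for illustration; the real numbers come
    // from the TableGen-erated PPC register enum.
    enum HypoPPCReg { R0 = 1, R1, R2, X0 = 64, X1, X2 };
    static_assert(X1 - (X0 - R0) == R1, "Xn -> Rn assumes parallel runs");
    int main() {}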
+
+// Attempt to fast-select a call instruction.
+bool PPCFastISel::SelectCall(const Instruction *I) {
+ const CallInst *CI = cast<CallInst>(I);
+ const Value *Callee = CI->getCalledValue();
+
+ // Can't handle inline asm.
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // Allow SelectionDAG isel to handle tail calls.
+ if (CI->isTailCall())
+ return false;
+
+ // Obtain calling convention.
+ ImmutableCallSite CS(CI);
+ CallingConv::ID CC = CS.getCallingConv();
+
+ PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
+ FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ bool IsVarArg = FTy->isVarArg();
+
+ // Not ready for varargs yet.
+ if (IsVarArg)
+ return false;
+
+ // Handle simple calls for now, with legal return types and
+ // those that can be extended.
+ Type *RetTy = I->getType();
+ MVT RetVT;
+ if (RetTy->isVoidTy())
+ RetVT = MVT::isVoid;
+ else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
+ RetVT != MVT::i8)
+ return false;
+
+ // FIXME: No multi-register return values yet.
+ if (RetVT != MVT::isVoid && RetVT != MVT::i8 && RetVT != MVT::i16 &&
+ RetVT != MVT::i32 && RetVT != MVT::i64 && RetVT != MVT::f32 &&
+ RetVT != MVT::f64) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
+ CCInfo.AnalyzeCallResult(RetVT, RetCC_PPC64_ELF_FIS);
+ if (RVLocs.size() > 1)
+ return false;
+ }
+
+ // Bail early if more than 8 arguments, as we only currently
+ // handle arguments passed in registers.
+ unsigned NumArgs = CS.arg_size();
+ if (NumArgs > 8)
+ return false;
+
+ // Set up the argument vectors.
+ SmallVector<Value*, 8> Args;
+ SmallVector<unsigned, 8> ArgRegs;
+ SmallVector<MVT, 8> ArgVTs;
+ SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
+
+ Args.reserve(NumArgs);
+ ArgRegs.reserve(NumArgs);
+ ArgVTs.reserve(NumArgs);
+ ArgFlags.reserve(NumArgs);
+
+ for (ImmutableCallSite::arg_iterator II = CS.arg_begin(), IE = CS.arg_end();
+ II != IE; ++II) {
+ // FIXME: ARM does something for intrinsic calls here, check into that.
+
+ unsigned AttrIdx = II - CS.arg_begin() + 1;
+
+ // Only handle easy calls for now. It would be reasonably easy
+ // to handle <= 8-byte structures passed ByVal in registers, but we
+ // have to ensure they are right-justified in the register.
+ if (CS.paramHasAttr(AttrIdx, Attribute::InReg) ||
+ CS.paramHasAttr(AttrIdx, Attribute::StructRet) ||
+ CS.paramHasAttr(AttrIdx, Attribute::Nest) ||
+ CS.paramHasAttr(AttrIdx, Attribute::ByVal))
+ return false;
+
+ ISD::ArgFlagsTy Flags;
+ if (CS.paramHasAttr(AttrIdx, Attribute::SExt))
+ Flags.setSExt();
+ if (CS.paramHasAttr(AttrIdx, Attribute::ZExt))
+ Flags.setZExt();
+
+ Type *ArgTy = (*II)->getType();
+ MVT ArgVT;
+ if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8)
+ return false;
+
+ if (ArgVT.isVector())
+ return false;
+
+ unsigned Arg = getRegForValue(*II);
+ if (Arg == 0)
+ return false;
+
+ unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
+ Args.push_back(*II);
+ ArgRegs.push_back(Arg);
+ ArgVTs.push_back(ArgVT);
+ ArgFlags.push_back(Flags);
+ }
+
+ // Process the arguments.
+ SmallVector<unsigned, 8> RegArgs;
+ unsigned NumBytes;
+
+ if (!processCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
+ RegArgs, CC, NumBytes, IsVarArg))
+ return false;
+
+ // FIXME: No handling for function pointers yet. This requires
+ // implementing the function descriptor (OPD) setup.
+ const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
+ if (!GV)
+ return false;
+
+ // Build direct call with NOP for TOC restore.
+ // FIXME: We can and should optimize away the NOP for local calls.
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(PPC::BL8_NOP));
+ // Add callee.
+ MIB.addGlobalAddress(GV);
+
+ // Add implicit physical register uses to the call.
+ for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
+ MIB.addReg(RegArgs[II], RegState::Implicit);
+
+ // Add a register mask with the call-preserved registers. Proper
+ // defs for return values will be added by setPhysRegsDeadExcept().
+ MIB.addRegMask(TRI.getCallPreservedMask(CC));
+
+ // Finish off the call including any return values.
+ SmallVector<unsigned, 4> UsedRegs;
+ finishCall(RetVT, UsedRegs, I, CC, NumBytes, IsVarArg);
+
+ // Set all unused physregs defs as dead.
+ static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
+
+ return true;
+}
+
+// Attempt to fast-select a return instruction.
+bool PPCFastISel::SelectRet(const Instruction *I) {
+
+ if (!FuncInfo.CanLowerReturn)
+ return false;
+
+ const ReturnInst *Ret = cast<ReturnInst>(I);
+ const Function &F = *I->getParent()->getParent();
+
+ // Build a list of return value registers.
+ SmallVector<unsigned, 4> RetRegs;
+ CallingConv::ID CC = F.getCallingConv();
+
+ if (Ret->getNumOperands() > 0) {
+ SmallVector<ISD::OutputArg, 4> Outs;
+ GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ValLocs;
+ CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs, *Context);
+ CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
+ const Value *RV = Ret->getOperand(0);
+
+ // FIXME: Only one output register for now.
+ if (ValLocs.size() > 1)
+ return false;
+
+ // Special case for returning a constant integer of any size.
+ // Materialize the constant as an i64 and copy it to the return
+ // register. This avoids an unnecessary extend or truncate.
+ if (isa<ConstantInt>(*RV)) {
+ const Constant *C = cast<Constant>(RV);
+ unsigned SrcReg = PPCMaterializeInt(C, MVT::i64);
+ unsigned RetReg = ValLocs[0].getLocReg();
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ RetReg).addReg(SrcReg);
+ RetRegs.push_back(RetReg);
+
+ } else {
+ unsigned Reg = getRegForValue(RV);
+
+ if (Reg == 0)
+ return false;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i < ValLocs.size(); ++i) {
+
+ CCValAssign &VA = ValLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+ RetRegs.push_back(VA.getLocReg());
+ unsigned SrcReg = Reg + VA.getValNo();
+
+ EVT RVEVT = TLI.getValueType(RV->getType());
+ if (!RVEVT.isSimple())
+ return false;
+ MVT RVVT = RVEVT.getSimpleVT();
+ MVT DestVT = VA.getLocVT();
+
+ if (RVVT != DestVT && RVVT != MVT::i8 &&
+ RVVT != MVT::i16 && RVVT != MVT::i32)
+ return false;
+
+ if (RVVT != DestVT) {
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ llvm_unreachable("Full value assign but types don't match?");
+ case CCValAssign::AExt:
+ case CCValAssign::ZExt: {
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, true))
+ return false;
+ SrcReg = TmpReg;
+ break;
+ }
+ case CCValAssign::SExt: {
+ const TargetRegisterClass *RC =
+ (DestVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
+ unsigned TmpReg = createResultReg(RC);
+ if (!PPCEmitIntExt(RVVT, SrcReg, DestVT, TmpReg, false))
+ return false;
+ SrcReg = TmpReg;
+ break;
+ }
+ }
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::COPY), RetRegs[i])
+ .addReg(SrcReg);
+ }
+ }
+ }
+
+ MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(PPC::BLR));
+
+ for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
+ MIB.addReg(RetRegs[i], RegState::Implicit);
+
+ return true;
+}
+
+// Attempt to emit an integer extend of SrcReg into DestReg. Both
+// signed and zero extensions are supported. Return false if we
+// can't handle it.
+bool PPCFastISel::PPCEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
+ unsigned DestReg, bool IsZExt) {
+ if (DestVT != MVT::i32 && DestVT != MVT::i64)
+ return false;
+ if (SrcVT != MVT::i8 && SrcVT != MVT::i16 && SrcVT != MVT::i32)
+ return false;
+
+ // Signed extensions use EXTSB, EXTSH, EXTSW.
+ if (!IsZExt) {
+ unsigned Opc;
+ if (SrcVT == MVT::i8)
+ Opc = (DestVT == MVT::i32) ? PPC::EXTSB : PPC::EXTSB8_32_64;
+ else if (SrcVT == MVT::i16)
+ Opc = (DestVT == MVT::i32) ? PPC::EXTSH : PPC::EXTSH8_32_64;
+ else {
+ assert(DestVT == MVT::i64 && "Signed extend from i32 to i32??");
+ Opc = PPC::EXTSW_32_64;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addReg(SrcReg);
+
+ // Unsigned 32-bit extensions use RLWINM.
+ } else if (DestVT == MVT::i32) {
+ unsigned MB;
+ if (SrcVT == MVT::i8)
+ MB = 24;
+ else {
+ assert(SrcVT == MVT::i16 && "Unsigned extend from i32 to i32??");
+ MB = 16;
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLWINM),
+ DestReg)
+ .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB).addImm(/*ME=*/31);
+
+ // Unsigned 64-bit extensions use RLDICL (with a 32-bit source).
+ } else {
+ unsigned MB;
+ if (SrcVT == MVT::i8)
+ MB = 56;
+ else if (SrcVT == MVT::i16)
+ MB = 48;
+ else
+ MB = 32;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(PPC::RLDICL_32_64), DestReg)
+ .addReg(SrcReg).addImm(/*SH=*/0).addImm(MB);
+ }
+
+ return true;
+}
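
Editor's note: a standalone sketch (not part of this patch) of the mask-begin (MB) values chosen above. In IBM bit numbering, rlwinm with SH=0, MB, ME=31 keeps bits MB..31, i.e. the low 32-MB bits, and rldicl with SH=0, MB clears the high MB bits of a 64-bit value, which is exactly zero-extension:

    #include <cassert>
    #include <cstdint>

    static uint32_t rlwinm_zext(uint32_t Src, unsigned MB) {
      // rlwinm Dst, Src, 0, MB, 31 keeps the low 32-MB bits.
      return Src & (0xFFFFFFFFu >> MB);
    }

    static uint64_t rldicl_zext(uint64_t Src, unsigned MB) {
      // rldicl Dst, Src, 0, MB clears the high MB bits.
      return Src & (~0ULL >> MB);
    }

    int main() {
      assert(rlwinm_zext(0xFFFFFFABu, 24) == 0xABu);                    // i8  -> i32
      assert(rlwinm_zext(0xFFFFABCDu, 16) == 0xABCDu);                  // i16 -> i32
      assert(rldicl_zext(0xFFFFFFFFFFFFFFABull, 56) == 0xABull);        // i8  -> i64
      assert(rldicl_zext(0xFFFFFFFFABCDEF01ull, 32) == 0xABCDEF01ull);  // i32 -> i64
    }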
+
+// Attempt to fast-select an indirect branch instruction.
+bool PPCFastISel::SelectIndirectBr(const Instruction *I) {
+ unsigned AddrReg = getRegForValue(I->getOperand(0));
+ if (AddrReg == 0)
+ return false;
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::MTCTR8))
+ .addReg(AddrReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::BCTR8));
+
+ const IndirectBrInst *IB = cast<IndirectBrInst>(I);
+ for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
+ FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
+
+ return true;
+}
+
+// Attempt to fast-select an integer truncate instruction.
+bool PPCFastISel::SelectTrunc(const Instruction *I) {
+ Value *Src = I->getOperand(0);
+ EVT SrcVT = TLI.getValueType(Src->getType(), true);
+ EVT DestVT = TLI.getValueType(I->getType(), true);
+
+ if (SrcVT != MVT::i64 && SrcVT != MVT::i32 && SrcVT != MVT::i16)
+ return false;
+
+ if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
+ return false;
+
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg)
+ return false;
+
+ // The only interesting case is when we need to switch register classes.
+ if (SrcVT == MVT::i64) {
+ unsigned ResultReg = createResultReg(&PPC::GPRCRegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ ResultReg).addReg(SrcReg, 0, PPC::sub_32);
+ SrcReg = ResultReg;
+ }
+
+ UpdateValueMap(I, SrcReg);
+ return true;
+}
+
+// Attempt to fast-select an integer extend instruction.
+bool PPCFastISel::SelectIntExt(const Instruction *I) {
+ Type *DestTy = I->getType();
+ Value *Src = I->getOperand(0);
+ Type *SrcTy = Src->getType();
+
+ bool IsZExt = isa<ZExtInst>(I);
+ unsigned SrcReg = getRegForValue(Src);
+ if (!SrcReg) return false;
+
+ EVT SrcEVT, DestEVT;
+ SrcEVT = TLI.getValueType(SrcTy, true);
+ DestEVT = TLI.getValueType(DestTy, true);
+ if (!SrcEVT.isSimple())
+ return false;
+ if (!DestEVT.isSimple())
+ return false;
+
+ MVT SrcVT = SrcEVT.getSimpleVT();
+ MVT DestVT = DestEVT.getSimpleVT();
+
+ // If we know the register class needed for the result of this
+ // instruction, use it. Otherwise pick the register class of the
+ // correct size that does not contain X0/R0, since we don't know
+ // whether downstream uses permit that assignment.
+ unsigned AssignedReg = FuncInfo.ValueMap[I];
+ const TargetRegisterClass *RC =
+ (AssignedReg ? MRI.getRegClass(AssignedReg) :
+ (DestVT == MVT::i64 ? &PPC::G8RC_and_G8RC_NOX0RegClass :
+ &PPC::GPRC_and_GPRC_NOR0RegClass));
+ unsigned ResultReg = createResultReg(RC);
+
+ if (!PPCEmitIntExt(SrcVT, SrcReg, DestVT, ResultReg, IsZExt))
+ return false;
+
+ UpdateValueMap(I, ResultReg);
+ return true;
+}
+
+// Attempt to fast-select an instruction that wasn't handled by
+// the table-generated machinery.
+bool PPCFastISel::TargetSelectInstruction(const Instruction *I) {
+
+ switch (I->getOpcode()) {
+ case Instruction::Load:
+ return SelectLoad(I);
+ case Instruction::Store:
+ return SelectStore(I);
+ case Instruction::Br:
+ return SelectBranch(I);
+ case Instruction::IndirectBr:
+ return SelectIndirectBr(I);
+ case Instruction::FPExt:
+ return SelectFPExt(I);
+ case Instruction::FPTrunc:
+ return SelectFPTrunc(I);
+ case Instruction::SIToFP:
+ return SelectIToFP(I, /*IsSigned*/ true);
+ case Instruction::UIToFP:
+ return SelectIToFP(I, /*IsSigned*/ false);
+ case Instruction::FPToSI:
+ return SelectFPToI(I, /*IsSigned*/ true);
+ case Instruction::FPToUI:
+ return SelectFPToI(I, /*IsSigned*/ false);
+ case Instruction::Add:
+ return SelectBinaryIntOp(I, ISD::ADD);
+ case Instruction::Or:
+ return SelectBinaryIntOp(I, ISD::OR);
+ case Instruction::Sub:
+ return SelectBinaryIntOp(I, ISD::SUB);
+ case Instruction::Call:
+    if (isa<IntrinsicInst>(I))
+ return false;
+ return SelectCall(I);
+ case Instruction::Ret:
+ return SelectRet(I);
+ case Instruction::Trunc:
+ return SelectTrunc(I);
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ return SelectIntExt(I);
+ // Here add other flavors of Instruction::XXX that automated
+ // cases don't catch. For example, switches are terminators
+ // that aren't yet handled.
+ default:
+ break;
+ }
+ return false;
+}
+
+// Materialize a floating-point constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
+ // No plans to handle long double here.
+ if (VT != MVT::f32 && VT != MVT::f64)
+ return 0;
+
+ // All FP constants are loaded from the constant pool.
+ unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
+ assert(Align > 0 && "Unexpectedly missing alignment information!");
+ unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
+ unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ CodeModel::Model CModel = TM.getCodeModel();
+
+ MachineMemOperand *MMO =
+ FuncInfo.MF->getMachineMemOperand(
+ MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
+ (VT == MVT::f32) ? 4 : 8, Align);
+
+ unsigned Opc = (VT == MVT::f32) ? PPC::LFS : PPC::LFD;
+ unsigned TmpReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ // For small code model, generate a LF[SD](0, LDtocCPT(Idx, X2)).
+ if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault) {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocCPT),
+ TmpReg)
+ .addConstantPoolIndex(Idx).addReg(PPC::X2);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addImm(0).addReg(TmpReg).addMemOperand(MMO);
+ } else {
+ // Otherwise we generate LF[SD](Idx[lo], ADDIStocHA(X2, Idx)).
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
+ TmpReg).addReg(PPC::X2).addConstantPoolIndex(Idx);
+ // But for large code model, we must generate a LDtocL followed
+ // by the LF[SD].
+ if (CModel == CodeModel::Large) {
+ unsigned TmpReg2 = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+ TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addImm(0).addReg(TmpReg2);
+ } else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
+ .addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
+ .addReg(TmpReg)
+ .addMemOperand(MMO);
+ }
+
+ return DestReg;
+}
+
+// Materialize the address of a global value into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) {
+ assert(VT == MVT::i64 && "Non-address!");
+ const TargetRegisterClass *RC = &PPC::G8RC_and_G8RC_NOX0RegClass;
+ unsigned DestReg = createResultReg(RC);
+
+ // Global values may be plain old object addresses, TLS object
+ // addresses, constant pool entries, or jump tables. How we generate
+ // code for these may depend on small, medium, or large code model.
+ CodeModel::Model CModel = TM.getCodeModel();
+
+ // FIXME: Jump tables are not yet required because fast-isel doesn't
+ // handle switches; if that changes, we need them as well. For now,
+ // what follows assumes everything's a generic (or TLS) global address.
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar) {
+ // If GV is an alias, use the aliasee for determining thread-locality.
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
+ }
+
+ // FIXME: We don't yet handle the complexity of TLS.
+ bool IsTLS = GVar && GVar->isThreadLocal();
+ if (IsTLS)
+ return 0;
+
+ // For small code model, generate a simple TOC load.
+ if (CModel == CodeModel::Small || CModel == CodeModel::JITDefault)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtoc), DestReg)
+ .addGlobalAddress(GV).addReg(PPC::X2);
+ else {
+ // If the address is an externally defined symbol, a symbol with
+ // common or externally available linkage, a function address, or a
+ // jump table address (not yet needed), or if we are generating code
+ // for large code model, we generate:
+ // LDtocL(GV, ADDIStocHA(%X2, GV))
+ // Otherwise we generate:
+ // ADDItocL(ADDIStocHA(%X2, GV), GV)
+ // Either way, start with the ADDIStocHA:
+ unsigned HighPartReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDIStocHA),
+ HighPartReg).addReg(PPC::X2).addGlobalAddress(GV);
+
+ // !GVar implies a function address. An external variable is one
+ // without an initializer.
+ // If/when switches are implemented, jump tables should be handled
+ // on the "if" path here.
+ if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() ||
+ GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage())
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::LDtocL),
+ DestReg).addGlobalAddress(GV).addReg(HighPartReg);
+ else
+ // Otherwise generate the ADDItocL.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDItocL),
+ DestReg).addReg(HighPartReg).addGlobalAddress(GV);
+ }
+
+ return DestReg;
+}
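
Editor's note: a standalone sketch (not part of this patch) of the high-adjusted/low split behind the ADDIStocHA plus ADDItocL/LDtocL pairs used here and in PPCMaterializeFP. Because the 16-bit low displacement is sign-extended by the hardware, the high part rounds up by 0x8000 so that the two pieces recombine exactly:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    static int64_t ha(int64_t Off) { return (Off + 0x8000) >> 16; }     // @toc@ha
    static int64_t lo(int64_t Off) { return (int16_t)(Off & 0xFFFF); }  // @toc@l

    int main() {
      for (int64_t Off : {0x1234LL, 0x9ABCLL, -0x7F00LL, 0x12345678LL})
        assert((ha(Off) << 16) + lo(Off) == Off);
    }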
+
+// Materialize a 32-bit integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
+ const TargetRegisterClass *RC) {
+ unsigned Lo = Imm & 0xFFFF;
+ unsigned Hi = (Imm >> 16) & 0xFFFF;
+
+ unsigned ResultReg = createResultReg(RC);
+ bool IsGPRC = RC->hasSuperClassEq(&PPC::GPRCRegClass);
+
+ if (isInt<16>(Imm))
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(IsGPRC ? PPC::LI : PPC::LI8), ResultReg)
+ .addImm(Imm);
+ else if (Lo) {
+ // Both Lo and Hi have nonzero bits.
+ unsigned TmpReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), TmpReg)
+ .addImm(Hi);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(IsGPRC ? PPC::ORI : PPC::ORI8), ResultReg)
+ .addReg(TmpReg).addImm(Lo);
+ } else
+ // Just Hi bits.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
+ .addImm(Hi);
+
+ return ResultReg;
+}
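
Editor's note: a standalone sketch (not part of this patch) of the li/lis/ori decomposition above, modelled on the 32-bit view of the register (lis loads the high halfword shifted into place; ori ors in the low halfword):

    #include <cassert>
    #include <cstdint>

    static uint32_t materialize32(int64_t Imm) {
      uint32_t Lo = Imm & 0xFFFF;
      uint32_t Hi = (Imm >> 16) & 0xFFFF;
      if (Imm >= -32768 && Imm <= 32767)
        return (uint32_t)(int32_t)Imm;  // li: sign-extending load-immediate
      uint32_t R = Hi << 16;            // lis
      if (Lo)
        R |= Lo;                        // ori
      return R;
    }

    int main() {
      assert(materialize32(-1) == 0xFFFFFFFFu);          // li path
      assert(materialize32(0x12345678) == 0x12345678u);  // lis + ori
      assert(materialize32(0x7FFF0000) == 0x7FFF0000u);  // lis only
    }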
+
+// Materialize a 64-bit integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterialize64BitInt(int64_t Imm,
+ const TargetRegisterClass *RC) {
+ unsigned Remainder = 0;
+ unsigned Shift = 0;
+
+ // If the value doesn't fit in 32 bits, see if we can shift it
+ // so that it fits in 32 bits.
+ if (!isInt<32>(Imm)) {
+ Shift = countTrailingZeros<uint64_t>(Imm);
+ int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
+
+ if (isInt<32>(ImmSh))
+ Imm = ImmSh;
+ else {
+ Remainder = Imm;
+ Shift = 32;
+ Imm >>= 32;
+ }
+ }
+
+ // Handle the high-order 32 bits (if shifted) or the whole 32 bits
+ // (if not shifted).
+ unsigned TmpReg1 = PPCMaterialize32BitInt(Imm, RC);
+ if (!Shift)
+ return TmpReg1;
+
+ // If upper 32 bits were not zero, we've built them and need to shift
+ // them into place.
+ unsigned TmpReg2;
+ if (Imm) {
+ TmpReg2 = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::RLDICR),
+ TmpReg2).addReg(TmpReg1).addImm(Shift).addImm(63 - Shift);
+ } else
+ TmpReg2 = TmpReg1;
+
+ unsigned TmpReg3, Hi, Lo;
+ if ((Hi = (Remainder >> 16) & 0xFFFF)) {
+ TmpReg3 = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORIS8),
+ TmpReg3).addReg(TmpReg2).addImm(Hi);
+ } else
+ TmpReg3 = TmpReg2;
+
+ if ((Lo = Remainder & 0xFFFF)) {
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ORI8),
+ ResultReg).addReg(TmpReg3).addImm(Lo);
+ return ResultReg;
+ }
+
+ return TmpReg3;
+}
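
Editor's note: a standalone sketch (not part of this patch) of the shift trick above. When a 64-bit constant has enough trailing zeros that the shifted value fits in 32 bits, build the 32-bit value and rotate it into place (the RLDICR); otherwise build the high word, shift it up by 32, and or in the two low halfwords (ORIS8/ORI8):

    #include <cassert>
    #include <cstdint>

    static int ctz64(uint64_t V) { int N = 0; while (!(V & 1)) { V >>= 1; ++N; } return N; }

    static uint64_t materialize64(int64_t Imm) {
      if (Imm >= INT32_MIN && Imm <= INT32_MAX)
        return (uint64_t)Imm;                       // 32-bit path suffices
      unsigned Shift = ctz64((uint64_t)Imm);
      int64_t ImmSh = (int64_t)((uint64_t)Imm >> Shift);
      if (ImmSh >= INT32_MIN && ImmSh <= INT32_MAX)
        return (uint64_t)ImmSh << Shift;            // rldicr rotates it back
      uint64_t Hi32 = (uint64_t)(Imm >> 32) << 32;  // high word built, shifted by 32
      return Hi32 | ((uint64_t)Imm & 0xFFFFFFFFu);  // oris/ori add the remainder
    }

    int main() {
      assert(materialize64(0x1234567800000000ll) == 0x1234567800000000ull);
      assert(materialize64(0x123456789ABCDEF0ll) == 0x123456789ABCDEF0ull);
      assert(materialize64(-4294967296ll) == 0xFFFFFFFF00000000ull);
    }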
+
+
+// Materialize an integer constant into a register, and return
+// the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::PPCMaterializeInt(const Constant *C, MVT VT) {
+
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8 && VT != MVT::i1)
+ return 0;
+
+ const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
+ &PPC::GPRCRegClass);
+
+ // If the constant is in range, use a load-immediate.
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ if (isInt<16>(CI->getSExtValue())) {
+ unsigned Opc = (VT == MVT::i64) ? PPC::LI8 : PPC::LI;
+ unsigned ImmReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg)
+ .addImm(CI->getSExtValue());
+ return ImmReg;
+ }
+
+ // Construct the constant piecewise.
+ int64_t Imm = CI->getZExtValue();
+
+ if (VT == MVT::i64)
+ return PPCMaterialize64BitInt(Imm, RC);
+ else if (VT == MVT::i32)
+ return PPCMaterialize32BitInt(Imm, RC);
+
+ return 0;
+}
+
+// Materialize a constant into a register, and return the register
+// number (or zero if we failed to handle it).
+unsigned PPCFastISel::TargetMaterializeConstant(const Constant *C) {
+ EVT CEVT = TLI.getValueType(C->getType(), true);
+
+ // Only handle simple types.
+ if (!CEVT.isSimple()) return 0;
+ MVT VT = CEVT.getSimpleVT();
+
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
+ return PPCMaterializeFP(CFP, VT);
+ else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return PPCMaterializeGV(GV, VT);
+ else if (isa<ConstantInt>(C))
+ return PPCMaterializeInt(C, VT);
+
+ return 0;
+}
+
+// Materialize the address created by an alloca into a register, and
+// return the register number (or zero if we failed to handle it).
+unsigned PPCFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
+ // Don't handle dynamic allocas.
+ if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
+
+ MVT VT;
+ if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
+
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ unsigned ResultReg = createResultReg(&PPC::G8RC_and_G8RC_NOX0RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(PPC::ADDI8),
+ ResultReg).addFrameIndex(SI->second).addImm(0);
+ return ResultReg;
+ }
+
+ return 0;
+}
+
+// Fold loads into extends when possible.
+// FIXME: We can have multiple redundant extend/trunc instructions
+// following a load. The folding only picks up one. Extend this
+// to check subsequent instructions for the same pattern and remove
+// them. Thus ResultReg should be the def reg for the last redundant
+// instruction in a chain, and all intervening instructions can be
+// removed from parent. Change test/CodeGen/PowerPC/fast-isel-fold.ll
+// to add ELF64-NOT: rldicl to the appropriate tests when this works.
+bool PPCFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
+ const LoadInst *LI) {
+ // Verify we have a legal type before going any further.
+ MVT VT;
+ if (!isLoadTypeLegal(LI->getType(), VT))
+ return false;
+
+ // Combine load followed by zero- or sign-extend.
+ bool IsZExt = false;
+ switch(MI->getOpcode()) {
+ default:
+ return false;
+
+ case PPC::RLDICL:
+ case PPC::RLDICL_32_64: {
+ IsZExt = true;
+ unsigned MB = MI->getOperand(3).getImm();
+ if ((VT == MVT::i8 && MB <= 56) ||
+ (VT == MVT::i16 && MB <= 48) ||
+ (VT == MVT::i32 && MB <= 32))
+ break;
+ return false;
+ }
+
+ case PPC::RLWINM:
+ case PPC::RLWINM8: {
+ IsZExt = true;
+ unsigned MB = MI->getOperand(3).getImm();
+ if ((VT == MVT::i8 && MB <= 24) ||
+ (VT == MVT::i16 && MB <= 16))
+ break;
+ return false;
+ }
+
+ case PPC::EXTSB:
+ case PPC::EXTSB8:
+ case PPC::EXTSB8_32_64:
+ /* There is no sign-extending load-byte instruction. */
+ return false;
+
+ case PPC::EXTSH:
+ case PPC::EXTSH8:
+ case PPC::EXTSH8_32_64: {
+ if (VT != MVT::i16 && VT != MVT::i8)
+ return false;
+ break;
+ }
+
+ case PPC::EXTSW:
+ case PPC::EXTSW_32_64: {
+ if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8)
+ return false;
+ break;
+ }
+ }
+
+ // See if we can handle this address.
+ Address Addr;
+ if (!PPCComputeAddress(LI->getOperand(0), Addr))
+ return false;
+
+ unsigned ResultReg = MI->getOperand(0).getReg();
+
+ if (!PPCEmitLoad(VT, ResultReg, Addr, 0, IsZExt))
+ return false;
+
+ MI->eraseFromParent();
+ return true;
+}
+
+// Attempt to lower call arguments in a faster way than done by
+// the selection DAG code.
+bool PPCFastISel::FastLowerArguments() {
+ // Defer to normal argument lowering for now. It's reasonably
+ // efficient. Consider doing something like ARM to handle the
+ // case where all args fit in registers, no varargs, no float
+ // or vector args.
+ return false;
+}
+
+// Handle materializing integer constants into a register. This is not
+// automatically generated for PowerPC, so must be explicitly created here.
+unsigned PPCFastISel::FastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
+
+ if (Opc != ISD::Constant)
+ return 0;
+
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
+ VT != MVT::i8 && VT != MVT::i1)
+ return 0;
+
+ const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
+ &PPC::GPRCRegClass);
+ if (VT == MVT::i64)
+ return PPCMaterialize64BitInt(Imm, RC);
+ else
+ return PPCMaterialize32BitInt(Imm, RC);
+}
+
+// Override for ADDI and ADDI8 to set the correct register class
+// on RHS operand 0. The automatic infrastructure naively assumes
+// GPRC for i32 and G8RC for i64; the concept of "no R0" is lost
+// for these cases. At the moment, none of the other automatically
+// generated RI instructions require special treatment. However, once
+// SelectSelect is implemented, "isel" requires similar handling.
+//
+// Also be conservative about the output register class. Avoid
+// assigning R0 or X0 to the output register for GPRC and G8RC
+// register classes, as any such result could be used in ADDI, etc.,
+// where those regs have another meaning.
+unsigned PPCFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ uint64_t Imm) {
+ if (MachineInstOpcode == PPC::ADDI)
+ MRI.setRegClass(Op0, &PPC::GPRC_and_GPRC_NOR0RegClass);
+ else if (MachineInstOpcode == PPC::ADDI8)
+ MRI.setRegClass(Op0, &PPC::G8RC_and_G8RC_NOX0RegClass);
+
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_ri(MachineInstOpcode, UseRC,
+ Op0, Op0IsKill, Imm);
+}
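
Editor's note: a standalone sketch (not part of this patch) of why the NOR0/NOX0 register classes matter. In the PPC addi encoding, a source of r0 is read as the literal value 0 rather than the register's contents, so "addi rD, r0, imm" computes 0 + imm. Modelled in C++:

    #include <cassert>
    #include <cstdint>

    static int64_t addi(unsigned RA, const int64_t GPR[32], int16_t Imm) {
      return (RA == 0 ? 0 : GPR[RA]) + Imm;  // r0 means "zero", per the ISA
    }

    int main() {
      int64_t GPR[32] = {};
      GPR[0] = 123; GPR[3] = 1000;
      assert(addi(3, GPR, 5) == 1005);
      assert(addi(0, GPR, 5) == 5);  // r0's contents (123) are ignored
    }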
+
+// Override for instructions with one register operand to avoid use of
+// R0/X0. The automatic infrastructure isn't aware of the context so
+// we must be conservative.
+unsigned PPCFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC,
+ unsigned Op0, bool Op0IsKill) {
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_r(MachineInstOpcode, UseRC, Op0, Op0IsKill);
+}
+
+// Override for instructions with two register operands to avoid use
+// of R0/X0. The automatic infrastructure isn't aware of the context
+// so we must be conservative.
+unsigned PPCFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
+ const TargetRegisterClass* RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill) {
+ const TargetRegisterClass *UseRC =
+ (RC == &PPC::GPRCRegClass ? &PPC::GPRC_and_GPRC_NOR0RegClass :
+ (RC == &PPC::G8RCRegClass ? &PPC::G8RC_and_G8RC_NOX0RegClass : RC));
+
+ return FastISel::FastEmitInst_rr(MachineInstOpcode, UseRC, Op0, Op0IsKill,
+ Op1, Op1IsKill);
+}
+
+namespace llvm {
+ // Create the fast instruction selector for PowerPC64 ELF.
+ FastISel *PPC::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) {
+ const TargetMachine &TM = FuncInfo.MF->getTarget();
+
+ // Only available on 64-bit ELF for now.
+ const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
+ if (Subtarget->isPPC64() && Subtarget->isSVR4ABI())
+ return new PPCFastISel(FuncInfo, LibInfo);
+
+ return 0;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index c845909..0ac2ced 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -26,17 +26,6 @@
using namespace llvm;
-// FIXME This disables some code that aligns the stack to a boundary bigger than
-// the default (16 bytes on Darwin) when there is a stack local of greater
-// alignment. This does not currently work, because the delta between old and
-// new stack pointers is added to offsets that reference incoming parameters
-// after the prolog is generated, and the code that does that doesn't handle a
-// variable delta. You don't want to do that anyway; a better approach is to
-// reserve another register that retains to the incoming stack pointer, and
-// reference parameters relative to that.
-#define ALIGN_STACK 0
-
-
/// VRRegNo - Map from a numbered VR register to its enum value.
///
static const uint16_t VRRegNo[] = {
@@ -215,11 +204,13 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
unsigned FrameSize =
UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
- // Get the alignments provided by the target, and the maximum alignment
- // (if any) of the fixed frame objects.
- unsigned MaxAlign = MFI->getMaxAlignment();
- unsigned TargetAlign = getStackAlignment();
- unsigned AlignMask = TargetAlign - 1; //
+ // Get stack alignments. The frame must be aligned to the greatest of these:
+ unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
+ unsigned MaxAlign = MFI->getMaxAlignment(); // algmt required by data in frame
+ unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
+
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
// If we are a leaf function, and use up to 224 bytes of stack space,
// don't have a frame pointer, calls, or dynamic alloca then we do not need
@@ -235,7 +226,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
- (!ALIGN_STACK || MaxAlign <= TargetAlign)) { // No special alignment.
+ !RegInfo->hasBasePointer(MF)) { // No special alignment.
// No need for frame
if (UpdateMF)
MFI->setStackSize(0);
@@ -305,6 +296,12 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ bool HasBP = RegInfo->hasBasePointer(MF);
+ unsigned BPReg = HasBP ? (unsigned) PPC::R30 : FPReg;
+ unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FPReg;
+
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI)
for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
@@ -321,6 +318,13 @@ void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
case PPC::FP8:
MO.setReg(FP8Reg);
break;
+ case PPC::BP:
+ MO.setReg(BPReg);
+ break;
+ case PPC::BP8:
+ MO.setReg(BP8Reg);
+ break;
+
}
}
}
@@ -332,18 +336,29 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
const PPCInstrInfo &TII =
*static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
MachineModuleInfo &MMI = MF.getMMI();
+ const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
DebugLoc dl;
bool needsFrameMoves = MMI.hasDebugInfo() ||
MF.getFunction()->needsUnwindTableEntry();
+ // Get processor type.
+ bool isPPC64 = Subtarget.isPPC64();
+ // Get the ABI.
+ bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+ assert((isDarwinABI || isSVR4ABI) &&
+ "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
+
// Prepare for frame info.
MCSymbol *FrameLabel = 0;
// Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
// process it.
- if (!Subtarget.isSVR4ABI())
+ if (!isSVR4ABI)
for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
HandleVRSaveUpdate(MBBI, TII);
@@ -357,26 +372,58 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// Work out frame sizes.
unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
+ if (!isInt<32>(NegFrameSize))
+ llvm_unreachable("Unhandled stack size!");
if (MFI->isFrameAddressTaken())
replaceFPWithRealFP(MF);
- // Get processor type.
- bool isPPC64 = Subtarget.isPPC64();
- // Get operating system
- bool isDarwinABI = Subtarget.isDarwinABI();
// Check if the link register (LR) must be saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
- const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs();
- // Do we have a frame pointer for this function?
+ const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ // Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
+ bool HasBP = RegInfo->hasBasePointer(MF);
+
+ unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
+ unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
+ const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
+ : PPC::MFLR );
+ const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
+ : PPC::STW );
+ const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
+ : PPC::STWU );
+ const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
+ : PPC::STWUX);
+ const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
+ : PPC::LIS );
+ const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
+ : PPC::ORI );
+ const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
+ : PPC::OR );
+ const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
+ : PPC::SUBFC);
+ const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
+ : PPC::SUBFIC);
+
+ // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
+ // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
+ // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
+ // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
+ assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
+ "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
int FPOffset = 0;
if (HasFP) {
- if (Subtarget.isSVR4ABI()) {
+ if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
int FPIndex = FI->getFramePointerSaveIndex();
assert(FPIndex && "No Frame Pointer Save Slot!");
@@ -386,136 +433,130 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
}
}
- if (isPPC64) {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR8), PPC::X0);
-
- if (!MustSaveCRs.empty()) {
- MachineInstrBuilder MIB =
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), PPC::X12);
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
+ int BPOffset = 0;
+ if (HasBP) {
+ if (isSVR4ABI) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int BPIndex = FI->getBasePointerSaveIndex();
+ assert(BPIndex && "No Base Pointer Save Slot!");
+ BPOffset = FFI->getObjectOffset(BPIndex);
+ } else {
+ BPOffset =
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI);
}
-
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
- .addReg(PPC::X31)
- .addImm(FPOffset/4)
- .addReg(PPC::X1);
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STD))
- .addReg(PPC::X0)
- .addImm(LROffset / 4)
- .addReg(PPC::X1);
-
- if (!MustSaveCRs.empty())
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
- .addReg(PPC::X12, getKillRegState(true))
- .addImm(8)
- .addReg(PPC::X1);
- } else {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MFLR), PPC::R0);
-
- if (HasFP)
- // FIXME: On PPC32 SVR4, FPOffset is negative and access to negative
- // offsets of R1 is not allowed.
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
- .addReg(PPC::R31)
- .addImm(FPOffset)
- .addReg(PPC::R1);
-
- assert(MustSaveCRs.empty() &&
- "Prologue CR saving supported only in 64-bit mode");
-
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STW))
- .addReg(PPC::R0)
- .addImm(LROffset)
- .addReg(PPC::R1);
}
- // Skip if a leaf routine.
- if (!FrameSize) return;
-
// Get stack alignments.
- unsigned TargetAlign = getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
+ if (HasBP && MaxAlign > 1)
+ assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
+ "Invalid alignment!");
+
+ // Frames of 32KB & larger require special handling because they cannot be
+ // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
+ bool isLargeFrame = !isInt<16>(NegFrameSize);
+
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
+
+ assert((isPPC64 || MustSaveCRs.empty()) &&
+ "Prologue CR saving supported only in 64-bit mode");
+
+ if (!MustSaveCRs.empty()) { // will only occur for PPC64
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MFCR8), TempReg);
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ MIB.addReg(MustSaveCRs[i], RegState::ImplicitKill);
+ }
+
+ if (HasFP)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(FPReg)
+ .addImm(FPOffset)
+ .addReg(SPReg);
+
+ if (HasBP)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(BPReg)
+ .addImm(BPOffset)
+ .addReg(SPReg);
+
+ if (MustSaveLR)
+ // FIXME: On PPC32 SVR4, we must not spill before claiming the stackframe.
+ BuildMI(MBB, MBBI, dl, StoreInst)
+ .addReg(ScratchReg)
+ .addImm(LROffset)
+ .addReg(SPReg);
+
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
+ .addReg(TempReg, getKillRegState(true))
+ .addImm(8)
+ .addReg(SPReg);
+
+ // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
+ if (!FrameSize) return;
// Adjust stack pointer: r1 += NegFrameSize.
// If there is a preferred stack alignment, align R1 now
- if (!isPPC64) {
- // PPC32.
- if (ALIGN_STACK && MaxAlign > TargetAlign) {
- assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
- "Invalid alignment!");
- assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
-
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), PPC::R0)
- .addReg(PPC::R1)
+
+ if (HasBP) {
+ // Save a copy of r1 as the base pointer.
+ BuildMI(MBB, MBBI, dl, OrInst, BPReg)
+ .addReg(SPReg)
+ .addReg(SPReg);
+ }
+
+ if (HasBP && MaxAlign > 1) {
+ if (isPPC64)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
+ .addReg(SPReg)
+ .addImm(0)
+ .addImm(64 - Log2_32(MaxAlign));
+ else // PPC32...
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
+ .addReg(SPReg)
.addImm(0)
.addImm(32 - Log2_32(MaxAlign))
.addImm(31);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC) ,PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
.addImm(NegFrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
- .addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1)
- .addReg(PPC::R0);
- } else if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWU), PPC::R1)
- .addReg(PPC::R1)
- .addImm(NegFrameSize)
- .addReg(PPC::R1);
} else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
.addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
+ BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
+ .addReg(TempReg, RegState::Kill)
.addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STWUX), PPC::R1)
- .addReg(PPC::R1, RegState::Kill)
- .addReg(PPC::R1)
- .addReg(PPC::R0);
+ BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(TempReg, RegState::Kill);
}
- } else { // PPC64.
- if (ALIGN_STACK && MaxAlign > TargetAlign) {
- assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
- "Invalid alignment!");
- assert(isInt<16>(NegFrameSize) && "Unhandled stack size and alignment!");
-
- BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), PPC::X0)
- .addReg(PPC::X1)
- .addImm(0)
- .addImm(64 - Log2_32(MaxAlign));
- BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBFIC8), PPC::X0)
- .addReg(PPC::X0)
- .addImm(NegFrameSize);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(PPC::X0);
- } else if (isInt<16>(NegFrameSize)) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDU), PPC::X1)
- .addReg(PPC::X1)
- .addImm(NegFrameSize / 4)
- .addReg(PPC::X1);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
- .addImm(NegFrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
- .addImm(NegFrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::STDUX), PPC::X1)
- .addReg(PPC::X1, RegState::Kill)
- .addReg(PPC::X1)
- .addReg(PPC::X0);
- }
- }
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
- std::vector<MachineMove> &Moves = MMI.getFrameMoves();
+ } else if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
+ .addReg(SPReg)
+ .addImm(NegFrameSize)
+ .addReg(SPReg);
+
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
+ .addImm(NegFrameSize >> 16);
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addImm(NegFrameSize & 0xFFFF);
+ BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
+ .addReg(SPReg, RegState::Kill)
+ .addReg(SPReg)
+ .addReg(ScratchReg);
+ }
// Add the "machine moves" for the instructions we generated above, but in
// reverse order.
@@ -525,25 +566,26 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(FrameLabel);
// Show update of SP.
- if (NegFrameSize) {
- MachineLocation SPDst(MachineLocation::VirtualFP);
- MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
- Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
- } else {
- MachineLocation SP(isPPC64 ? PPC::X31 : PPC::R31);
- Moves.push_back(MachineMove(FrameLabel, SP, SP));
- }
+ assert(NegFrameSize);
+ MMI.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(FrameLabel, NegFrameSize));
if (HasFP) {
- MachineLocation FPDst(MachineLocation::VirtualFP, FPOffset);
- MachineLocation FPSrc(isPPC64 ? PPC::X31 : PPC::R31);
- Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
+ unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
+ MMI.addFrameInst(
+ MCCFIInstruction::createOffset(FrameLabel, Reg, FPOffset));
+ }
+
+ if (HasBP) {
+ unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
+ MMI.addFrameInst(
+ MCCFIInstruction::createOffset(FrameLabel, Reg, BPOffset));
}
if (MustSaveLR) {
- MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
- MachineLocation LRSrc(isPPC64 ? PPC::LR8 : PPC::LR);
- Moves.push_back(MachineMove(FrameLabel, LRDst, LRSrc));
+ unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
+ MMI.addFrameInst(
+ MCCFIInstruction::createOffset(FrameLabel, Reg, LROffset));
}
}
@@ -551,15 +593,9 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// If there is a frame pointer, copy R1 into R31
if (HasFP) {
- if (!isPPC64) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR), PPC::R31)
- .addReg(PPC::R1)
- .addReg(PPC::R1);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::OR8), PPC::X31)
- .addReg(PPC::X1)
- .addReg(PPC::X1);
- }
+ BuildMI(MBB, MBBI, dl, OrInst, FPReg)
+ .addReg(SPReg)
+ .addReg(SPReg);
if (needsFrameMoves) {
ReadyLabel = MMI.getContext().CreateTempSymbol();
@@ -567,10 +603,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// Mark effective beginning of when frame pointer is ready.
BuildMI(MBB, MBBI, dl, TII.get(PPC::PROLOG_LABEL)).addSym(ReadyLabel);
- MachineLocation FPDst(HasFP ? (isPPC64 ? PPC::X31 : PPC::R31) :
- (isPPC64 ? PPC::X1 : PPC::R1));
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabel, FPDst, FPSrc));
+ unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
+ MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(ReadyLabel, Reg));
}
}
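The register copy above leans on the PowerPC idiom that `or rD, rS, rS` is the canonical register move (`mr`); in assembly terms the hunk emits roughly (a sketch):

// 32-bit:  or  r31, r1, r1      ; mr r31, r1  -- FP := SP
// 64-bit:  or  x31, x1, x1      ; mr x31, x1
// followed by .cfi_def_cfa_register for FPReg at ReadyLabel, so the
// unwinder tracks the CFA through the frame pointer once later code
// (e.g. dynamic allocas) may move SP.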
@@ -590,26 +624,21 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const {
// For SVR4, don't emit a move for the CR spill slot if we haven't
// spilled CRs.
- if (Subtarget.isSVR4ABI()
- && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
- && MustSaveCRs.empty())
- continue;
+ if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
+ && MustSaveCRs.empty())
+ continue;
// For 64-bit SVR4 when we have spilled CRs, the spill location
// is SP+8, not a frame-relative slot.
- if (Subtarget.isSVR4ABI()
- && Subtarget.isPPC64()
- && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
- MachineLocation CSDst(PPC::X1, 8);
- MachineLocation CSSrc(PPC::CR2);
- Moves.push_back(MachineMove(Label, CSDst, CSSrc));
- continue;
+ if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
+ MMI.addFrameInst(MCCFIInstruction::createOffset(
+ Label, MRI->getDwarfRegNum(PPC::CR2, true), 8));
+ continue;
}
int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(Label, CSDst, CSSrc));
+ MMI.addFrameInst(MCCFIInstruction::createOffset(
+ Label, MRI->getDwarfRegNum(Reg, true), Offset));
}
}
}
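For the 64-bit SVR4 CR case above, a single directive covers all spilled fields, since CR2 through CR4 share one spilled word at SP+8; the resulting unwind entry is roughly (a sketch):

//   .cfi_offset cr2, 8    ; the CR2 entry stands in for CR2-CR4, which
//                         ; are saved together in one 32-bit word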
@@ -620,6 +649,8 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
assert(MBBI != MBB.end() && "Returning block has no terminator");
const PPCInstrInfo &TII =
*static_cast<const PPCInstrInfo*>(MF.getTarget().getInstrInfo());
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
unsigned RetOpcode = MBBI->getOpcode();
DebugLoc dl;
@@ -633,30 +664,49 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
RetOpcode == PPC::TCRETURNai8) &&
"Can only insert epilog into returning blocks");
- // Get alignment info so we know how to restore r1
+ // Get alignment info so we know how to restore the SP.
const MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned TargetAlign = getStackAlignment();
- unsigned MaxAlign = MFI->getMaxAlignment();
// Get the number of bytes allocated from the FrameInfo.
int FrameSize = MFI->getStackSize();
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
- // Get operating system
+ // Get the ABI.
bool isDarwinABI = Subtarget.isDarwinABI();
+ bool isSVR4ABI = Subtarget.isSVR4ABI();
+
// Check if the link register (LR) has been saved.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
bool MustSaveLR = FI->mustSaveLR();
- const SmallVector<unsigned, 3> &MustSaveCRs = FI->getMustSaveCRs();
- // Do we have a frame pointer for this function?
+ const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
+ // Do we have a frame pointer and/or base pointer for this function?
bool HasFP = hasFP(MF);
+ bool HasBP = RegInfo->hasBasePointer(MF);
+
+ unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
+ unsigned BPReg = isPPC64 ? PPC::X30 : PPC::R30;
+ unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
+ unsigned ScratchReg = isPPC64 ? PPC::X0 : PPC::R0;
+ unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
+ const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
+ : PPC::MTLR );
+ const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
+ : PPC::LWZ );
+ const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
+ : PPC::LIS );
+ const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
+ : PPC::ORI );
+ const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
+ : PPC::ADDI );
+ const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
+ : PPC::ADD4 );
int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
int FPOffset = 0;
if (HasFP) {
- if (Subtarget.isSVR4ABI()) {
+ if (isSVR4ABI) {
MachineFrameInfo *FFI = MF.getFrameInfo();
int FPIndex = FI->getFramePointerSaveIndex();
assert(FPIndex && "No Frame Pointer Save Slot!");
@@ -666,6 +716,19 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
+ int BPOffset = 0;
+ if (HasBP) {
+ if (isSVR4ABI) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ int BPIndex = FI->getBasePointerSaveIndex();
+ assert(BPIndex && "No Base Pointer Save Slot!");
+ BPOffset = FFI->getObjectOffset(BPIndex);
+ } else {
+ BPOffset =
+ PPCFrameLowering::getBasePointerSaveOffset(isPPC64, isDarwinABI);
+ }
+ }
+
bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
RetOpcode == PPC::TCRETURNdi ||
RetOpcode == PPC::TCRETURNai ||
@@ -687,98 +750,76 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
FrameSize += StackAdj;
}
+ // Frames of 32KB & larger require special handling because they cannot be
+ // indexed into with a simple LD/LWZ immediate offset operand.
+ bool isLargeFrame = !isInt<16>(FrameSize);
+
if (FrameSize) {
- // The loaded (or persistent) stack pointer value is offset by the 'stwu'
- // on entry to the function. Add this offset back now.
- if (!isPPC64) {
- // If this function contained a fastcc call and GuaranteedTailCallOpt is
- // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
- // call which invalidates the stack pointer value in SP(0). So we use the
- // value of R31 in this case.
- if (FI->hasFastCall() && isInt<16>(FrameSize)) {
- assert(hasFP(MF) && "Expecting a valid the frame pointer.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
- .addReg(PPC::R31).addImm(FrameSize);
- } else if(FI->hasFastCall()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS), PPC::R0)
- .addImm(FrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI), PPC::R0)
- .addReg(PPC::R0, RegState::Kill)
- .addImm(FrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD4))
- .addReg(PPC::R1)
- .addReg(PPC::R31)
- .addReg(PPC::R0);
- } else if (isInt<16>(FrameSize) &&
- (!ALIGN_STACK || TargetAlign >= MaxAlign) &&
- !MFI->hasVarSizedObjects()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), PPC::R1)
- .addReg(PPC::R1).addImm(FrameSize);
+ // In the prologue, the loaded (or persistent) stack pointer value is offset
+ // by the STDU/STDUX/STWU/STWUX instruction. Add this offset back now.
+
+ // If this function contained a fastcc call and GuaranteedTailCallOpt is
+ // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
+ // call which invalidates the stack pointer value in SP(0). So we use the
+ // value of R31 in this case.
+ if (FI->hasFastCall()) {
+ assert(HasFP && "Expecting a valid frame pointer.");
+ if (!isLargeFrame) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(FPReg).addImm(FrameSize);
} else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ),PPC::R1)
- .addImm(0).addReg(PPC::R1);
- }
- } else {
- if (FI->hasFastCall() && isInt<16>(FrameSize)) {
- assert(hasFP(MF) && "Expecting a valid the frame pointer.");
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
- .addReg(PPC::X31).addImm(FrameSize);
- } else if(FI->hasFastCall()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LIS8), PPC::X0)
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
.addImm(FrameSize >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ORI8), PPC::X0)
- .addReg(PPC::X0, RegState::Kill)
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
.addImm(FrameSize & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADD8))
- .addReg(PPC::X1)
- .addReg(PPC::X31)
- .addReg(PPC::X0);
- } else if (isInt<16>(FrameSize) && TargetAlign >= MaxAlign &&
- !MFI->hasVarSizedObjects()) {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI8), PPC::X1)
- .addReg(PPC::X1).addImm(FrameSize);
- } else {
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X1)
- .addImm(0).addReg(PPC::X1);
+ BuildMI(MBB, MBBI, dl, AddInst)
+ .addReg(SPReg)
+ .addReg(FPReg)
+ .addReg(ScratchReg);
}
+ } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) {
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg)
+ .addImm(FrameSize);
+ } else {
+ BuildMI(MBB, MBBI, dl, LoadInst, SPReg)
+ .addImm(0)
+ .addReg(SPReg);
}
- }
- if (isPPC64) {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X0)
- .addImm(LROffset/4).addReg(PPC::X1);
+ }
- if (!MustSaveCRs.empty())
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), PPC::X12)
- .addImm(8).addReg(PPC::X1);
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
+ .addImm(LROffset)
+ .addReg(SPReg);
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LD), PPC::X31)
- .addImm(FPOffset/4).addReg(PPC::X1);
+ assert((isPPC64 || MustSaveCRs.empty()) &&
+ "Epilogue CR restoring supported only in 64-bit mode");
- if (!MustSaveCRs.empty())
- for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTCRF8), MustSaveCRs[i])
- .addReg(PPC::X12, getKillRegState(i == e-1));
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
+ .addImm(8)
+ .addReg(SPReg);
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR8)).addReg(PPC::X0);
- } else {
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R0)
- .addImm(LROffset).addReg(PPC::R1);
+ if (HasFP)
+ BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
+ .addImm(FPOffset)
+ .addReg(SPReg);
- assert(MustSaveCRs.empty() &&
- "Epilogue CR restoring supported only in 64-bit mode");
+ if (HasBP)
+ BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
+ .addImm(BPOffset)
+ .addReg(SPReg);
- if (HasFP)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ), PPC::R31)
- .addImm(FPOffset).addReg(PPC::R1);
+ if (!MustSaveCRs.empty()) // will only occur for PPC64
+ for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
+ .addReg(TempReg, getKillRegState(i == e-1));
- if (MustSaveLR)
- BuildMI(MBB, MBBI, dl, TII.get(PPC::MTLR)).addReg(PPC::R0);
- }
+ if (MustSaveLR)
+ BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
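Put together, the unified restore sequence above comes out in this order for a 64-bit SVR4 frame with LR, CR, FP and BP saves (a sketch; the FP/BP displacements are hypothetical):

//   ld     r0, 16(r1)     ; LROffset -> ScratchReg (load LR's value first)
//   lwz    r12, 8(r1)     ; saved CR word -> TempReg
//   ld     r31, -8(r1)    ; FPOffset -> FPReg
//   ld     r30, -16(r1)   ; BPOffset -> BPReg
//   mtocrf 0x20, r12      ; one MTOCRF8 per field in MustSaveCRs
//   mtlr   r0             ; move to LR last, freeing ScratchReg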
// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
@@ -786,27 +827,20 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
MF.getFunction()->getCallingConv() == CallingConv::Fast) {
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
unsigned CallerAllocatedAmt = FI->getMinReservedArea();
- unsigned StackReg = isPPC64 ? PPC::X1 : PPC::R1;
- unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
- unsigned TmpReg = isPPC64 ? PPC::X0 : PPC::R0;
- unsigned ADDIInstr = isPPC64 ? PPC::ADDI8 : PPC::ADDI;
- unsigned ADDInstr = isPPC64 ? PPC::ADD8 : PPC::ADD4;
- unsigned LISInstr = isPPC64 ? PPC::LIS8 : PPC::LIS;
- unsigned ORIInstr = isPPC64 ? PPC::ORI8 : PPC::ORI;
if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
- BuildMI(MBB, MBBI, dl, TII.get(ADDIInstr), StackReg)
- .addReg(StackReg).addImm(CallerAllocatedAmt);
+ BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
+ .addReg(SPReg).addImm(CallerAllocatedAmt);
} else {
- BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
+ BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
.addImm(CallerAllocatedAmt >> 16);
- BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
- .addReg(TmpReg, RegState::Kill)
+ BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
+ .addReg(ScratchReg, RegState::Kill)
.addImm(CallerAllocatedAmt & 0xFFFF);
- BuildMI(MBB, MBBI, dl, TII.get(ADDInstr))
- .addReg(StackReg)
+ BuildMI(MBB, MBBI, dl, AddInst)
+ .addReg(SPReg)
.addReg(FPReg)
- .addReg(TmpReg);
+ .addReg(ScratchReg);
}
} else if (RetOpcode == PPC::TCRETURNdi) {
MBBI = MBB.getLastNonDebugInstr();
@@ -854,7 +888,8 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *) const {
- const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
// Save and clear the LR state.
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
@@ -879,6 +914,15 @@ PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
FI->setFramePointerSaveIndex(FPSI);
}
+ int BPSI = FI->getBasePointerSaveIndex();
+ if (!BPSI && RegInfo->hasBasePointer(MF)) {
+ int BPOffset = getBasePointerSaveOffset(isPPC64, isDarwinABI);
+ // Allocate the frame index for the base pointer save area.
+ BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
+ // Save the result.
+ FI->setBasePointerSaveIndex(BPSI);
+ }
+
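A sketch of what the new fixed object pins down, using the constants from getBasePointerSaveOffset (see the PPCFrameLowering.h hunk below); the slot sits at a known offset from the incoming SP, so it is addressable both before and after the prologue's stack adjustment:

//   ppc64: MFI->CreateFixedObject(/*Size=*/8, /*SPOffset=*/-16, /*Immutable=*/true)
//   ppc32: MFI->CreateFixedObject(/*Size=*/4, /*SPOffset=*/-8,  /*Immutable=*/true)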
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
@@ -1010,6 +1054,17 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
}
+ const PPCRegisterInfo *RegInfo =
+ static_cast<const PPCRegisterInfo*>(MF.getTarget().getRegisterInfo());
+ if (RegInfo->hasBasePointer(MF)) {
+ HasGPSaveArea = true;
+
+ int FI = PFI->getBasePointerSaveIndex();
+ assert(FI && "No Base Pointer Save Slot!");
+
+ FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ }
+
// General register save area starts right below the Floating-point
// register save area.
if (HasGPSaveArea || HasG8SaveArea) {
@@ -1122,8 +1177,12 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
RC->getAlignment(),
false));
+ // Might we have over-aligned allocas?
+ bool HasAlVars = MFI->hasVarSizedObjects() &&
+ MFI->getMaxAlignment() > getStackAlignment();
+
// These kinds of spills might need two registers.
- if (spillsCR(MF) || spillsVRSAVE(MF))
+ if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
RC->getAlignment(),
false));
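Restated as a standalone predicate (a sketch, not LLVM code), the condition for reserving a second scavenging slot after this change is:

bool needsSecondScavengingSlot(bool SpillsCR, bool SpillsVRSAVE,
                               bool HasVarSizedObjects,
                               unsigned MaxAlign, unsigned StackAlign) {
  // Over-aligned dynamic allocas force realignment code that, like CR and
  // VRSAVE spills, can tie up two scavenged registers at once.
  bool HasAlVars = HasVarSizedObjects && MaxAlign > StackAlign;
  return SpillsCR || SpillsVRSAVE || HasAlVars;
}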
@@ -1151,6 +1210,12 @@ PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
+ // Only Darwin actually uses the VRSAVE register, but it can still appear
+ // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
+ // Darwin, ignore it.
+ if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ continue;
+
// CR2 through CR4 are the nonvolatile CR fields.
bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
@@ -1212,7 +1277,7 @@ restoreCRs(bool isPPC64, bool is31,
MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
PPC::R12),
CSI[CSIIndex].getFrameIdx()));
- RestoreOp = PPC::MTCRF;
+ RestoreOp = PPC::MTOCRF;
MoveReg = PPC::R12;
}
@@ -1300,6 +1365,12 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
+ // Only Darwin actually uses the VRSAVE register, but it can still appear
+ // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
+ // Darwin, ignore it.
+ if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
+ continue;
+
if (Reg == PPC::CR2) {
CR2Spilled = true;
// The spill slot is associated only with CR2, which is the
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
index 6f5f936..7aab37e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h
@@ -94,6 +94,16 @@ public:
return isPPC64 ? -8U : -4U;
}
+ /// getBasePointerSaveOffset - Return the previous frame offset to save the
+ /// base pointer.
+ static unsigned getBasePointerSaveOffset(bool isPPC64, bool isDarwinABI) {
+ if (isDarwinABI)
+ return isPPC64 ? -16U : -8U;
+
+ // SVR4 ABI: First slot in the general register save area.
+ return isPPC64 ? -16U : -8U;
+ }
+
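Usage sketch: both ABI branches currently yield the same constants, placing the base-pointer slot one pointer-width below the frame-pointer slot returned just above:

//   getBasePointerSaveOffset(/*isPPC64=*/true,  /*isDarwinABI=*/either) == -16U
//   getBasePointerSaveOffset(/*isPPC64=*/false, /*isDarwinABI=*/either) == -8U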
/// getLinkageSize - Return the size of the PowerPC ABI linkage area.
///
static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 4bf1e33..0df50e1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -71,8 +71,8 @@ void PPCScoreboardHazardRecognizer::Reset() {
// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".
//
-PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
- : TII(tii) {
+PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetMachine &TM)
+ : TM(TM) {
EndDispatchGroup();
}
@@ -91,7 +91,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
bool &isFirst, bool &isSingle,
bool &isCracked,
bool &isLoad, bool &isStore) {
- const MCInstrDesc &MCID = TII.get(Opcode);
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(Opcode);
isLoad = MCID.mayLoad();
isStore = MCID.mayStore();
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
index 55b45d0..84b8e6de 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.h
@@ -43,7 +43,7 @@ public:
/// setting the CTR register then branching through it within a dispatch group),
/// or storing then loading from the same address within a dispatch group.
class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
- const TargetInstrInfo &TII;
+ const TargetMachine &TM;
unsigned NumIssued; // Number of insts issued, including advanced cycles.
@@ -64,7 +64,7 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
unsigned NumStores;
public:
- PPCHazardRecognizer970(const TargetInstrInfo &TII);
+ PPCHazardRecognizer970(const TargetMachine &TM);
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 3140e53..6ba6af6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -110,13 +110,13 @@ namespace {
/// SelectCC - Select a comparison of the specified values with the
/// specified condition code, returning the CR# of the expression.
- SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, DebugLoc dl);
+ SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDLoc dl);
/// SelectAddrImm - Returns true if the address N can be represented by
/// a base register plus a signed 16-bit displacement [r+imm].
bool SelectAddrImm(SDValue N, SDValue &Disp,
SDValue &Base) {
- return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG);
+ return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, false);
}
/// SelectAddrImmOffs - Return true if the operand is valid for a preinc
@@ -145,11 +145,11 @@ namespace {
return PPCLowering.SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
}
- /// SelectAddrImmShift - Returns true if the address N can be represented by
- /// a base register plus a signed 14-bit displacement [r+imm*4]. Suitable
- /// for use by STD and friends.
- bool SelectAddrImmShift(SDValue N, SDValue &Disp, SDValue &Base) {
- return PPCLowering.SelectAddressRegImmShift(N, Disp, Base, *CurDAG);
+ /// SelectAddrImmX4 - Returns true if the address N can be represented by
+ /// a base register plus a signed 16-bit displacement that is a multiple of 4.
+ /// Suitable for use by STD and friends.
+ bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
+ return PPCLowering.SelectAddressRegImm(N, Disp, Base, *CurDAG, true);
}
// Select an address into a single register.
@@ -330,19 +330,22 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
}
bool PPCDAGToDAGISel::isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME) {
+ if (!Val)
+ return false;
+
if (isShiftedMask_32(Val)) {
// look for the first non-zero bit
- MB = CountLeadingZeros_32(Val);
+ MB = countLeadingZeros(Val);
// look for the first zero bit after the run of ones
- ME = CountLeadingZeros_32((Val - 1) ^ Val);
+ ME = countLeadingZeros((Val - 1) ^ Val);
return true;
} else {
Val = ~Val; // invert mask
if (isShiftedMask_32(Val)) {
// effectively look for the first zero bit
- ME = CountLeadingZeros_32(Val) - 1;
+ ME = countLeadingZeros(Val) - 1;
// effectively look for the first one bit after the run of zeros
- MB = CountLeadingZeros_32((Val - 1) ^ Val) + 1;
+ MB = countLeadingZeros((Val - 1) ^ Val) + 1;
return true;
}
}
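A worked example of the rewritten mask logic (a sketch, not LLVM code):

//   Val = 0x00FF0000;                     // run of ones in bits 8..15 (MSB = bit 0)
//   countLeadingZeros(Val)             == 8   -> MB = 8
//   countLeadingZeros((Val - 1) ^ Val) == 15  -> ME = 15
// The new `if (!Val) return false;` guard matters: ~0 == 0xFFFFFFFF *is* a
// shifted mask, so a zero Val would otherwise take the inverted path and
// compute garbage, and the caller below no longer pre-checks InsertMask != 0.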
@@ -397,7 +400,7 @@ bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
APInt LKZ, LKO, RKZ, RKO;
CurDAG->ComputeMaskedBits(Op0, LKZ, LKO);
@@ -435,7 +438,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
}
unsigned MB, ME;
- if (InsertMask && isRunOfOnes(InsertMask, MB, ME)) {
+ if (isRunOfOnes(InsertMask, MB, ME)) {
SDValue Tmp1, Tmp2;
if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
@@ -447,10 +450,10 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
unsigned SHOpc = Op1.getOperand(0).getOpcode();
if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) &&
isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) {
+ // Note that Value must be in range here (less than 32) because
+ // otherwise there would not be any bits set in InsertMask.
Op1 = Op1.getOperand(0).getOperand(0);
SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
- } else {
- Op1 = Op1.getOperand(0);
}
}
@@ -466,7 +469,7 @@ SDNode *PPCDAGToDAGISel::SelectBitfieldInsert(SDNode *N) {
/// SelectCC - Select a comparison of the specified values with the specified
/// condition code, returning the CR# of the expression.
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS,
- ISD::CondCode CC, DebugLoc dl) {
+ ISD::CondCode CC, SDLoc dl) {
// Always select the LHS.
unsigned Opc;
@@ -594,12 +597,8 @@ static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC) {
/// getCRIdxForSetCC - Return the index of the condition register field
/// associated with the SetCC condition, and whether or not the field is
/// treated as inverted. That is, lt = 0; ge = 0 inverted.
-///
-/// If this returns with Other != -1, then the returned comparison is an or of
-/// two simpler comparisons. In this case, Invert is guaranteed to be false.
-static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) {
+static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
Invert = false;
- Other = -1;
switch (CC) {
default: llvm_unreachable("Unknown condition!");
case ISD::SETOLT:
@@ -710,7 +709,7 @@ static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) {
SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
unsigned Imm;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
@@ -847,8 +846,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
}
bool Inv;
- int OtherCondIdx;
- unsigned Idx = getCRIdxForSetCC(CC, Inv, OtherCondIdx);
+ unsigned Idx = getCRIdxForSetCC(CC, Inv);
SDValue CCReg = SelectCC(LHS, RHS, CC, dl);
SDValue IntCR;
@@ -859,42 +857,25 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
InFlag).getValue(1);
- if (PPCSubTarget.hasMFOCRF() && OtherCondIdx == -1)
- IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
- CCReg), 0);
- else
- IntCR = SDValue(CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
- CR7Reg, CCReg), 0);
+ IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
+ CCReg), 0);
SDValue Ops[] = { IntCR, getI32Imm((32-(3-Idx)) & 31),
getI32Imm(31), getI32Imm(31) };
- if (OtherCondIdx == -1 && !Inv)
+ if (!Inv)
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
// Get the specified bit.
SDValue Tmp =
SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
- if (Inv) {
- assert(OtherCondIdx == -1 && "Can't have split plus negation");
- return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
- }
-
- // Otherwise, we have to turn an operation like SETONE -> SETOLT | SETOGT.
- // We already got the bit for the first part of the comparison (e.g. SETULE).
-
- // Get the other bit of the comparison.
- Ops[1] = getI32Imm((32-(3-OtherCondIdx)) & 31);
- SDValue OtherCond =
- SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
-
- return CurDAG->SelectNodeTo(N, PPC::OR, MVT::i32, Tmp, OtherCond);
+ return CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1));
}
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
if (N->isMachineOpcode()) {
N->setNodeId(-1);
return NULL; // Already selected.
@@ -914,7 +895,7 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
// If it can't be represented as a 32 bit value.
if (!isInt<32>(Imm)) {
- Shift = CountTrailingZeros_64(Imm);
+ Shift = countTrailingZeros<uint64_t>(Imm);
int64_t ImmSh = static_cast<uint64_t>(Imm) >> Shift;
// If the shifted value fits 32 bits.
@@ -994,15 +975,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
getSmallIPtrImm(0));
}
- case PPCISD::MFCR: {
+ case PPCISD::MFOCRF: {
SDValue InFlag = N->getOperand(1);
- // Use MFOCRF if supported.
- if (PPCSubTarget.hasMFOCRF())
- return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
- N->getOperand(0), InFlag);
- else
- return CurDAG->getMachineNode(PPC::MFCRpseud, dl, MVT::i32,
- N->getOperand(0), InFlag);
+ return CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
+ N->getOperand(0), InFlag);
}
case ISD::SDIV: {
@@ -1244,6 +1220,15 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
getI32Imm(BROpc) };
return CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops, 4);
}
+ case PPCISD::BDNZ:
+ case PPCISD::BDZ: {
+ bool IsPPC64 = PPCSubTarget.isPPC64();
+ SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
+ return CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ ?
+ (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :
+ (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
+ MVT::Other, Ops, 2);
+ }
case PPCISD::COND_BRANCH: {
// Op #0 is the Chain.
// Op #1 is the PPC::PRED_* number.
@@ -1495,13 +1480,13 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
continue;
break;
case PPC::ADDIdtprelL:
- Flags = PPCII::MO_DTPREL16_LO;
+ Flags = PPCII::MO_DTPREL_LO;
break;
case PPC::ADDItlsldL:
- Flags = PPCII::MO_TLSLD16_LO;
+ Flags = PPCII::MO_TLSLD_LO;
break;
case PPC::ADDItocL:
- Flags = PPCII::MO_TOC16_LO;
+ Flags = PPCII::MO_TOC_LO;
break;
}
@@ -1521,8 +1506,16 @@ void PPCDAGToDAGISel::PostprocessISelDAG() {
// immediate operand, add it now.
if (ReplaceFlags) {
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
- DebugLoc dl = GA->getDebugLoc();
+ SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
+ // We can't perform this optimization for data whose alignment
+ // is insufficient for the instruction encoding.
+ if (GV->getAlignment() < 4 &&
+ (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD ||
+ StorageOpcode == PPC::LWA)) {
+ DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
+ continue;
+ }
ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags);
} else if (ConstantPoolSDNode *CP =
dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
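Background for the alignment rejection above: LD, STD and LWA are DS-form instructions, whose displacement field encodes only bits 15..2 of the offset, with the low two bits implicitly zero. Folding a `@toc@l`-style low part into one is therefore only sound when the symbol is at least 4-byte aligned. A sketch of the constraint:

//   valid DS-form displacements: ..., -8, -4, 0, 4, 8, ...  (offset % 4 == 0)
//   GV->getAlignment() >= 4 guarantees the folded low part has
//   its low two bits clear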
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 3fcafdc..8da5f05 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16,6 +16,7 @@
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
+#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -36,21 +37,6 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State);
-
static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
@@ -64,14 +50,15 @@ static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
if (TM.getSubtargetImpl()->isDarwin())
return new TargetLoweringObjectFileMachO();
+ if (TM.getSubtargetImpl()->isSVR4ABI())
+ return new PPC64LinuxTargetObjectFile();
+
return new TargetLoweringObjectFileELF();
}
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
: TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
- PPCRegInfo = TM.getRegisterInfo();
- PPCII = TM.getInstrInfo();
setPow2DivIsCheap();
@@ -162,28 +149,24 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
- setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ if (Subtarget->hasFCPSGN()) {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
+ } else {
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ }
if (Subtarget->hasFPRND()) {
setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
setOperationAction(ISD::FCEIL, MVT::f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
setOperationAction(ISD::FCEIL, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
-
- // frin does not implement "ties to even." Thus, this is safe only in
- // fast-math mode.
- if (TM.Options.UnsafeFPMath) {
- setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
- setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
-
- // These need to set FE_INEXACT, and use a custom inserter.
- setOperationAction(ISD::FRINT, MVT::f64, Legal);
- setOperationAction(ISD::FRINT, MVT::f32, Legal);
- }
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
// PowerPC does not have BSWAP, CTPOP or CTTZ
@@ -241,11 +224,6 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We cannot sextinreg(i1). Expand to shifts.
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
- setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
-
// NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
// SjLj exception handling but a light-weight setjmp/longjmp replacement to
// support continuation, user-level threading, and etc.. As a result, no
@@ -298,8 +276,13 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
} else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ if (Subtarget->isSVR4ABI() && !isPPC64)
+ // VACOPY is custom lowered with the 32-bit SVR4 ABI.
+ setOperationAction(ISD::VACOPY , MVT::Other, Custom);
+ else
+ setOperationAction(ISD::VACOPY , MVT::Other, Expand);
+
// Use the default implementation.
- setOperationAction(ISD::VACOPY , MVT::Other, Expand);
setOperationAction(ISD::VAEND , MVT::Other, Expand);
setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
@@ -309,6 +292,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+ // To handle counter-based loop conditions.
+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);
+
// Comparisons that require checking two conditions.
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
@@ -407,6 +393,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
setOperationAction(ISD::FDIV, VT, Expand);
+ setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FNEG, VT, Expand);
setOperationAction(ISD::FSQRT, VT, Expand);
setOperationAction(ISD::FLOG, VT, Expand);
@@ -501,6 +488,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
+
+ setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
+ setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
}
if (Subtarget->has64BitSupport()) {
@@ -529,9 +519,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::BR_CC);
setTargetDAGCombine(ISD::BSWAP);
+ setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
// Use reciprocal estimates.
if (TM.Options.UnsafeFPMath) {
@@ -564,7 +556,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setInsertFencesForAtomic(true);
- setSchedulingPreference(Sched::Hybrid);
+ if (Subtarget->enableMachineScheduler())
+ setSchedulingPreference(Sched::Source);
+ else
+ setSchedulingPreference(Sched::Hybrid);
computeRegisterProperties();
@@ -583,24 +578,47 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
}
}
+/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
+/// the desired ByVal argument alignment.
+static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
+ unsigned MaxMaxAlign) {
+ if (MaxAlign == MaxMaxAlign)
+ return;
+ if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
+ if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
+ MaxAlign = 32;
+ else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
+ MaxAlign = 16;
+ } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ unsigned EltAlign = 0;
+ getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
+ if (EltAlign > MaxAlign)
+ MaxAlign = EltAlign;
+ if (MaxAlign == MaxMaxAlign)
+ break;
+ }
+ }
+}
+
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
- const TargetMachine &TM = getTargetMachine();
// Darwin passes everything on 4 byte boundary.
- if (TM.getSubtarget<PPCSubtarget>().isDarwin())
+ if (PPCSubTarget.isDarwin())
return 4;
// 16byte and wider vectors are passed on 16byte boundary.
- if (VectorType *VTy = dyn_cast<VectorType>(Ty))
- if (VTy->getBitWidth() >= 128)
- return 16;
-
// The rest is 8 on PPC64 and 4 on PPC32 boundary.
- if (PPCSubTarget.isPPC64())
- return 8;
-
- return 4;
+ unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
+ if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
+ getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
+ return Align;
}
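The practical effect of the recursive walk (a sketch with hypothetical argument types, Altivec enabled, non-Darwin):

//   getByValTypeAlignment(<4 x i32>)          -> 16 (as before)
//   getByValTypeAlignment({ i32, <4 x i32> }) -> 16 (previously 8 on ppc64 /
//       4 on ppc32: vectors nested inside aggregates were not examined)
//   getByValTypeAlignment(i64)                -> 8 on ppc64, 4 on ppc32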
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
@@ -634,7 +652,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
- case PPCISD::MFCR: return "PPCISD::MFCR";
+ case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
case PPCISD::VCMP: return "PPCISD::VCMP";
case PPCISD::VCMPo: return "PPCISD::VCMPo";
case PPCISD::LBRX: return "PPCISD::LBRX";
@@ -642,6 +660,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::LARX: return "PPCISD::LARX";
case PPCISD::STCX: return "PPCISD::STCX";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
+ case PPCISD::BDNZ: return "PPCISD::BDNZ";
+ case PPCISD::BDZ: return "PPCISD::BDZ";
case PPCISD::MFFS: return "PPCISD::MFFS";
case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
@@ -662,10 +682,11 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
+ case PPCISD::SC: return "PPCISD::SC";
}
}
-EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
+EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
if (!VT.isVector())
return MVT::i32;
return VT.changeVectorElementTypeToInteger();
@@ -1036,24 +1057,68 @@ bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
return false;
}
+// If we happen to be doing an i64 load or store into a stack slot that has
+// less than a 4-byte alignment, then the frame-index elimination may need to
+// use an indexed load or store instruction (because the offset may not be a
+// multiple of 4). The extra register needed to hold the offset comes from the
+// register scavenger, and it is possible that the scavenger will need to use
+// an emergency spill slot. As a result, we need to make sure that a spill slot
+// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
+// stack slot.
+static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
+ // FIXME: This does not handle the LWA case.
+ if (VT != MVT::i64)
+ return;
+
+ // NOTE: We'll exclude negative FIs here, which come from argument
+ // lowering, because there are no known test cases triggering this problem
+ // using packed structures (or similar). We can remove this exclusion if
+ // we find such a test case. The reason why this is so test-case driven is
+ // because this entire 'fixup' is only to prevent crashes (from the
+ // register scavenger) on not-really-valid inputs. For example, if we have:
+ // %a = alloca i1
+ // %b = bitcast i1* %a to i64*
+ // store i64* a, i64 b
+ // then the store should really be marked as 'align 1', but is not. If it
+ // were marked as 'align 1' then the indexed form would have been
+ // instruction-selected initially, and the problem this 'fixup' is preventing
+ // won't happen regardless.
+ if (FrameIdx < 0)
+ return;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ if (Align >= 4)
+ return;
+
+ PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
+ FuncInfo->setHasNonRISpills();
+}
+
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
-/// represented as reg+reg.
+/// represented as reg+reg. If Aligned is true, only accept displacements
+/// suitable for STD and friends, i.e. multiples of 4.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
SDValue &Base,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG,
+ bool Aligned) const {
// FIXME dl should come from parent load or store, not from address
- DebugLoc dl = N.getDebugLoc();
+ SDLoc dl(N);
// If this can be more profitably realized as r+r, fail.
if (SelectAddressRegReg(N, Disp, Base, DAG))
return false;
if (N.getOpcode() == ISD::ADD) {
short imm = 0;
- if (isIntS16Immediate(N.getOperand(1), imm)) {
- Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!Aligned || (imm & 3) == 0)) {
+ Disp = DAG.getTargetConstant(imm, N.getValueType());
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
} else {
Base = N.getOperand(0);
}
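The effect of the new Aligned flag on the ADD path above, with hypothetical displacements:

//   base + 12: accepted for both forms              ((12 & 3) == 0)
//   base + 10: accepted only with Aligned == false; for STD and friends
//              (Aligned == true) the match fails and selection falls
//              through to the remaining addressing forms instead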
@@ -1072,7 +1137,8 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
}
} else if (N.getOpcode() == ISD::OR) {
short imm = 0;
- if (isIntS16Immediate(N.getOperand(1), imm)) {
+ if (isIntS16Immediate(N.getOperand(1), imm) &&
+ (!Aligned || (imm & 3) == 0)) {
// If this is an or of disjoint bitfields, we can codegen this as an add
// (for better address arithmetic) if the LHS and RHS of the OR are
// provably disjoint.
@@ -1083,7 +1149,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If all of the bits are known zero on the LHS or RHS, the add won't
// carry.
Base = N.getOperand(0);
- Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
+ Disp = DAG.getTargetConstant(imm, N.getValueType());
return true;
}
}
@@ -1093,7 +1159,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
// If this address fits entirely in a 16-bit sext immediate field, codegen
// this as "d, 0"
short Imm;
- if (isIntS16Immediate(CN, Imm)) {
+ if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
CN->getValueType(0));
@@ -1101,8 +1167,9 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
}
// Handle 32-bit sext immediates with LIS + addr mode.
- if (CN->getValueType(0) == MVT::i32 ||
- (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
+ if ((CN->getValueType(0) == MVT::i32 ||
+ (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
+ (!Aligned || (CN->getZExtValue() & 3) == 0)) {
int Addr = (int)CN->getZExtValue();
// Otherwise, break this down into an LIS + disp.
@@ -1116,9 +1183,10 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
}
Disp = DAG.getTargetConstant(0, getPointerTy());
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
- else
+ fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
+ } else
Base = N;
return true; // [r+0]
}
@@ -1150,92 +1218,6 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
return true;
}
-/// SelectAddressRegImmShift - Returns true if the address N can be
-/// represented by a base register plus a signed 14-bit displacement
-/// [r+imm*4]. Suitable for use by STD and friends.
-bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
- SDValue &Base,
- SelectionDAG &DAG) const {
- // FIXME dl should come from the parent load or store, not the address
- DebugLoc dl = N.getDebugLoc();
- // If this can be more profitably realized as r+r, fail.
- if (SelectAddressRegReg(N, Disp, Base, DAG))
- return false;
-
- if (N.getOpcode() == ISD::ADD) {
- short imm = 0;
- if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
- Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
- Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
- } else {
- Base = N.getOperand(0);
- }
- return true; // [r+i]
- } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
- // Match LOAD (ADD (X, Lo(G))).
- assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
- && "Cannot handle constant offsets yet!");
- Disp = N.getOperand(1).getOperand(0); // The global address.
- assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
- Disp.getOpcode() == ISD::TargetConstantPool ||
- Disp.getOpcode() == ISD::TargetJumpTable);
- Base = N.getOperand(0);
- return true; // [&g+r]
- }
- } else if (N.getOpcode() == ISD::OR) {
- short imm = 0;
- if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
- // If this is an or of disjoint bitfields, we can codegen this as an add
- // (for better address arithmetic) if the LHS and RHS of the OR are
- // provably disjoint.
- APInt LHSKnownZero, LHSKnownOne;
- DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
- if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
- // If all of the bits are known zero on the LHS or RHS, the add won't
- // carry.
- Base = N.getOperand(0);
- Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
- return true;
- }
- }
- } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
- // Loading from a constant address. Verify low two bits are clear.
- if ((CN->getZExtValue() & 3) == 0) {
- // If this address fits entirely in a 14-bit sext immediate field, codegen
- // this as "d, 0"
- short Imm;
- if (isIntS16Immediate(CN, Imm)) {
- Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
- Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
- CN->getValueType(0));
- return true;
- }
-
- // Fold the low-part of 32-bit absolute addresses into addr mode.
- if (CN->getValueType(0) == MVT::i32 ||
- (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
- int Addr = (int)CN->getZExtValue();
-
- // Otherwise, break this down into an LIS + disp.
- Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
- Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
- unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
- Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
- return true;
- }
- }
- }
-
- Disp = DAG.getTargetConstant(0, getPointerTy());
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
- Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
- else
- Base = N;
- return true; // [r+0]
-}
-
-
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -1288,18 +1270,16 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
return true;
}
- // LDU/STU use reg+imm*4, others use reg+imm.
+ // LDU/STU can only handle immediates that are a multiple of 4.
if (VT != MVT::i64) {
- // reg + imm
- if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
return false;
} else {
// LDU/STU need an address with at least 4-byte alignment.
if (Alignment < 4)
return false;
- // reg + imm * 4.
- if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
+ if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
return false;
}
@@ -1324,8 +1304,8 @@ bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
unsigned &LoOpFlags, const GlobalValue *GV = 0) {
- HiOpFlags = PPCII::MO_HA16;
- LoOpFlags = PPCII::MO_LO16;
+ HiOpFlags = PPCII::MO_HA;
+ LoOpFlags = PPCII::MO_LO;
// Don't use the pic base if not in PIC relocation model. Or if we are on a
// non-darwin platform. We don't support PIC on other platforms yet.
@@ -1355,7 +1335,7 @@ static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
SelectionDAG &DAG) {
EVT PtrVT = HiPart.getValueType();
SDValue Zero = DAG.getConstant(0, PtrVT);
- DebugLoc DL = HiPart.getDebugLoc();
+ SDLoc DL(HiPart);
SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
@@ -1380,7 +1360,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
// The actual address of the GlobalValue is stored in the TOC.
if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
- return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
@@ -1401,7 +1381,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// The actual address of the GlobalValue is stored in the TOC.
if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
+ return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
DAG.getRegister(PPC::X2, MVT::i64));
}
@@ -1428,8 +1408,12 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
+ // FIXME: TLS addresses currently use medium model code sequences,
+ // which is the most useful form. Eventually support for small and
+ // large models could be added if users need it, at the cost of
+ // additional complexity.
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
- DebugLoc dl = GA->getDebugLoc();
+ SDLoc dl(GA);
const GlobalValue *GV = GA->getGlobal();
EVT PtrVT = getPointerTy();
bool is64bit = PPCSubTarget.isPPC64();
@@ -1438,9 +1422,9 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
if (Model == TLSModel::LocalExec) {
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_HA);
+ PPCII::MO_TPREL_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
- PPCII::MO_TPREL16_LO);
+ PPCII::MO_TPREL_LO);
SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
is64bit ? MVT::i64 : MVT::i32);
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
@@ -1452,12 +1436,14 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
if (Model == TLSModel::InitialExec) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
+ SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
+ PPCII::MO_TLS);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
PtrVT, GOTReg, TGA);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
PtrVT, TGA, TPOffsetHi);
- return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
+ return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
}
if (Model == TLSModel::GeneralDynamic) {
@@ -1515,7 +1501,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
- DebugLoc DL = GSDN->getDebugLoc();
+ SDLoc DL(GSDN);
const GlobalValue *GV = GSDN->getGlobal();
// 64-bit SVR4 ABI code is always position-independent.
@@ -1546,7 +1532,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// If we're comparing for equality to zero, expose the fact that this is
// implented as a ctlz/srl pair on ppc, so that the dag combiner can
@@ -1595,7 +1581,7 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
SDValue InChain = Node->getOperand(0);
SDValue VAListPtr = Node->getOperand(1);
const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
- DebugLoc dl = Node->getDebugLoc();
+ SDLoc dl(Node);
assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
@@ -1695,6 +1681,18 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
false, false, false, 0);
}
+SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const {
+ assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
+
+ // We have to copy the entire va_list struct:
+ // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
+ return DAG.getMemcpy(Op.getOperand(0), Op,
+ Op.getOperand(1), Op.getOperand(2),
+ DAG.getConstant(12, MVT::i32), 8, false, true,
+ MachinePointerInfo(), MachinePointerInfo());
+}
+
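For reference, a hypothetical C mirror of the 32-bit SVR4 va_list being copied; the 12-byte figure in the comment above breaks down as 1 + 1 + 2 + 4 + 4 on a 32-bit target:

struct VaList32SVR4 {        // hypothetical mirror, not an LLVM type
  unsigned char gpr;         // index of the next general register arg
  unsigned char fpr;         // index of the next floating-point register arg
  unsigned short reserved;   // padding to 4-byte alignment
  char *overflow_arg_area;   // arguments passed on the stack
  char *reg_save_area;       // register arguments spilled by the prologue
};                           // sizeof == 12 on a 32-bit target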
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
return Op.getOperand(0);
@@ -1706,7 +1704,7 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue Trmp = Op.getOperand(1); // trampoline
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
bool isPPC64 = (PtrVT == MVT::i64);
@@ -1748,7 +1746,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
MachineFunction &MF = DAG.getMachineFunction();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
@@ -1842,18 +1840,24 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
#include "PPCGenCallingConv.inc"
-static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+// Function whose sole purpose is to kill compiler warnings
+// stemming from unused functions included from PPCGenCallingConv.inc.
+CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const {
+ return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS;
+}
+
+bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
return true;
}
-static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::R3, PPC::R4, PPC::R5, PPC::R6,
PPC::R7, PPC::R8, PPC::R9, PPC::R10,
@@ -1876,11 +1880,11 @@ static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
-static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
+bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
static const uint16_t ArgRegs[] = {
PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
PPC::F8
@@ -1931,7 +1935,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals)
const {
if (PPCSubTarget.isSVR4ABI()) {
@@ -1953,7 +1957,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// 32-bit SVR4 ABI Stack Frame Layout:
@@ -2170,14 +2174,14 @@ PPCTargetLowering::LowerFormalArguments_32SVR4(
SDValue
PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
SelectionDAG &DAG, SDValue ArgVal,
- DebugLoc dl) const {
+ SDLoc dl) const {
if (Flags.isSExt())
ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
else if (Flags.isZExt())
ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
DAG.getValueType(ObjectVT));
-
+
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
}
@@ -2213,7 +2217,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
@@ -2304,6 +2308,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4(
InVals.push_back(FIN);
continue;
}
+
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ CurArgOffset = ArgOffset;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (ObjSize < PtrByteSize)
CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
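
The ByVal handling added above rounds ArgOffset up with the standard round-up-to-alignment idiom. A stand-alone sketch of that arithmetic (plain C++ with made-up values, independent of the LLVM code):

    #include <cassert>
    #include <cstdio>

    // Round 'offset' up to the next multiple of 'align' (align > 0); this is
    // the same ((off + align - 1) / align) * align expression used for
    // over-aligned byval arguments above.
    static unsigned roundUpToAlignment(unsigned offset, unsigned align) {
      return ((offset + align - 1) / align) * align;
    }

    int main() {
      unsigned ArgOffset = 56; // hypothetical current offset, 8-byte aligned
      unsigned BVAlign = 16;   // byval alignment > 8 triggers the rounding
      ArgOffset = roundUpToAlignment(ArgOffset, BVAlign);
      assert(ArgOffset == 64 && ArgOffset % BVAlign == 0);
      std::printf("aligned offset = %u\n", ArgOffset);
      return 0;
    }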
@@ -2502,7 +2513,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg>
&Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// TODO: add description of PPC stack frame format, or at least some docs.
//
@@ -2600,17 +2611,17 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
SmallVector<SDValue, 8> MemOps;
unsigned nAltivecParamsAtEnd = 0;
- // FIXME: FuncArg and Ins[ArgNo] must reference the same argument.
- // When passing anonymous aggregates, this is currently not true.
- // See LowerFormalArguments_64SVR4 for a fix.
Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
- for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo, ++FuncArg) {
+ unsigned CurArgIdx = 0;
+ for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
SDValue ArgVal;
bool needsLoad = false;
EVT ObjectVT = Ins[ArgNo].VT;
unsigned ObjSize = ObjectVT.getSizeInBits()/8;
unsigned ArgSize = ObjSize;
ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
+ std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
+ CurArgIdx = Ins[ArgNo].OrigArgIndex;
unsigned CurArgOffset = ArgOffset;
@@ -3002,9 +3013,9 @@ struct TailCallArgumentInfo {
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
SDValue Chain,
- const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
- SmallVector<SDValue, 8> &MemOpChains,
- DebugLoc dl) {
+ const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
+ SmallVectorImpl<SDValue> &MemOpChains,
+ SDLoc dl) {
for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
SDValue Arg = TailCallArgs[i].Arg;
SDValue FIN = TailCallArgs[i].FrameIdxOp;
@@ -3026,7 +3037,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
int SPDiff,
bool isPPC64,
bool isDarwinABI,
- DebugLoc dl) {
+ SDLoc dl) {
if (SPDiff) {
// Calculate the new stack slot for the return address.
int SlotSize = isPPC64 ? 8 : 4;
@@ -3061,7 +3072,7 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SDValue Arg, int SPDiff, unsigned ArgOffset,
- SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
+ SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
@@ -3083,7 +3094,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
SDValue &LROpOut,
SDValue &FPOpOut,
bool isDarwinABI,
- DebugLoc dl) const {
+ SDLoc dl) const {
if (SPDiff) {
// Load the LR and FP stack slot for later adjusting.
EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
@@ -3113,7 +3124,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
- DebugLoc dl) {
+ SDLoc dl) {
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
false, false, MachinePointerInfo(0),
@@ -3126,9 +3137,9 @@ static void
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
SDValue Arg, SDValue PtrOff, int SPDiff,
unsigned ArgOffset, bool isPPC64, bool isTailCall,
- bool isVector, SmallVector<SDValue, 8> &MemOpChains,
- SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
- DebugLoc dl) {
+ bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments,
+ SDLoc dl) {
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
if (!isTailCall) {
if (isVector) {
@@ -3149,9 +3160,9 @@ LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
static
void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
- DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
+ SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
SDValue LROp, SDValue FPOp, bool isDarwinABI,
- SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
+ SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
MachineFunction &MF = DAG.getMachineFunction();
// Emit a sequence of copyto/copyfrom virtual registers for arguments that
@@ -3171,15 +3182,15 @@ void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
// Emit callseq_end just before tailcall node.
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
- DAG.getIntPtrConstant(0, true), InFlag);
+ DAG.getIntPtrConstant(0, true), InFlag, dl);
InFlag = Chain.getValue(1);
}
static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
- SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
- SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
- SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
+ SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
+ SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
+ SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
const PPCSubtarget &PPCSubTarget) {
bool isPPC64 = PPCSubTarget.isPPC64();
@@ -3363,7 +3374,7 @@ SDValue
PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
SmallVector<CCValAssign, 16> RVLocs;
@@ -3406,7 +3417,7 @@ PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
}
SDValue
-PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
+PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
bool isTailCall, bool isVarArg,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
@@ -3476,7 +3487,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated.
needsTOCRestore = true;
- } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
+ } else if ((CallOpc == PPCISD::CALL) &&
+ (!isLocalCall(Callee) ||
+ DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
}
@@ -3493,7 +3506,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true),
- InFlag);
+ InFlag, dl);
if (!Ins.empty())
InFlag = Chain.getValue(1);
@@ -3505,10 +3518,10 @@ SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const {
SelectionDAG &DAG = CLI.DAG;
- DebugLoc &dl = CLI.DL;
- SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
- SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
+ SDLoc &dl = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
bool &isTailCall = CLI.IsTailCall;
@@ -3542,7 +3555,7 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
// of the 32-bit SVR4 ABI stack frame layout.
@@ -3628,7 +3641,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue CallSeqStart = Chain;
// Load the return address and frame pointer so it can be moved somewhere else
@@ -3679,7 +3693,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
// This must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
+ CallSeqStart.getNode()->getOperand(1),
+ SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
Chain = CallSeqStart = NewCallSeqStart;
@@ -3755,13 +3770,14 @@ PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
SDValue CallSeqStart,
ISD::ArgFlagsTy Flags,
SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
CallSeqStart.getNode()->getOperand(0),
Flags, DAG, dl);
// The MEMCPY must go outside the CALLSEQ_START..END.
SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
- CallSeqStart.getNode()->getOperand(1));
+ CallSeqStart.getNode()->getOperand(1),
+ SDLoc(MemcpyCall));
DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
NewCallSeqStart.getNode());
return NewCallSeqStart;
@@ -3774,7 +3790,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
unsigned NumOps = Outs.size();
@@ -3815,7 +3831,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue CallSeqStart = Chain;
  // Load the return address and frame pointer so it can be moved somewhere else
@@ -3889,6 +3906,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
if (Size == 0)
continue;
+ unsigned BVAlign = Flags.getByValAlign();
+ if (BVAlign > 8) {
+ if (BVAlign % PtrByteSize != 0)
+ llvm_unreachable(
+ "ByVal alignment is not a multiple of the pointer size");
+
+ ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign;
+ }
+
// All aggregates smaller than 8 bytes must be passed right-justified.
if (Size==1 || Size==2 || Size==4) {
EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
@@ -3940,7 +3966,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// register.
// FIXME: The memcpy seems to produce pretty awful code for
// small aggregates, particularly for packed ones.
- // FIXME: It would be preferable to use the slot in the
+ // FIXME: It would be preferable to use the slot in the
// parameter save area instead of a new local variable.
SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
@@ -3980,7 +4006,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
@@ -4003,7 +4029,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
// must be passed right-justified in the stack doubleword, and
// in the GPR, if one is available.
SDValue StoreOff;
- if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
+ if (Arg.getSimpleValueType().SimpleTy == MVT::f32) {
SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
} else
@@ -4145,7 +4171,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
unsigned NumOps = Outs.size();
@@ -4186,7 +4212,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
- Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ dl);
SDValue CallSeqStart = Chain;
  // Load the return address and frame pointer so it can be moved somewhere else
@@ -4310,7 +4337,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
continue;
}
- switch (Arg.getValueType().getSimpleVT().SimpleTy) {
+ switch (Arg.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unexpected ValueType for argument!");
case MVT::i32:
case MVT::i64:
@@ -4502,7 +4529,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const {
+ SDLoc dl, SelectionDAG &DAG) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
@@ -4551,7 +4578,7 @@ PPCTargetLowering::LowerReturn(SDValue Chain,
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
// When we pop the dynamic allocation we need to restore the SP link.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
  // Get the correct type for pointers.
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -4636,7 +4663,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
// Get the inputs.
SDValue Chain = Op.getOperand(0);
SDValue Size = Op.getOperand(1);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
  // Get the correct type for pointers.
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -4653,7 +4680,7 @@ SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
DAG.getVTList(MVT::i32, MVT::Other),
Op.getOperand(0), Op.getOperand(1));
@@ -4661,7 +4688,7 @@ SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc DL = Op.getDebugLoc();
+ SDLoc DL(Op);
return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
Op.getOperand(0), Op.getOperand(1));
}
@@ -4687,7 +4714,7 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
EVT CmpVT = Op.getOperand(0).getValueType();
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// If the RHS of the comparison is a 0.0, we don't need to do the
// subtraction at all.
@@ -4768,14 +4795,14 @@ SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
// FIXME: Split this code up when LegalizeDAGTypes lands.
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
- DebugLoc dl) const {
+ SDLoc dl) const {
assert(Op.getOperand(0).getValueType().isFloatingPoint());
SDValue Src = Op.getOperand(0);
if (Src.getValueType() == MVT::f32)
Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
SDValue Tmp;
- switch (Op.getValueType().getSimpleVT().SimpleTy) {
+ switch (Op.getSimpleValueType().SimpleTy) {
default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
case MVT::i32:
Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ :
@@ -4827,7 +4854,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// Don't handle ppc_fp128 here; let it be lowered to a libcall.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
@@ -4961,7 +4988,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
/*
The rounding mode is in bits 30:31 of FPSR, and has the following
settings:
@@ -5027,7 +5054,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
assert(Op.getNumOperands() == 3 &&
VT == Op.getOperand(1).getValueType() &&
"Unexpected SHL!");
@@ -5055,7 +5082,7 @@ SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned BitWidth = VT.getSizeInBits();
assert(Op.getNumOperands() == 3 &&
VT == Op.getOperand(1).getValueType() &&
@@ -5083,7 +5110,7 @@ SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
EVT VT = Op.getValueType();
unsigned BitWidth = VT.getSizeInBits();
assert(Op.getNumOperands() == 3 &&
@@ -5118,7 +5145,7 @@ SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize. Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
- SelectionDAG &DAG, DebugLoc dl) {
+ SelectionDAG &DAG, SDLoc dl) {
assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");
static const EVT VTys[] = { // canonical VT to use for each size.
@@ -5142,10 +5169,20 @@ static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res);
}
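
The assertion in BuildSplatI reflects that the vspltis* instructions carry a 5-bit sign-extended immediate, so only -16..15 are representable. A stand-alone check of that range, including the equivalent sign-extension test (plain C++, illustrative only):

    #include <cassert>

    // The vspltis* immediate is a 5-bit sign-extended field, hence -16..15.
    static bool fitsInSigned5Bits(int v) {
      return v >= -16 && v <= 15;
    }

    int main() {
      assert(fitsInSigned5Bits(-16) && fitsInSigned5Bits(15));
      assert(!fitsInSigned5Bits(-17) && !fitsInSigned5Bits(16));
      // Cross-check: a value fits exactly when sign-extending its low
      // 5 bits reproduces it.
      for (int v = -40; v <= 40; ++v) {
        int low5 = v & 0x1F;          // extract the 5-bit field
        if (low5 & 0x10) low5 -= 32;  // sign-extend it
        assert((low5 == v) == fitsInSigned5Bits(v));
      }
      return 0;
    }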
+/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
+/// specified intrinsic ID.
+static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op,
+ SelectionDAG &DAG, SDLoc dl,
+ EVT DestVT = MVT::Other) {
+ if (DestVT == MVT::Other) DestVT = Op.getValueType();
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(IID, MVT::i32), Op);
+}
+
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
- SelectionDAG &DAG, DebugLoc dl,
+ SelectionDAG &DAG, SDLoc dl,
EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = LHS.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
@@ -5156,7 +5193,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
SDValue Op2, SelectionDAG &DAG,
- DebugLoc dl, EVT DestVT = MVT::Other) {
+ SDLoc dl, EVT DestVT = MVT::Other) {
if (DestVT == MVT::Other) DestVT = Op0.getValueType();
return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
@@ -5166,7 +5203,7 @@ static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount. The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
- EVT VT, SelectionDAG &DAG, DebugLoc dl) {
+ EVT VT, SelectionDAG &DAG, SDLoc dl) {
// Force LHS/RHS to be the right type.
LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
@@ -5185,7 +5222,7 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
@@ -5341,7 +5378,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
SDValue RHS, SelectionDAG &DAG,
- DebugLoc dl) {
+ SDLoc dl) {
unsigned OpNum = (PFEntry >> 26) & 0x0F;
unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
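
GeneratePerfectShuffle unpacks each 32-bit perfect-shuffle table entry into a 4-bit opcode (bits 26-29) and two 13-bit operand IDs, exactly as the three shifts above show. A stand-alone round-trip of that packing (the entry value is made up):

    #include <cassert>
    #include <cstdint>

    struct PFEntryFields {
      unsigned OpNum, LHSID, RHSID;
    };

    // Decode the packed layout used by the perfect-shuffle table:
    // bits 26..29 = opcode, bits 13..25 = LHS id, bits 0..12 = RHS id.
    static PFEntryFields decode(uint32_t e) {
      return { (e >> 26) & 0x0F,
               (e >> 13) & ((1u << 13) - 1),
               e & ((1u << 13) - 1) };
    }

    int main() {
      uint32_t e = (3u << 26) | (1234u << 13) | 567u; // made-up entry
      PFEntryFields f = decode(e);
      assert(f.OpNum == 3 && f.LHSID == 1234 && f.RHSID == 567);
      return 0;
    }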
@@ -5420,7 +5457,7 @@ static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
@@ -5587,7 +5624,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
// If this is a lowered altivec predicate compare, CompareOpc is set to the
// opcode number of the comparison.
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
int CompareOpc;
bool isDot;
if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
@@ -5612,7 +5649,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
// Now that we have the comparison, emit a copy from the CR to a GPR.
// This is flagged to the above dot comparison.
- SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
+ SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
DAG.getRegister(PPC::CR6, MVT::i32),
CompNode.getValue(1));
@@ -5651,7 +5688,7 @@ SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
@@ -5668,7 +5705,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
if (Op.getValueType() == MVT::v4i32) {
SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
@@ -5745,6 +5782,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::VAARG:
return LowerVAARG(Op, DAG, PPCSubTarget);
+ case ISD::VACOPY:
+ return LowerVACOPY(Op, DAG, PPCSubTarget);
+
case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
case ISD::DYNAMIC_STACKALLOC:
return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);
@@ -5755,7 +5795,7 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::FP_TO_UINT:
case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG,
- Op.getDebugLoc());
+ SDLoc(Op));
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
@@ -5772,6 +5812,9 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
case ISD::MUL: return LowerMUL(Op, DAG);
+ // For counter-based loop handling.
+ case ISD::INTRINSIC_W_CHAIN: return SDValue();
+
// Frame & Return address.
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
@@ -5782,10 +5825,26 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
const TargetMachine &TM = getTargetMachine();
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
switch (N->getOpcode()) {
default:
llvm_unreachable("Do not know how to custom type legalize this operation!");
+ case ISD::INTRINSIC_W_CHAIN: {
+ if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
+ Intrinsic::ppc_is_decremented_ctr_nonzero)
+ break;
+
+ assert(N->getValueType(0) == MVT::i1 &&
+ "Unexpected result type for CTR decrement intrinsic");
+ EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
+ SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
+ SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
+ N->getOperand(1));
+
+ Results.push_back(NewInt);
+ Results.push_back(NewInt.getValue(1));
+ break;
+ }
case ISD::VAARG: {
if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
|| TM.getSubtarget<PPCSubtarget>().isPPC64())
@@ -5821,6 +5880,9 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
case ISD::FP_TO_SINT:
+ // LowerFP_TO_INT() can only handle f32 and f64.
+ if (N->getOperand(0).getValueType() == MVT::ppcf128)
+ return;
Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
return;
}
@@ -6092,6 +6154,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
// thisMBB:
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
// Prepare IP either in reg.
const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
@@ -6101,15 +6164,32 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
.addReg(PPC::X2)
- .addImm(TOCOffset / 4)
+ .addImm(TOCOffset)
.addReg(BufReg);
-
MIB.setMemRefs(MMOBegin, MMOEnd);
}
+  // Naked functions never have a base pointer, and so we use r1. For all
+  // other functions, this decision must be delayed until PEI.
+ unsigned BaseReg;
+ if (MF->getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked))
+ BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1;
+ else
+ BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP;
+
+ MIB = BuildMI(*thisMBB, MI, DL,
+ TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW))
+ .addReg(BaseReg)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ MIB.setMemRefs(MMOBegin, MMOEnd);
+
// Setup
MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
- MIB.addRegMask(PPCRegInfo->getNoPreservedMask());
+ const PPCRegisterInfo *TRI =
+ static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
+ MIB.addRegMask(TRI->getNoPreservedMask());
BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
@@ -6129,7 +6209,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
if (PPCSubTarget.isPPC64()) {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
.addReg(LabelReg)
- .addImm(LabelOffset / 4)
+ .addImm(LabelOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
@@ -6176,12 +6256,14 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Since FP is only updated here but NOT referenced, it's treated as GPR.
unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
+ unsigned BP = (PVT == MVT::i64) ? PPC::X30 : PPC::R30;
MachineInstrBuilder MIB;
const int64_t LabelOffset = 1 * PVT.getStoreSize();
const int64_t SPOffset = 2 * PVT.getStoreSize();
const int64_t TOCOffset = 3 * PVT.getStoreSize();
+ const int64_t BPOffset = 4 * PVT.getStoreSize();
unsigned BufReg = MI->getOperand(0).getReg();
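
The offsets above imply one pointer-sized slot per saved value in the setjmp buffer: the resume label, SP, TOC, and (new in this change) the base pointer at slots 1 through 4, with slot 0 conventionally holding the frame pointer. A sketch of the resulting layout, assuming a 64-bit target with an 8-byte store size:

    #include <cassert>

    int main() {
      const long long StoreSize = 8; // pointer store size, 64-bit target
      // Slot order implied by the constants above (slot 0 = frame pointer
      // is an assumption about the generic sjlj buffer convention).
      const long long LabelOffset = 1 * StoreSize; // resume address
      const long long SPOffset    = 2 * StoreSize; // stack pointer
      const long long TOCOffset   = 3 * StoreSize; // TOC pointer (X2)
      const long long BPOffset    = 4 * StoreSize; // base pointer
      assert(LabelOffset == 8 && SPOffset == 16 &&
             TOCOffset == 24 && BPOffset == 32);
      return 0;
    }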
@@ -6202,7 +6284,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Reload IP
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
- .addImm(LabelOffset / 4)
+ .addImm(LabelOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
@@ -6214,7 +6296,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
// Reload SP
if (PVT == MVT::i64) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
- .addImm(SPOffset / 4)
+ .addImm(SPOffset)
.addReg(BufReg);
} else {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
@@ -6223,13 +6305,22 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
}
MIB.setMemRefs(MMOBegin, MMOEnd);
- // FIXME: When we also support base pointers, that register must also be
- // restored here.
+ // Reload BP
+ if (PVT == MVT::i64) {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ } else {
+ MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
+ .addImm(BPOffset)
+ .addReg(BufReg);
+ }
+ MIB.setMemRefs(MMOBegin, MMOEnd);
// Reload TOC
if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
- .addImm(TOCOffset / 4)
+ .addImm(TOCOffset)
.addReg(BufReg);
MIB.setMemRefs(MMOBegin, MMOEnd);
@@ -6272,8 +6363,10 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
Cond.push_back(MI->getOperand(1));
DebugLoc dl = MI->getDebugLoc();
- PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
- MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
+ Cond, MI->getOperand(2).getReg(),
+ MI->getOperand(3).getReg());
} else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
MI->getOpcode() == PPC::SELECT_CC_I8 ||
MI->getOpcode() == PPC::SELECT_CC_F4 ||
@@ -6633,51 +6726,6 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
- } else if (MI->getOpcode() == PPC::FRINDrint ||
- MI->getOpcode() == PPC::FRINSrint) {
- bool isf32 = MI->getOpcode() == PPC::FRINSrint;
- unsigned Dest = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
- DebugLoc dl = MI->getDebugLoc();
-
- MachineRegisterInfo &RegInfo = F->getRegInfo();
- unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
-
- // Perform the rounding.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
- .addReg(Src);
-
- // Compare the results.
- BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
- .addReg(Dest).addReg(Src);
-
- // If the results were not equal, then set the FPSCR XX bit.
- MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
- F->insert(It, midMBB);
- F->insert(It, exitMBB);
- exitMBB->splice(exitMBB->begin(), BB,
- llvm::next(MachineBasicBlock::iterator(MI)),
- BB->end());
- exitMBB->transferSuccessorsAndUpdatePHIs(BB);
-
- BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
- .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);
-
- BB->addSuccessor(midMBB);
- BB->addSuccessor(exitMBB);
-
- BB = midMBB;
-
- // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
- // the FI bit here because that will not automatically set XX also,
- // and XX is what libm interprets as the FE_INEXACT flag.
- BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
- BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
-
- BB->addSuccessor(exitMBB);
-
- BB = exitMBB;
} else {
llvm_unreachable("Unexpected instr type to insert");
}
@@ -6717,7 +6765,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
++Iterations;
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue FPOne =
DAG.getConstantFP(1.0, VT.getScalarType());
@@ -6779,7 +6827,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
++Iterations;
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
SDValue FPThreeHalves =
DAG.getConstantFP(1.5, VT.getScalarType());
@@ -6823,11 +6871,120 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
return SDValue();
}
+// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
+// not enforce equality of the chain operands.
+static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base,
+ unsigned Bytes, int Dist,
+ SelectionDAG &DAG) {
+ EVT VT = LS->getMemoryVT();
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LS->getBasePtr();
+ SDValue BaseLoc = Base->getBasePtr();
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+
+ // Handle X+C
+ if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ const GlobalValue *GV1 = NULL;
+ const GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
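
Each case in isConsecutiveLS above reduces to the same offset test: two same-sized accesses are Dist slots apart exactly when the first offset equals the second plus Dist*Bytes. A stand-alone sketch (names are illustrative):

    #include <cassert>
    #include <cstdint>

    // Two accesses of 'bytes' bytes at offsets off1 and off2 from a common
    // base are 'dist' slots apart when off1 == off2 + dist*bytes; this is
    // the check applied to frame indices, X+C addresses, and global+offset
    // addresses above.
    static bool isConsecutive(int64_t off1, int64_t off2,
                              unsigned bytes, int dist) {
      return off1 == off2 + static_cast<int64_t>(dist) * bytes;
    }

    int main() {
      // A 16-byte vector access at offset 48 is one slot past one at 32.
      assert(isConsecutive(48, 32, 16, 1));
      assert(!isConsecutive(40, 32, 16, 1)); // overlapping, not consecutive
      return 0;
    }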
+// Return true if there is a nearby consecutive load to the one provided
+// (regardless of alignment). We search up and down the chain, looking through
+// token factors and other loads (but nothing else). As a result, a true
+// result indicates that it is safe to create a new consecutive load adjacent
+// to the load provided.
+static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
+ SDValue Chain = LD->getChain();
+ EVT VT = LD->getMemoryVT();
+
+ SmallSet<SDNode *, 16> LoadRoots;
+ SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
+ SmallSet<SDNode *, 16> Visited;
+
+ // First, search up the chain, branching to follow all token-factor operands.
+  // If we find a consecutive load, then we're done; otherwise, record all
+ // nodes just above the top-level loads and token factors.
+ while (!Queue.empty()) {
+ SDNode *ChainNext = Queue.pop_back_val();
+ if (!Visited.insert(ChainNext))
+ continue;
+
+ if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) {
+ if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+ return true;
+
+ if (!Visited.count(ChainLD->getChain().getNode()))
+ Queue.push_back(ChainLD->getChain().getNode());
+ } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
+ for (SDNode::op_iterator O = ChainNext->op_begin(),
+ OE = ChainNext->op_end(); O != OE; ++O)
+ if (!Visited.count(O->getNode()))
+ Queue.push_back(O->getNode());
+ } else
+ LoadRoots.insert(ChainNext);
+ }
+
+ // Second, search down the chain, starting from the top-level nodes recorded
+ // in the first phase. These top-level nodes are the nodes just above all
+  // loads and token factors. Starting with their uses, recursively look through
+ // all loads (just the chain uses) and token factors to find a consecutive
+ // load.
+ Visited.clear();
+ Queue.clear();
+
+ for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
+ IE = LoadRoots.end(); I != IE; ++I) {
+ Queue.push_back(*I);
+
+ while (!Queue.empty()) {
+ SDNode *LoadRoot = Queue.pop_back_val();
+ if (!Visited.insert(LoadRoot))
+ continue;
+
+ if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot))
+ if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
+ return true;
+
+ for (SDNode::use_iterator UI = LoadRoot->use_begin(),
+ UE = LoadRoot->use_end(); UI != UE; ++UI)
+ if (((isa<LoadSDNode>(*UI) &&
+ cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
+ UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
+ Queue.push_back(*UI);
+ }
+ }
+
+ return false;
+}
+
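
findConsecutiveLoad is a worklist traversal with a visited set, run once up the chain and once down from the recorded roots. A minimal sketch of the same pattern over a toy graph (the Node type and edges here are hypothetical, not SelectionDAG types):

    #include <cassert>
    #include <set>
    #include <vector>

    struct Node {
      int id;
      std::vector<Node *> preds; // analogous to chain operands
    };

    // Worklist traversal with a visited set, the same shape as the first
    // phase above: pop a node, skip it if already seen, otherwise test it
    // and queue its predecessors.
    static bool searchUp(Node *start, int target) {
      std::set<Node *> visited;
      std::vector<Node *> queue(1, start);
      while (!queue.empty()) {
        Node *n = queue.back();
        queue.pop_back();
        if (!visited.insert(n).second)
          continue;
        if (n->id == target)
          return true;
        for (Node *p : n->preds)
          if (!visited.count(p))
            queue.push_back(p);
      }
      return false;
    }

    int main() {
      Node a{1, {}}, b{2, {&a}}, c{3, {&a, &b}}; // small test graph
      assert(searchUp(&c, 1));
      assert(!searchUp(&c, 4));
      return 0;
    }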
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
const TargetMachine &TM = getTargetMachine();
SelectionDAG &DAG = DCI.DAG;
- DebugLoc dl = N->getDebugLoc();
+ SDLoc dl(N);
switch (N->getOpcode()) {
default: break;
case PPCISD::SHL:
@@ -6868,7 +7025,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DCI);
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
- RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
+ RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV);
DCI.AddToWorklist(RV.getNode());
return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
@@ -6881,7 +7038,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
DCI);
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
- RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
+ RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)),
N->getValueType(0), RV,
N->getOperand(1).getOperand(1));
DCI.AddToWorklist(RV.getNode());
@@ -6909,8 +7066,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
if (RV.getNode() != 0) {
DCI.AddToWorklist(RV.getNode());
RV = DAGCombineFastRecip(RV, DCI);
- if (RV.getNode() != 0)
+ if (RV.getNode() != 0) {
+ // Unfortunately, RV is now NaN if the input was exactly 0. Select out
+ // this case and force the answer to 0.
+
+ EVT VT = RV.getValueType();
+
+ SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType());
+ if (VT.isVector()) {
+ assert(VT.getVectorNumElements() == 4 && "Unknown vector type");
+ Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero);
+ }
+
+ SDValue ZeroCmp =
+ DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT),
+ N->getOperand(0), Zero, ISD::SETEQ);
+ DCI.AddToWorklist(ZeroCmp.getNode());
+ DCI.AddToWorklist(RV.getNode());
+
+ RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT,
+ ZeroCmp, Zero, RV);
return RV;
+ }
}
}
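
The select added above exists because x * rsqrt(x) refined by Newton-Raphson is NaN when x is exactly zero (zero times infinity). A scalar sketch of the same shape, using a hypothetical initial estimate in place of the hardware frsqrte:

    #include <cassert>
    #include <cmath>

    // One Newton-Raphson step for r ~ 1/sqrt(x): r' = r*(1.5 - 0.5*x*r*r).
    static double refineRsqrt(double x, double r) {
      return r * (1.5 - 0.5 * x * r * r);
    }

    // sqrt(x) ~ x * rsqrt(x), with the explicit zero check standing in for
    // the 'ZeroCmp' select above: without it, x == 0 yields 0 * inf = NaN.
    static double fastSqrt(double x) {
      if (x == 0.0)
        return 0.0;
      double r = 1.0 / std::sqrt(x); // stand-in for the frsqrte estimate
      for (int i = 0; i < 2; ++i)
        r = refineRsqrt(x, r);
      return x * r;
    }

    int main() {
      assert(fastSqrt(0.0) == 0.0); // no NaN for the zero input
      assert(std::fabs(fastSqrt(2.0) - std::sqrt(2.0)) < 1e-12);
      return 0;
    }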
@@ -6999,6 +7176,160 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
cast<StoreSDNode>(N)->getMemOperand());
}
break;
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ EVT VT = LD->getValueType(0);
+ Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty);
+ if (ISD::isNON_EXTLoad(N) && VT.isVector() &&
+ TM.getSubtarget<PPCSubtarget>().hasAltivec() &&
+ (VT == MVT::v16i8 || VT == MVT::v8i16 ||
+ VT == MVT::v4i32 || VT == MVT::v4f32) &&
+ LD->getAlignment() < ABIAlignment) {
+ // This is a type-legal unaligned Altivec load.
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+
+ // This implements the loading of unaligned vectors as described in
+ // the venerable Apple Velocity Engine overview. Specifically:
+ // https://developer.apple.com/hardwaredrivers/ve/alignment.html
+ // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
+ //
+ // The general idea is to expand a sequence of one or more unaligned
+      // loads into an alignment-based permutation-control instruction (lvsl),
+ // a series of regular vector loads (which always truncate their
+ // input address to an aligned address), and a series of permutations.
+ // The results of these permutations are the requested loaded values.
+ // The trick is that the last "extra" load is not taken from the address
+ // you might suspect (sizeof(vector) bytes after the last requested
+ // load), but rather sizeof(vector) - 1 bytes after the last
+ // requested vector. The point of this is to avoid a page fault if the
+      // base address happened to be aligned. This works because if the base
+      // address is aligned, then adding less than a full vector length will
+      // cause the last vector in the sequence to be (re)loaded. Otherwise,
+      // the next vector will be fetched from the following address, as expected.
+
+ // We might be able to reuse the permutation generation from
+ // a different base address offset from this one by an aligned amount.
+ // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
+ // optimization later.
+ SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr,
+ DAG, dl, MVT::v16i8);
+
+ // Refine the alignment of the original load (a "new" load created here
+ // which was identical to the first except for the alignment would be
+ // merged with the existing node regardless).
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(LD->getPointerInfo(),
+ LD->getMemOperand()->getFlags(),
+ LD->getMemoryVT().getStoreSize(),
+ ABIAlignment);
+ LD->refineAlignment(MMO);
+ SDValue BaseLoad = SDValue(LD, 0);
+
+ // Note that the value of IncOffset (which is provided to the next
+ // load's pointer info offset value, and thus used to calculate the
+ // alignment), and the value of IncValue (which is actually used to
+ // increment the pointer value) are different! This is because we
+ // require the next load to appear to be aligned, even though it
+ // is actually offset from the base pointer by a lesser amount.
+ int IncOffset = VT.getSizeInBits() / 8;
+ int IncValue = IncOffset;
+
+ // Walk (both up and down) the chain looking for another load at the real
+ // (aligned) offset (the alignment of the other load does not matter in
+ // this case). If found, then do not use the offset reduction trick, as
+ // that will prevent the loads from being later combined (as they would
+ // otherwise be duplicates).
+ if (!findConsecutiveLoad(LD, DAG))
+ --IncValue;
+
+ SDValue Increment = DAG.getConstant(IncValue, getPointerTy());
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+
+ SDValue ExtraLoad =
+ DAG.getLoad(VT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncOffset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->isInvariant(), ABIAlignment);
+
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ BaseLoad.getValue(1), ExtraLoad.getValue(1));
+
+ if (BaseLoad.getValueType() != MVT::v4i32)
+ BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad);
+
+ if (ExtraLoad.getValueType() != MVT::v4i32)
+ ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad);
+
+ SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm,
+ BaseLoad, ExtraLoad, PermCntl, DAG, dl);
+
+ if (VT != MVT::v4i32)
+ Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm);
+
+ // Now we need to be really careful about how we update the users of the
+ // original load. We cannot just call DCI.CombineTo (or
+ // DAG.ReplaceAllUsesWith for that matter), because the load still has
+ // uses created here (the permutation for example) that need to stay.
+ SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+ while (UI != UE) {
+ SDUse &Use = UI.getUse();
+ SDNode *User = *UI;
+ // Note: BaseLoad is checked here because it might not be N, but a
+ // bitcast of N.
+ if (User == Perm.getNode() || User == BaseLoad.getNode() ||
+ User == TF.getNode() || Use.getResNo() > 1) {
+ ++UI;
+ continue;
+ }
+
+ SDValue To = Use.getResNo() ? TF : Perm;
+ ++UI;
+
+ SmallVector<SDValue, 8> Ops;
+ for (SDNode::op_iterator O = User->op_begin(),
+ OE = User->op_end(); O != OE; ++O) {
+ if (*O == Use)
+ Ops.push_back(To);
+ else
+ Ops.push_back(*O);
+ }
+
+ DAG.UpdateNodeOperands(User, Ops.data(), Ops.size());
+ }
+
+ return SDValue(N, 0);
+ }
+ }
+ break;
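
The lvsl/vperm expansion above leans on two address facts: vector loads truncate addresses to 16-byte boundaries, and probing at base + 15 rather than base + 16 cannot touch a new page when the base is already aligned. A stand-alone sketch of just that address arithmetic:

    #include <cassert>
    #include <cstdint>

    // Altivec vector loads ignore the low 4 address bits, so the load
    // really comes from the enclosing 16-byte-aligned block.
    static uint64_t truncateTo16(uint64_t addr) {
      return addr & ~uint64_t(15);
    }

    int main() {
      // Unaligned base: the extra load at base + 15 fetches the second
      // aligned block, covering the tail of the requested vector.
      uint64_t base = 0x1004;
      assert(truncateTo16(base) == 0x1000);
      assert(truncateTo16(base + 15) == 0x1010);

      // Aligned base: base + 15 stays inside the same 16-byte block, so
      // the extra load just re-reads the first block instead of touching
      // the next page; this is why +15 is used instead of +16.
      uint64_t aligned = 0x2000;
      assert(truncateTo16(aligned + 15) == truncateTo16(aligned));
      return 0;
    }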
+ case ISD::INTRINSIC_WO_CHAIN:
+ if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() ==
+ Intrinsic::ppc_altivec_lvsl &&
+ N->getOperand(1)->getOpcode() == ISD::ADD) {
+ SDValue Add = N->getOperand(1);
+
+ if (DAG.MaskedValueIsZero(Add->getOperand(1),
+ APInt::getAllOnesValue(4 /* 16 byte alignment */).zext(
+ Add.getValueType().getScalarType().getSizeInBits()))) {
+ SDNode *BasePtr = Add->getOperand(0).getNode();
+ for (SDNode::use_iterator UI = BasePtr->use_begin(),
+ UE = BasePtr->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+ cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() ==
+ Intrinsic::ppc_altivec_lvsl) {
+              // We've found another LVSL, and this address is an aligned
+ // multiple of that one. The results will be the same, so use the
+ // one we've just found instead.
+
+ return SDValue(*UI, 0);
+ }
+ }
+ }
+ }
+
+ break;
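
The MaskedValueIsZero test above asks whether the low four bits of the added offset are provably zero, that is, whether the offset is a multiple of 16, in which case lvsl yields the same permutation control for both addresses. The bit test in isolation:

    #include <cassert>
    #include <cstdint>

    // An offset leaves the lvsl permutation unchanged exactly when its
    // low four bits are zero (a multiple of the 16-byte vector alignment).
    static bool preservesLvslControl(int64_t offset) {
      return (offset & 15) == 0;
    }

    int main() {
      assert(preservesLvslControl(0) && preservesLvslControl(32));
      assert(!preservesLvslControl(8) && !preservesLvslControl(-4));
      return 0;
    }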
case ISD::BSWAP:
// Turn BSWAP (LOAD) -> lhbrx/lwbrx.
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
@@ -7083,20 +7414,53 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
}
- // If the user is a MFCR instruction, we know this is safe. Otherwise we
- // give up for right now.
- if (FlagUser->getOpcode() == PPCISD::MFCR)
+ // If the user is a MFOCRF instruction, we know this is safe.
+ // Otherwise we give up for right now.
+ if (FlagUser->getOpcode() == PPCISD::MFOCRF)
return SDValue(VCMPoNode, 0);
}
break;
}
case ISD::BR_CC: {
// If this is a branch on an altivec predicate comparison, lower this so
- // that we don't have to do a MFCR: instead, branch directly on CR6. This
+ // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
// lowering is done pre-legalize, because the legalizer lowers the predicate
// compare down to code that is difficult to reassemble.
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
+
+    // Sometimes the promoted value of the intrinsic is ANDed with some non-zero
+ // value. If so, pass-through the AND to get to the intrinsic.
+ if (LHS.getOpcode() == ISD::AND &&
+ LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ isa<ConstantSDNode>(LHS.getOperand(1)) &&
+ !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()->
+ isZero())
+ LHS = LHS.getOperand(0);
+
+ if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
+ cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
+ Intrinsic::ppc_is_decremented_ctr_nonzero &&
+ isa<ConstantSDNode>(RHS)) {
+ assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ "Counter decrement comparison is not EQ or NE");
+
+ unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
+ bool isBDNZ = (CC == ISD::SETEQ && Val) ||
+ (CC == ISD::SETNE && !Val);
+
+ // We now need to make the intrinsic dead (it cannot be instruction
+ // selected).
+ DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
+ assert(LHS.getNode()->hasOneUse() &&
+ "Counter decrement has more than one use");
+
+ return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
+ N->getOperand(0), N->getOperand(4));
+ }
+
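
The opcode selection above collapses the compare into a two-way rule: the branch becomes BDNZ when the loop should continue while the decremented counter is non-zero, and BDZ otherwise. A stand-alone enumeration of the four (condition, constant) combinations:

    #include <cassert>

    // Mirror of the isBDNZ computation above: isSetEQ models CC == SETEQ,
    // valNonZero models the constant RHS treated as a bool.
    static bool selectsBDNZ(bool isSetEQ, bool valNonZero) {
      return (isSetEQ && valNonZero) || (!isSetEQ && !valNonZero);
    }

    int main() {
      assert(selectsBDNZ(true, true));    // (ctr-nonzero) == 1  -> BDNZ
      assert(!selectsBDNZ(true, false));  // (ctr-nonzero) == 0  -> BDZ
      assert(selectsBDNZ(false, false));  // (ctr-nonzero) != 0  -> BDNZ
      assert(!selectsBDNZ(false, true));  // (ctr-nonzero) != 1  -> BDZ
      return 0;
    }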
int CompareOpc;
bool isDot;
@@ -7271,7 +7635,7 @@ PPCTargetLowering::getSingleConstraintMatchWeight(
std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
+ MVT VT) const {
if (Constraint.size() == 1) {
// GCC RS6000 Constraint Letters
switch (Constraint[0]) {
@@ -7296,7 +7660,24 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
}
- return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+ std::pair<unsigned, const TargetRegisterClass*> R =
+ TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+
+ // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
+ // (which we call X[0-9]+). If a 64-bit value has been requested, and a
+ // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
+ // register.
+ // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
+ // the AsmName field from *RegisterInfo.td, then this would not be necessary.
+ if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() &&
+ PPC::GPRCRegClass.contains(R.first)) {
+ const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
+ return std::make_pair(TRI->getMatchingSuperReg(R.first,
+ PPC::sub_32, &PPC::G8RCRegClass),
+ &PPC::G8RCRegClass);
+ }
+
+ return R;
}
@@ -7406,25 +7787,13 @@ bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
return true;
}
-/// isLegalAddressImmediate - Return true if the integer value can be used
-/// as the offset of the target addressing mode for load / store of the
-/// given type.
-bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,Type *Ty) const{
- // PPC allows a sign-extended 16-bit immediate field.
- return (V > -(1 << 16) && V < (1 << 16)-1);
-}
-
-bool PPCTargetLowering::isLegalAddressImmediate(GlobalValue* GV) const {
- return false;
-}
-
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
MFI->setReturnAddressIsTaken(true);
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
// Make sure the function does not optimize away the store of the RA to
@@ -7454,7 +7823,7 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
- DebugLoc dl = Op.getDebugLoc();
+ SDLoc dl(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
@@ -7537,18 +7906,15 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
return true;
}
-/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
-/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
-/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
-/// is expanded to mul + add.
-bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ VT = VT.getScalarType();
+
if (!VT.isSimple())
return false;
switch (VT.getSimpleVT().SimpleTy) {
case MVT::f32:
case MVT::f64:
- case MVT::v4f32:
return true;
default:
break;
@@ -7558,9 +7924,15 @@ bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
}
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
- if (DisableILPPref)
+ if (DisableILPPref || PPCSubTarget.enableMachineScheduler())
return TargetLowering::getSchedulingPreference(N);
return Sched::ILP;
}
+// Create a fast isel object.
+FastISel *
+PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const {
+ return PPC::createFastISel(FuncInfo, LibInfo);
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index 423e983..df3af35 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -20,6 +20,7 @@
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -115,11 +116,10 @@ namespace llvm {
/// Return with a flag operand, matched by 'blr'
RET_FLAG,
- /// R32 = MFCR(CRREG, INFLAG) - Represents the MFCRpseud/MFOCRF
- /// instructions. This copies the bits corresponding to the specified
- /// CRREG into the resultant GPR. Bits corresponding to other CR regs
- /// are undefined.
- MFCR,
+ /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
+ /// This copies the bits corresponding to the specified CRREG into the
+ /// resultant GPR. Bits corresponding to other CR regs are undefined.
+ MFOCRF,
// EH_SJLJ_SETJMP - SjLj exception handling setjmp.
EH_SJLJ_SETJMP,
@@ -146,6 +146,10 @@ namespace llvm {
/// an optional input flag argument.
COND_BRANCH,
+ /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
+ /// loops.
+ BDNZ, BDZ,
+
/// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
/// towards zero. Used only as part of the long double-to-int
/// conversion sequence.
@@ -175,61 +179,61 @@ namespace llvm {
/// G8RC = ADDIS_GOT_TPREL_HA %X2, Symbol - Used by the initial-exec
/// TLS model, produces an ADDIS8 instruction that adds the GOT
- /// base to sym@got@tprel@ha.
+ /// base to sym\@got\@tprel\@ha.
ADDIS_GOT_TPREL_HA,
/// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
/// TLS model, produces a LD instruction with base register G8RReg
- /// and offset sym@got@tprel@l. This completes the addition that
+ /// and offset sym\@got\@tprel\@l. This completes the addition that
/// finds the offset of "sym" relative to the thread pointer.
LD_GOT_TPREL_L,
/// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
/// model, produces an ADD instruction that adds the contents of
/// G8RReg to the thread pointer. Symbol contains a relocation
- /// sym@tls which is to be replaced by the thread pointer and
+ /// sym\@tls which is to be replaced by the thread pointer and
/// identifies to the linker that the instruction is part of a
/// TLS sequence.
ADD_TLS,
/// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
- /// register to sym@got@tlsgd@ha.
+ /// register to sym\@got\@tlsgd\@ha.
ADDIS_TLSGD_HA,
/// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym@got@tlsgd@l.
+ /// sym\@got\@tlsgd\@l.
ADDI_TLSGD_L,
/// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym@tlsgd).
+ /// model, produces a call to __tls_get_addr(sym\@tlsgd).
GET_TLS_ADDR,
/// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
- /// register to sym@got@tlsld@ha.
+ /// register to sym\@got\@tlsld\@ha.
ADDIS_TLSLD_HA,
/// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym@got@tlsld@l.
+ /// sym\@got\@tlsld\@l.
ADDI_TLSLD_L,
/// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
- /// model, produces a call to __tls_get_addr(sym@tlsld).
+ /// model, produces a call to __tls_get_addr(sym\@tlsld).
GET_TLSLD_ADDR,
/// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
/// local-dynamic TLS model, produces an ADDIS8 instruction
- /// that adds X3 to sym@dtprel@ha. The Chain operand is needed
+ /// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
/// to tie this in place following a copy to %X3 from the result
/// of a GET_TLSLD_ADDR.
ADDIS_DTPREL_HA,
/// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
- /// sym@got@dtprel@l.
+ /// sym\@got\@dtprel\@l.
ADDI_DTPREL_L,
/// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
@@ -238,6 +242,10 @@ namespace llvm {
/// optimizations due to constant folding.
VADD_SPLAT,
+ /// CHAIN = SC CHAIN, Imm128 - System call. The 7-bit unsigned
+ /// operand identifies the operating system entry point.
+ SC,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
@@ -266,16 +274,16 @@ namespace llvm {
/// G8RC = ADDIS_TOC_HA %X2, Symbol - For medium and large code model,
/// produces an ADDIS8 instruction that adds the TOC base register to
- /// sym@toc@ha.
+ /// sym\@toc\@ha.
ADDIS_TOC_HA,
/// G8RC = LD_TOC_L Symbol, G8RReg - For medium and large code model,
/// produces a LD instruction with base register G8RReg and offset
- /// sym@toc@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
+ /// sym\@toc\@l. Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
LD_TOC_L,
/// G8RC = ADDI_TOC_L G8RReg, Symbol - For medium code model, produces
- /// an ADDI8 instruction that adds G8RReg to sym@toc@l.
+ /// an ADDI8 instruction that adds G8RReg to sym\@toc\@l.
/// Preceded by an ADDIS_TOC_HA to form a full 32-bit offset.
ADDI_TOC_L
};
@@ -327,8 +335,6 @@ namespace llvm {
class PPCTargetLowering : public TargetLowering {
const PPCSubtarget &PPCSubTarget;
- const PPCRegisterInfo *PPCRegInfo;
- const PPCInstrInfo *PPCII;
public:
explicit PPCTargetLowering(PPCTargetMachine &TM);
@@ -340,7 +346,7 @@ namespace llvm {
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
/// getSetCCResultType - Return the ISD::SETCC ValueType
- virtual EVT getSetCCResultType(EVT VT) const;
+ virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const;
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
@@ -358,21 +364,16 @@ namespace llvm {
/// SelectAddressRegImm - Returns true if the address N can be represented
/// by a base register plus a signed 16-bit displacement [r+imm], and if it
- /// is not better represented as reg+reg.
+ /// is not better represented as reg+reg. If Aligned is true, only accept
+ /// displacements suitable for STD and friends, i.e. multiples of 4.
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
- SelectionDAG &DAG) const;
+ SelectionDAG &DAG, bool Aligned) const;
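
The Aligned flag matters because DS-form instructions (ld, std, lwa) have no room for the low two displacement bits. A sketch of the combined test, using a hypothetical helper name:

    #include <cassert>
    #include <cstdint>

    // fitsRegImm is hypothetical; it mirrors the contract described above:
    // the displacement must fit a signed 16-bit field, and when Aligned is
    // set it must also be a multiple of 4 (DS-form drops the low two bits).
    static bool fitsRegImm(int64_t D, bool Aligned) {
      bool FitsS16 = D >= -32768 && D <= 32767;
      return FitsS16 && (!Aligned || (D & 3) == 0);
    }

    int main() {
      assert(fitsRegImm(8, /*Aligned=*/true));   // usable by std
      assert(!fitsRegImm(6, /*Aligned=*/true));  // not a multiple of 4
      assert(fitsRegImm(6, /*Aligned=*/false));  // fine for lwz/stw
    }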
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
SelectionDAG &DAG) const;
- /// SelectAddressRegImmShift - Returns true if the address N can be
- /// represented by a base register plus a signed 14-bit displacement
- /// [r+imm*4]. Suitable for use by STD and friends.
- bool SelectAddressRegImmShift(SDValue N, SDValue &Disp, SDValue &Base,
- SelectionDAG &DAG) const;
-
Sched::Preference getSchedulingPreference(SDNode *N) const;
/// LowerOperation - Provide custom lowering hooks for some operations.
@@ -418,7 +419,7 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
+ MVT VT) const;
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area. This is the actual
@@ -436,15 +437,6 @@ namespace llvm {
/// by AM is legal for this target, for a load/store of the specified type.
virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
- /// isLegalAddressImmediate - Return true if the integer value can be used
- /// as the offset of the target addressing mode for load / store of the
- /// given type.
- virtual bool isLegalAddressImmediate(int64_t V, Type *Ty) const;
-
- /// isLegalAddressImmediate - Return true if the GlobalValue can be used as
- /// the offset of the target addressing mode.
- virtual bool isLegalAddressImmediate(GlobalValue *GV) const;
-
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
/// getOptimalMemOpType - Returns the target specific optimal type for load
@@ -459,7 +451,7 @@ namespace llvm {
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
virtual EVT
- getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
+ getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
MachineFunction &MF) const;
@@ -467,11 +459,16 @@ namespace llvm {
/// relative to software emulation.
virtual bool allowsUnalignedMemoryAccesses(EVT VT, bool *Fast = 0) const;
- /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
- /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
- /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
- /// is expanded to mul + add.
- virtual bool isFMAFasterThanMulAndAdd(EVT VT) const;
+ /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
+ /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
+ /// expanded to FMAs when this method returns true, otherwise fmuladd is
+ /// expanded to fmul + fadd.
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const;
+
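The rename makes the contract explicit: when this hook returns true, fmuladd becomes a single FMA, which rounds once instead of twice. A small self-contained illustration of why that is observable:

    #include <cmath>
    #include <cstdio>

    int main() {
      double a = 1.0 + std::ldexp(1.0, -30);   // 1 + 2^-30, exact in double
      double fused = std::fma(a, a, -1.0);     // one rounding: 2^-60 term kept
      double split = a * a - 1.0;              // a*a rounds first: term lost
      printf("difference: %g\n", fused - split);  // 2^-60, about 8.67e-19
    }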
+ /// createFastISel - This method returns a target-specific FastISel object,
+ /// or null if the target does not support "fast" instruction selection.
+ virtual FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo) const;
private:
SDValue getFramePointerFrameIndex(SelectionDAG & DAG) const;
@@ -490,7 +487,7 @@ namespace llvm {
SDValue &LROpOut,
SDValue &FPOpOut,
bool isDarwinABI,
- DebugLoc dl) const;
+ SDLoc dl) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
@@ -506,12 +503,14 @@ namespace llvm {
const PPCSubtarget &Subtarget) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
+ SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG,
+ const PPCSubtarget &Subtarget) const;
SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, DebugLoc dl) const;
+ SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, SDLoc dl) const;
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
@@ -526,9 +525,9 @@ namespace llvm {
SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
- SDValue FinishCall(CallingConv::ID CallConv, DebugLoc dl, bool isTailCall,
+ SDValue FinishCall(CallingConv::ID CallConv, SDLoc dl, bool isTailCall,
bool isVarArg,
SelectionDAG &DAG,
SmallVector<std::pair<unsigned, SDValue>, 8>
@@ -543,7 +542,7 @@ namespace llvm {
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual SDValue
@@ -561,11 +560,11 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
- DebugLoc dl, SelectionDAG &DAG) const;
+ SDLoc dl, SelectionDAG &DAG) const;
SDValue
extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG,
- SDValue ArgVal, DebugLoc dl) const;
+ SDValue ArgVal, SDLoc dl) const;
void
setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
@@ -576,25 +575,25 @@ namespace llvm {
LowerFormalArguments_Darwin(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerFormalArguments_64SVR4(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerFormalArguments_32SVR4(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
- SelectionDAG &DAG, DebugLoc dl) const;
+ SelectionDAG &DAG, SDLoc dl) const;
SDValue
LowerCall_Darwin(SDValue Chain, SDValue Callee,
@@ -603,7 +602,7 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerCall_64SVR4(SDValue Chain, SDValue Callee,
@@ -612,7 +611,7 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue
LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
@@ -620,7 +619,7 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
- DebugLoc dl, SelectionDAG &DAG,
+ SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
@@ -628,7 +627,31 @@ namespace llvm {
SDValue DAGCombineFastRecip(SDValue Op, DAGCombinerInfo &DCI) const;
SDValue DAGCombineFastRecipFSQRT(SDValue Op, DAGCombinerInfo &DCI) const;
+
+ CCAssignFn *useFastISelCCs(unsigned Flag) const;
};
+
+ namespace PPC {
+ FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
+ const TargetLibraryInfo *LibInfo);
+ }
+
+ bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+ bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
+ bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
}
#endif // LLVM_TARGET_POWERPC_PPC32ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index bff4c23..46db4fe 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -17,29 +17,39 @@
//
def s16imm64 : Operand<i64> {
let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS16ImmAsmOperand;
}
def u16imm64 : Operand<i64> {
let PrintMethod = "printU16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCU16ImmAsmOperand;
}
-def symbolHi64 : Operand<i64> {
- let PrintMethod = "printSymbolHi";
- let EncoderMethod = "getHA16Encoding";
- let ParserMatchClass = PPCS16ImmAsmOperand;
-}
-def symbolLo64 : Operand<i64> {
- let PrintMethod = "printSymbolLo";
- let EncoderMethod = "getLO16Encoding";
- let ParserMatchClass = PPCS16ImmAsmOperand;
+def s17imm64 : Operand<i64> {
+ // This operand type is used for addis/lis to allow the assembler parser
+ // to accept immediates in the range -65536..65535 for compatibility with
+ // the GNU assembler. The operand is treated as 16-bit otherwise.
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS17ImmAsmOperand;
}
def tocentry : Operand<iPTR> {
let MIOperandInfo = (ops i64imm:$imm);
}
+def PPCTLSRegOperand : AsmOperandClass {
+ let Name = "TLSReg"; let PredicateMethod = "isTLSReg";
+ let RenderMethod = "addTLSRegOperands";
+}
def tlsreg : Operand<i64> {
let EncoderMethod = "getTLSRegEncoding";
+ let ParserMatchClass = PPCTLSRegOperand;
}
def tlsgd : Operand<i64> {}
+def tlscall : Operand<i64> {
+ let PrintMethod = "printTLSCall";
+ let MIOperandInfo = (ops calltarget:$func, tlsgd:$sym);
+ let EncoderMethod = "getTLSCallEncoding";
+}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
@@ -78,7 +88,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let isCodeGenOnly = 1 in
def BCCTR8 : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
- "b${cond:cc}ctr ${cond:reg}", BrB, []>,
+ "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>,
Requires<[In64BitMode]>;
}
}
@@ -111,7 +121,10 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
def BL8 : IForm<18, 0, 1, (outs), (ins calltarget:$func),
"bl $func", BrB, []>; // See Pat patterns below.
- def BLA8 : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ def BL8_TLS : IForm<18, 0, 1, (outs), (ins tlscall:$func),
+ "bl $func", BrB, []>;
+
+ def BLA8 : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
"bla $func", BrB, [(PPCcall (i64 imm:$func))]>;
}
let Uses = [RM], isCodeGenOnly = 1 in {
@@ -119,16 +132,12 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
(outs), (ins calltarget:$func),
"bl $func\n\tnop", BrB, []>;
- def BL8_NOP_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
- (outs), (ins calltarget:$func, tlsgd:$sym),
- "bl $func($sym)\n\tnop", BrB, []>;
-
- def BL8_NOP_TLSLD : IForm_and_DForm_4_zero<18, 0, 1, 24,
- (outs), (ins calltarget:$func, tlsgd:$sym),
- "bl $func($sym)\n\tnop", BrB, []>;
+ def BL8_NOP_TLS : IForm_and_DForm_4_zero<18, 0, 1, 24,
+ (outs), (ins tlscall:$func),
+ "bl $func\n\tnop", BrB, []>;
def BLA8_NOP : IForm_and_DForm_4_zero<18, 1, 1, 24,
- (outs), (ins aaddr:$func),
+ (outs), (ins abscalltarget:$func),
"bla $func\n\tnop", BrB,
[(PPCcall_nop (i64 imm:$func))]>;
}
@@ -139,7 +148,7 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
let isCodeGenOnly = 1 in
def BCCTRL8 : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
- "b${cond:cc}ctrl ${cond:reg}", BrB, []>,
+ "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>,
Requires<[In64BitMode]>;
}
}
@@ -207,7 +216,7 @@ def TCRETURNdi8 :Pseudo< (outs),
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai8 :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset),
+def TCRETURNai8 :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
"#TC_RETURNa8 $func $offset",
[(PPCtc_return (i64 imm:$func), imm:$offset)]>;
@@ -233,7 +242,7 @@ def TAILB8 : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
-def TAILBA8 : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+def TAILBA8 : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
"ba $dst", BrB,
[]>;
@@ -253,22 +262,26 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
// 64-bit CR instructions
let Interpretation64Bit = 1 in {
let neverHasSideEffects = 1 in {
-def MTCRF8 : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins g8rc:$rS),
+def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST),
+ "mtocrf $FXM, $ST", BrMCRX>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
+
+def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS),
"mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
-let isCodeGenOnly = 1 in
-def MFCR8pseud: XFXForm_3<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
- "#MFCR8pseud", SprMFCR>,
- PPC970_MicroCode, PPC970_Unit_CRU;
-} // neverHasSideEffects = 1
+let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
+def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
+ "mfocrf $rT, $FXM", SprMFCR>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
-let neverHasSideEffects = 1 in
def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
"mfcr $rT", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+} // neverHasSideEffects = 1
let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ let Defs = [CTR8] in
def EH_SjLj_SetJmp64 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP64",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
@@ -293,8 +306,14 @@ def MTCTR8 : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR8] in {
+let Pattern = [(int_ppc_mtctr i64:$rS)] in
+def MTCTR8loop : XFXForm_7_ext<31, 467, 9, (outs), (ins g8rc:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
-let Pattern = [(set i64:$rT, readcyclecounter)] in
+let isCodeGenOnly = 1, Pattern = [(set i64:$rT, readcyclecounter)] in
def MFTB8 : XFXForm_1_ext<31, 339, 268, (outs g8rc:$rT), (ins),
"mfspr $rT, 268", SprMFTB>,
PPC970_DGroup_First, PPC970_Unit_FXU;
@@ -329,10 +348,10 @@ let Interpretation64Bit = 1 in {
let neverHasSideEffects = 1 in {
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
-def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins symbolLo64:$imm),
+def LI8 : DForm_2_r0<14, (outs g8rc:$rD), (ins s16imm64:$imm),
"li $rD, $imm", IntSimple,
- [(set i64:$rD, immSExt16:$imm)]>;
-def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins symbolHi64:$imm),
+ [(set i64:$rD, imm64SExt16:$imm)]>;
+def LIS8 : DForm_2_r0<15, (outs g8rc:$rD), (ins s17imm64:$imm),
"lis $rD, $imm", IntSimple,
[(set i64:$rD, imm16ShiftedSExt:$imm)]>;
}
@@ -392,9 +411,8 @@ defm ADD8 : XOForm_1r<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
[(set i64:$rT, (add i64:$rA, i64:$rB))]>;
// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
// initial-exec thread-local storage model.
-let isCodeGenOnly = 1 in
def ADD8TLS : XOForm_1<31, 266, 0, (outs g8rc:$rT), (ins g8rc:$rA, tlsreg:$rB),
- "add $rT, $rA, $rB@tls", IntSimple,
+ "add $rT, $rA, $rB", IntSimple,
[(set i64:$rT, (add i64:$rA, tglobaltlsaddr:$rB))]>;
defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
@@ -404,18 +422,18 @@ defm ADDC8 : XOForm_1rc<31, 10, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
let Defs = [CARRY] in
def ADDIC8 : DForm_2<12, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
"addic $rD, $rA, $imm", IntGeneral,
- [(set i64:$rD, (addc i64:$rA, immSExt16:$imm))]>;
-def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolLo64:$imm),
+ [(set i64:$rD, (addc i64:$rA, imm64SExt16:$imm))]>;
+def ADDI8 : DForm_2<14, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s16imm64:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set i64:$rD, (add i64:$rA, immSExt16:$imm))]>;
-def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, symbolHi64:$imm),
+ [(set i64:$rD, (add i64:$rA, imm64SExt16:$imm))]>;
+def ADDIS8 : DForm_2<15, (outs g8rc:$rD), (ins g8rc_nox0:$rA, s17imm64:$imm),
"addis $rD, $rA, $imm", IntSimple,
[(set i64:$rD, (add i64:$rA, imm16ShiftedSExt:$imm))]>;
let Defs = [CARRY] in {
def SUBFIC8: DForm_2< 8, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
- [(set i64:$rD, (subc immSExt16:$imm, i64:$rA))]>;
+ [(set i64:$rD, (subc imm64SExt16:$imm, i64:$rA))]>;
defm SUBFC8 : XOForm_1r<31, 8, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"subfc", "$rT, $rA, $rB", IntGeneral,
[(set i64:$rT, (subc i64:$rB, i64:$rA))]>,
@@ -489,6 +507,14 @@ defm EXTSH8 : XForm_11r<31, 922, (outs g8rc:$rA), (ins g8rc:$rS),
[(set i64:$rA, (sext_inreg i64:$rS, i16))]>;
} // Interpretation64Bit
+// For fast-isel:
+let isCodeGenOnly = 1 in {
+def EXTSB8_32_64 : XForm_11<31, 954, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsb $rA, $rS", IntSimple, []>, isPPC64;
+def EXTSH8_32_64 : XForm_11<31, 922, (outs g8rc:$rA), (ins gprc:$rS),
+ "extsh $rA, $rS", IntSimple, []>, isPPC64;
+} // isCodeGenOnly for fast-isel
+
defm EXTSW : XForm_11r<31, 986, (outs g8rc:$rA), (ins g8rc:$rS),
"extsw", "$rA, $rS", IntSimple,
[(set i64:$rA, (sext_inreg i64:$rS, i32))]>, isPPC64;
@@ -503,16 +529,16 @@ defm SRADI : XSForm_1rc<31, 413, (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH),
defm CNTLZD : XForm_11r<31, 58, (outs g8rc:$rA), (ins g8rc:$rS),
"cntlzd", "$rA, $rS", IntGeneral,
[(set i64:$rA, (ctlz i64:$rS))]>;
-defm POPCNTD : XForm_11r<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
- "popcntd", "$rA, $rS", IntGeneral,
- [(set i64:$rA, (ctpop i64:$rS))]>;
+def POPCNTD : XForm_11<31, 506, (outs g8rc:$rA), (ins g8rc:$rS),
+ "popcntd $rA, $rS", IntGeneral,
+ [(set i64:$rA, (ctpop i64:$rS))]>;
// popcntw also does a population count on the high 32 bits (storing the
// results in the high 32-bits of the output). We'll ignore that here (which is
// safe because we never separately use the high part of the 64-bit registers).
-defm POPCNTW : XForm_11r<31, 378, (outs gprc:$rA), (ins gprc:$rS),
- "popcntw", "$rA, $rS", IntGeneral,
- [(set i32:$rA, (ctpop i32:$rS))]>;
+def POPCNTW : XForm_11<31, 378, (outs gprc:$rA), (ins gprc:$rS),
+ "popcntw $rA, $rS", IntGeneral,
+ [(set i32:$rA, (ctpop i32:$rS))]>;
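
What the comment above describes, written out: popcntw counts bits in each 32-bit word independently, and modelling only the low word is sound as long as the high half is never consumed separately. A sketch of the full semantics:

    #include <cassert>
    #include <cstdint>

    // Full popcntw semantics: an independent population count per 32-bit word.
    static uint64_t popcntw(uint64_t Rs) {
      uint64_t lo = __builtin_popcount((uint32_t)Rs);
      uint64_t hi = __builtin_popcount((uint32_t)(Rs >> 32));
      return (hi << 32) | lo;
    }

    int main() {
      assert(popcntw(0x00000003000000FFull) == 0x0000000200000008ull);
    }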
defm DIVD : XOForm_1r<31, 489, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"divd", "$rT, $rA, $rB", IntDivD,
@@ -525,6 +551,9 @@ defm DIVDU : XOForm_1r<31, 457, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
defm MULLD : XOForm_1r<31, 233, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB),
"mulld", "$rT, $rA, $rB", IntMulHD,
[(set i64:$rT, (mul i64:$rA, i64:$rB))]>, isPPC64;
+def MULLI8 : DForm_2<7, (outs g8rc:$rD), (ins g8rc:$rA, s16imm64:$imm),
+ "mulli $rD, $rA, $imm", IntMulLI,
+ [(set i64:$rD, (mul i64:$rA, imm64SExt16:$imm))]>;
}
let neverHasSideEffects = 1 in {
@@ -541,14 +570,30 @@ defm RLDCL : MDSForm_1r<30, 8,
(outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
"rldcl", "$rA, $rS, $rB, $MBE", IntRotateD,
[]>, isPPC64;
+defm RLDCR : MDSForm_1r<30, 9,
+ (outs g8rc:$rA), (ins g8rc:$rS, gprc:$rB, u6imm:$MBE),
+ "rldcr", "$rA, $rS, $rB, $MBE", IntRotateD,
+ []>, isPPC64;
defm RLDICL : MDForm_1r<30, 0,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicl", "$rA, $rS, $SH, $MBE", IntRotateDI,
[]>, isPPC64;
+// For fast-isel:
+let isCodeGenOnly = 1 in
+def RLDICL_32_64 : MDForm_1<30, 0,
+ (outs g8rc:$rA),
+ (ins gprc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldicl $rA, $rS, $SH, $MBE", IntRotateDI,
+ []>, isPPC64;
+// End fast-isel.
defm RLDICR : MDForm_1r<30, 1,
(outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
"rldicr", "$rA, $rS, $SH, $MBE", IntRotateDI,
[]>, isPPC64;
+defm RLDIC : MDForm_1r<30, 2,
+ (outs g8rc:$rA), (ins g8rc:$rS, u6imm:$SH, u6imm:$MBE),
+ "rldic", "$rA, $rS, $SH, $MBE", IntRotateDI,
+ []>, isPPC64;
let Interpretation64Bit = 1 in {
defm RLWINM8 : MForm_2r<21, (outs g8rc:$rA),
@@ -592,6 +637,15 @@ def LWAX : XForm_1<31, 341, (outs g8rc:$rD), (ins memrr:$src),
"lwax $rD, $src", LdStLHA,
[(set i64:$rD, (sextloadi32 xaddr:$src))]>, isPPC64,
PPC970_DGroup_Cracked;
+// For fast-isel:
+let isCodeGenOnly = 1, mayLoad = 1 in {
+def LWA_32 : DSForm_1<58, 2, (outs gprc:$rD), (ins memrix:$src),
+ "lwa $rD, $src", LdStLWA, []>, isPPC64,
+ PPC970_DGroup_Cracked;
+def LWAX_32 : XForm_1<31, 341, (outs gprc:$rD), (ins memrr:$src),
+ "lwax $rD, $src", LdStLHA, []>, isPPC64,
+ PPC970_DGroup_Cracked;
+} // end fast-isel isCodeGenOnly
// Update forms.
let mayLoad = 1, neverHasSideEffects = 1 in {
@@ -750,25 +804,25 @@ def ADDItocL: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
(PPCaddiTocL i64:$reg, tglobaladdr:$disp))]>, isPPC64;
// Support for thread-local storage.
-def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
+def ADDISgotTprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISgotTprelHA",
[(set i64:$rD,
(PPCaddisGotTprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins symbolLo64:$disp, g8rc_nox0:$reg),
+def LDgotTprelL: Pseudo<(outs g8rc:$rD), (ins s16imm64:$disp, g8rc_nox0:$reg),
"#LDgotTprelL",
[(set i64:$rD,
(PPCldGotTprelL tglobaltlsaddr:$disp, i64:$reg))]>,
isPPC64;
def : Pat<(PPCaddTls i64:$in, tglobaltlsaddr:$g),
(ADD8TLS $in, tglobaltlsaddr:$g)>;
-def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
+def ADDIStlsgdHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsgdHA",
[(set i64:$rD,
(PPCaddisTlsgdHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
+def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDItlsgdL",
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -778,12 +832,12 @@ def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
[(set i64:$rD,
(PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
+def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
(PPCaddisTlsldHA i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
+def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDItlsldL",
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -793,13 +847,13 @@ def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
[(set i64:$rD,
(PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
-def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolHi64:$disp),
+def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,
(PPCaddisDtprelHA i64:$reg,
tglobaltlsaddr:$disp))]>,
isPPC64;
-def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, symbolLo64:$disp),
+def ADDIdtprelL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIdtprelL",
[(set i64:$rD,
(PPCaddiDtprelL i64:$reg, tglobaltlsaddr:$disp))]>,
@@ -914,6 +968,9 @@ let PPC970_Unit = 3, neverHasSideEffects = 1,
defm FCFID : XForm_26r<63, 846, (outs f8rc:$frD), (ins f8rc:$frB),
"fcfid", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64;
+defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctid", "$frD, $frB", FPGeneral,
+ []>, isPPC64;
defm FCTIDZ : XForm_26r<63, 815, (outs f8rc:$frD), (ins f8rc:$frB),
"fctidz", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfctidz f64:$frB))]>, isPPC64;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index cc9cf0a..a55abe3 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -229,35 +229,45 @@ let Predicates = [HasAltivec] in {
let isCodeGenOnly = 1 in {
def DSS : DSS_Form<822, (outs),
(ins u5imm:$ZERO0, u5imm:$STRM,u5imm:$ZERO1,u5imm:$ZERO2),
- "dss $STRM", LdStLoad /*FIXME*/, []>;
+ "dss $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSSALL : DSS_Form<822, (outs),
(ins u5imm:$ONE, u5imm:$ZERO0,u5imm:$ZERO1,u5imm:$ZERO2),
- "dssall", LdStLoad /*FIXME*/, []>;
+ "dssall", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DST : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTT : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTST : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTSTT : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, gprc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DST64 : DSS_Form<342, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTT64 : DSS_Form<342, (outs),
(ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTST64 : DSS_Form<374, (outs),
(ins u5imm:$ZERO, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dstst $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
def DSTSTT64 : DSS_Form<374, (outs),
(ins u5imm:$ONE, u5imm:$STRM, g8rc:$rA, gprc:$rB),
- "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>;
+ "dststt $rA, $rB, $STRM", LdStLoad /*FIXME*/, []>,
+ Deprecated<DeprecatedDST>;
}
def MFVSCR : VXForm_4<1540, (outs vrrc:$vD), (ins),
@@ -392,7 +402,7 @@ def VCTUXS : VXForm_1<906, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
// Defines with the UIM field set to 0 for floating-point
// to integer (fp_to_sint/fp_to_uint) conversions and integer
// to floating-point (sint_to_fp/uint_to_fp) conversions.
-let VA = 0 in {
+let isCodeGenOnly = 1, VA = 0 in {
def VCFSX_0 : VXForm_1<842, (outs vrrc:$vD), (ins vrrc:$vB),
"vcfsx $vD, $vB, 0", VecFP,
[(set v4f32:$vD,
@@ -664,15 +674,29 @@ def VCMPGTSWo : VCMPo<902, "vcmpgtsw. $vD, $vA, $vB", v4i32>;
def VCMPGTUW : VCMP <646, "vcmpgtuw $vD, $vA, $vB" , v4i32>;
def VCMPGTUWo : VCMPo<646, "vcmpgtuw. $vD, $vA, $vB", v4i32>;
-let isCodeGenOnly = 1 in
-def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
+let isCodeGenOnly = 1 in {
+def V_SET0B : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
+ "vxor $vD, $vD, $vD", VecFP,
+ [(set v16i8:$vD, (v16i8 immAllZerosV))]>;
+def V_SET0H : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
+ "vxor $vD, $vD, $vD", VecFP,
+ [(set v8i16:$vD, (v8i16 immAllZerosV))]>;
+def V_SET0 : VXForm_setzero<1220, (outs vrrc:$vD), (ins),
"vxor $vD, $vD, $vD", VecFP,
[(set v4i32:$vD, (v4i32 immAllZerosV))]>;
+
let IMM=-1 in {
-def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
+def V_SETALLONESB : VXForm_3<908, (outs vrrc:$vD), (ins),
+ "vspltisw $vD, -1", VecFP,
+ [(set v16i8:$vD, (v16i8 immAllOnesV))]>;
+def V_SETALLONESH : VXForm_3<908, (outs vrrc:$vD), (ins),
+ "vspltisw $vD, -1", VecFP,
+ [(set v8i16:$vD, (v8i16 immAllOnesV))]>;
+def V_SETALLONES : VXForm_3<908, (outs vrrc:$vD), (ins),
"vspltisw $vD, -1", VecFP,
[(set v4i32:$vD, (v4i32 immAllOnesV))]>;
}
+}
} // VALU Operations.
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index b6f4e85..29233d4 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -145,6 +145,33 @@ class BForm_2<bits<6> opcode, bits<5> bo, bits<5> bi, bit aa, bit lk,
let Inst{31} = lk;
}
+class BForm_3<bits<6> opcode, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, BrB> {
+ bits<5> BO;
+ bits<5> BI;
+ bits<14> BD;
+
+ let Inst{6-10} = BO;
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
+// 1.7.3 SC-Form
+class SCForm<bits<6> opcode, bits<1> xo,
+ dag OOL, dag IOL, string asmstr, InstrItinClass itin,
+ list<dag> pattern>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<7> LEV;
+
+ let Pattern = pattern;
+
+ let Inst{20-26} = LEV;
+ let Inst{30} = xo;
+}
+
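The SCForm bit positions above use the usual PPC convention (bit 0 is the most significant). A hypothetical encoder that transcribes the layout reproduces the well-known "sc" word:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // encodeSC is hypothetical; it just transcribes the SCForm layout:
    // opcode in Inst{0-5}, LEV in Inst{20-26}, xo in Inst{30}.
    static uint32_t encodeSC(unsigned Lev) {
      return (17u << 26)            // opcode 17 -> Inst{0-5}
           | ((Lev & 0x7Fu) << 5)   // 7-bit LEV -> Inst{20-26}
           | (1u << 1);             // xo = 1    -> Inst{30}
    }

    int main() {
      assert(encodeSC(0) == 0x44000002u);   // canonical "sc" encoding
      printf("sc 1 -> 0x%08x\n", encodeSC(1));
    }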
// 1.7.4 D-Form
class DForm_base<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
@@ -371,6 +398,13 @@ class XForm_1a<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let RST = 0;
}
+class XForm_rs<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let A = 0;
+ let B = 0;
+}
+
class XForm_6<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo_swapped<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -411,6 +445,17 @@ class XForm_16<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XForm_mtmsr<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin>
+ : I<opcode, OOL, IOL, asmstr, itin> {
+ bits<5> RS;
+ bits<1> L;
+
+ let Inst{6-10} = RS;
+ let Inst{15} = L;
+ let Inst{21-30} = xo;
+}
+
class XForm_16_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: XForm_16<opcode, xo, OOL, IOL, asmstr, itin> {
@@ -446,14 +491,23 @@ class XForm_24<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
class XForm_24_sync<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
string asmstr, InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
+ bits<2> L;
+
let Pattern = pattern;
- let Inst{6-10} = 0;
+ let Inst{6-8} = 0;
+ let Inst{9-10} = L;
let Inst{11-15} = 0;
let Inst{16-20} = 0;
let Inst{21-30} = xo;
let Inst{31} = 0;
}
+class XForm_24_eieio<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmstr, InstrItinClass itin, list<dag> pattern>
+ : XForm_24_sync<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let L = 0;
+}
+
class XForm_25<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
@@ -498,6 +552,21 @@ class XForm_43<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = RC;
}
+class XForm_0<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+ let A = 0;
+ let B = 0;
+}
+
+class XForm_16b<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XForm_base_r3xo<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let RST = 0;
+ let A = 0;
+}
+
// DCB_Form - Form X instruction, used for dcb* instructions.
class DCB_Form<bits<10> xo, bits<5> immfield, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 1fb17eb..315ad04 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -33,7 +33,7 @@
#include "llvm/Support/raw_ostream.h"
#define GET_INSTRMAP_INFO
-#define GET_INSTRINFO_CTOR
+#define GET_INSTRINFO_CTOR_DTOR
#include "PPCGenInstrInfo.inc"
using namespace llvm;
@@ -45,9 +45,12 @@ opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,
static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",
cl::desc("Disable compare instruction optimization"), cl::Hidden);
+// Pin the vtable to this file.
+void PPCInstrInfo::anchor() {}
+
PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
- TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+ TM(tm), RI(*TM.getSubtargetImpl()) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
@@ -74,10 +77,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer(
// Most subtargets use a PPC970 recognizer.
if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&
Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {
- const TargetInstrInfo *TII = TM.getInstrInfo();
- assert(TII && "No InstrInfo?");
+ assert(TM.getInstrInfo() && "No InstrInfo?");
- return new PPCHazardRecognizer970(*TII);
+ return new PPCHazardRecognizer970(TM);
}
return new PPCScoreboardHazardRecognizer(II, DAG);
@@ -449,7 +451,9 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
// isel is for regular integer GPRs only.
if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&
- !PPC::G8RCRegClass.hasSubClassEq(RC))
+ !PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&
+ !PPC::G8RCRegClass.hasSubClassEq(RC) &&
+ !PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))
return false;
// FIXME: These numbers are for the A2, how well they work for other cores is
@@ -479,12 +483,15 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
const TargetRegisterClass *RC =
RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
assert(RC && "TrueReg and FalseReg must have overlapping register classes");
- assert((PPC::GPRCRegClass.hasSubClassEq(RC) ||
- PPC::G8RCRegClass.hasSubClassEq(RC)) &&
+
+ bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||
+ PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);
+ assert((Is64Bit ||
+ PPC::GPRCRegClass.hasSubClassEq(RC) ||
+ PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&
"isel is for regular integer GPRs only");
- unsigned OpCode =
- PPC::GPRCRegClass.hasSubClassEq(RC) ? PPC::ISEL : PPC::ISEL8;
+ unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
unsigned SelectPred = Cond[0].getImm();
unsigned SubIdx;
@@ -792,16 +799,6 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
NewMIs.back()->addMemOperand(MF, MMO);
}
-MachineInstr*
-PPCInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF,
- int FrameIx, uint64_t Offset,
- const MDNode *MDPtr,
- DebugLoc DL) const {
- MachineInstrBuilder MIB = BuildMI(MF, DL, get(PPC::DBG_VALUE));
- addFrameReference(MIB, FrameIx, 0, false).addImm(Offset).addMetadata(MDPtr);
- return &*MIB;
-}
-
bool PPCInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
@@ -991,6 +988,10 @@ bool PPCInstrInfo::SubsumesPredicate(
if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)
return false;
+ // P1 can only subsume P2 if they test the same condition register.
+ if (Pred1[1].getReg() != Pred2[1].getReg())
+ return false;
+
PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();
PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();
@@ -1156,25 +1157,19 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
MachineInstr *UseMI = &*I;
if (UseMI->getOpcode() == PPC::BCC) {
unsigned Pred = UseMI->getOperand(0).getImm();
- if (Pred == PPC::PRED_EQ || Pred == PPC::PRED_NE)
- continue;
-
- return false;
+ if (Pred != PPC::PRED_EQ && Pred != PPC::PRED_NE)
+ return false;
} else if (UseMI->getOpcode() == PPC::ISEL ||
UseMI->getOpcode() == PPC::ISEL8) {
unsigned SubIdx = UseMI->getOperand(3).getSubReg();
- if (SubIdx == PPC::sub_eq)
- continue;
-
- return false;
+ if (SubIdx != PPC::sub_eq)
+ return false;
} else
return false;
}
}
- // Get ready to iterate backward from CmpInstr.
- MachineBasicBlock::iterator I = CmpInstr, E = MI,
- B = CmpInstr->getParent()->begin();
+ MachineBasicBlock::iterator I = CmpInstr;
// Scan forward to find the first use of the compare.
for (MachineBasicBlock::iterator EL = CmpInstr->getParent()->end();
@@ -1191,9 +1186,6 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
break;
}
- // Early exit if we're at the beginning of the BB.
- if (I == B) return false;
-
// There are two possible candidates which can be changed to set CR[01].
// One is MI, the other is a SUB instruction.
// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
@@ -1213,6 +1205,11 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr,
// Search for Sub.
const TargetRegisterInfo *TRI = &getRegisterInfo();
--I;
+
+ // Get ready to iterate backward from CmpInstr.
+ MachineBasicBlock::iterator E = MI,
+ B = CmpInstr->getParent()->begin();
+
for (; I != E && !noSub; --I) {
const MachineInstr &Instr = *I;
unsigned IOpC = Instr.getOpcode();
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 34a1a73..f140c41 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -78,6 +78,7 @@ class PPCInstrInfo : public PPCGenInstrInfo {
const TargetRegisterClass *RC,
SmallVectorImpl<MachineInstr*> &NewMIs,
bool &NonRI, bool &SpillsVRS) const;
+ virtual void anchor();
public:
explicit PPCInstrInfo(PPCTargetMachine &TM);
@@ -148,12 +149,6 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
- virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF,
- int FrameIx,
- uint64_t Offset,
- const MDNode *MDPtr,
- DebugLoc DL) const;
-
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index 4763069..2bd3aad 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -162,6 +162,10 @@ def PPCeh_sjlj_longjmp : SDNode<"PPCISD::EH_SJLJ_LONGJMP",
SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
[SDNPHasChain, SDNPSideEffect]>;
+def SDT_PPCsc : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def PPCsc : SDNode<"PPCISD::SC", SDT_PPCsc,
+ [SDNPHasChain, SDNPSideEffect]>;
+
def PPCvcmp : SDNode<"PPCISD::VCMP" , SDT_PPCvcmp, []>;
def PPCvcmp_o : SDNode<"PPCISD::VCMPo", SDT_PPCvcmp, [SDNPOutGlue]>;
@@ -246,13 +250,15 @@ def maskimm32 : PatLeaf<(imm), [{
return false;
}]>;
-def immSExt16 : PatLeaf<(imm), [{
- // immSExt16 predicate - True if the immediate fits in a 16-bit sign extended
- // field. Used by instructions like 'addi'.
- if (N->getValueType(0) == MVT::i32)
- return (int32_t)N->getZExtValue() == (short)N->getZExtValue();
- else
- return (int64_t)N->getZExtValue() == (short)N->getZExtValue();
+def imm32SExt16 : Operand<i32>, ImmLeaf<i32, [{
+ // imm32SExt16 predicate - True if the i32 immediate fits in a 16-bit
+ // sign extended field. Used by instructions like 'addi'.
+ return (int32_t)Imm == (short)Imm;
+}]>;
+def imm64SExt16 : Operand<i64>, ImmLeaf<i64, [{
+ // imm64SExt16 predicate - True if the i64 immediate fits in a 16-bit
+ // sign extended field. Used by instructions like 'addi'.
+ return (int64_t)Imm == (short)Imm;
}]>;
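
The C++ test inside these ImmLeaf predicates is the classic truncate-and-sign-extend round trip; lifted out of TableGen it reads:

    #include <cassert>
    #include <cstdint>

    // An immediate fits addi-style operands iff truncating to 16 bits and
    // sign-extending back reproduces the original value.
    static bool fitsSExt16(int64_t Imm) {
      return Imm == (int64_t)(short)Imm;
    }

    int main() {
      assert(fitsSExt16(32767));     // 0x7FFF, largest positive
      assert(fitsSExt16(-32768));    // -0x8000, smallest negative
      assert(!fitsSExt16(32768));    // needs 17 bits
      assert(!fitsSExt16(65535));    // zero- but not sign-extendable
    }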
def immZExt16 : PatLeaf<(imm), [{
// immZExt16 predicate - True if the immediate fits in a 16-bit zero extended
@@ -283,7 +289,7 @@ def imm16ShiftedSExt : PatLeaf<(imm), [{
}], HI16>;
// Some r+i load/store instructions (such as LD, STD, LDU, etc.) that require
-// restricted memrix (offset/4) constants are alignment sensitive. If these
+// restricted memrix (4-aligned) constants are alignment sensitive. If these
// offsets are hidden behind TOC entries then the values of the lower-order
// bits cannot be checked directly. As a result, we need to also incorporate
// an alignment check into the relevant patterns.
@@ -386,7 +392,7 @@ def vrrc : RegisterOperand<VRRC> {
let ParserMatchClass = PPCRegVRRCAsmOperand;
}
def PPCRegCRBITRCAsmOperand : AsmOperandClass {
- let Name = "RegCRBITRC"; let PredicateMethod = "isRegNumber";
+ let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber";
}
def crbitrc : RegisterOperand<CRBITRC> {
let ParserMatchClass = PPCRegCRBITRCAsmOperand;
@@ -428,6 +434,7 @@ def PPCS16ImmAsmOperand : AsmOperandClass {
}
def s16imm : Operand<i32> {
let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCS16ImmAsmOperand;
}
def PPCU16ImmAsmOperand : AsmOperandClass {
@@ -436,31 +443,58 @@ def PPCU16ImmAsmOperand : AsmOperandClass {
}
def u16imm : Operand<i32> {
let PrintMethod = "printU16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
let ParserMatchClass = PPCU16ImmAsmOperand;
}
+def PPCS17ImmAsmOperand : AsmOperandClass {
+ let Name = "S17Imm"; let PredicateMethod = "isS17Imm";
+ let RenderMethod = "addImmOperands";
+}
+def s17imm : Operand<i32> {
+ // This operand type is used for addis/lis to allow the assembler parser
+ // to accept immediates in the range -65536..65535 for compatibility with
+ // the GNU assembler. The operand is treated as 16-bit otherwise.
+ let PrintMethod = "printS16ImmOperand";
+ let EncoderMethod = "getImm16Encoding";
+ let ParserMatchClass = PPCS17ImmAsmOperand;
+}
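
A sketch of the range the new isS17Imm predicate presumably accepts, and why: "lis r3, 0xffff" and "lis r3, -1" denote the same 16-bit pattern, so GNU-as compatibility needs the union of the signed and unsigned 16-bit ranges:

    #include <cassert>
    #include <cstdint>

    // Assumed check behind PPCS17ImmAsmOperand: any value a signed 17-bit
    // field can hold; the encoder is expected to keep only the low 16 bits.
    static bool isS17Imm(int64_t Imm) {
      return Imm >= -65536 && Imm <= 65535;
    }

    int main() {
      assert(isS17Imm(-1));        // lis r3, -1
      assert(isS17Imm(0xFFFF));    // lis r3, 0xffff -- same bit pattern
      assert(!isS17Imm(0x10000));  // out of range
    }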
+def PPCDirectBrAsmOperand : AsmOperandClass {
+ let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
+ let RenderMethod = "addBranchTargetOperands";
+}
def directbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def absdirectbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
+}
+def PPCCondBrAsmOperand : AsmOperandClass {
+ let Name = "CondBr"; let PredicateMethod = "isCondBr";
+ let RenderMethod = "addBranchTargetOperands";
}
def condbrtarget : Operand<OtherVT> {
let PrintMethod = "printBranchOperand";
let EncoderMethod = "getCondBrEncoding";
+ let ParserMatchClass = PPCCondBrAsmOperand;
+}
+def abscondbrtarget : Operand<OtherVT> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsCondBrEncoding";
+ let ParserMatchClass = PPCCondBrAsmOperand;
}
def calltarget : Operand<iPTR> {
+ let PrintMethod = "printBranchOperand";
let EncoderMethod = "getDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
}
-def aaddr : Operand<iPTR> {
- let PrintMethod = "printAbsAddrOperand";
-}
-def symbolHi: Operand<i32> {
- let PrintMethod = "printSymbolHi";
- let EncoderMethod = "getHA16Encoding";
- let ParserMatchClass = PPCS16ImmAsmOperand;
-}
-def symbolLo: Operand<i32> {
- let PrintMethod = "printSymbolLo";
- let EncoderMethod = "getLO16Encoding";
- let ParserMatchClass = PPCS16ImmAsmOperand;
+def abscalltarget : Operand<iPTR> {
+ let PrintMethod = "printAbsBranchOperand";
+ let EncoderMethod = "getAbsDirectBrEncoding";
+ let ParserMatchClass = PPCDirectBrAsmOperand;
}
def PPCCRBitMaskOperand : AsmOperandClass {
let Name = "CRBitMask"; let PredicateMethod = "isCRBitMask";
@@ -488,12 +522,14 @@ def ptr_rc_idx : Operand<iPTR>, PointerLikeRegClass<0> {
def PPCDispRIOperand : AsmOperandClass {
let Name = "DispRI"; let PredicateMethod = "isS16Imm";
+ let RenderMethod = "addImmOperands";
}
def dispRI : Operand<iPTR> {
let ParserMatchClass = PPCDispRIOperand;
}
def PPCDispRIXOperand : AsmOperandClass {
let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4";
+ let RenderMethod = "addImmOperands";
}
def dispRIX : Operand<iPTR> {
let ParserMatchClass = PPCDispRIXOperand;
@@ -508,8 +544,8 @@ def memrr : Operand<iPTR> {
let PrintMethod = "printMemRegReg";
let MIOperandInfo = (ops ptr_rc_nor0:$ptrreg, ptr_rc_idx:$offreg);
}
-def memrix : Operand<iPTR> { // memri where the imm is shifted 2 bits.
- let PrintMethod = "printMemRegImmShifted";
+def memrix : Operand<iPTR> { // memri where the imm is 4-aligned.
+ let PrintMethod = "printMemRegImm";
let MIOperandInfo = (ops dispRIX:$imm, ptr_rc_nor0:$reg);
let EncoderMethod = "getMemRIXEncoding";
}
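
"4-aligned" here is an encoding constraint: the DS displacement field stores disp/4, so only multiples of 4 in [-32768, 32764] survive a round trip. A hypothetical pack/unpack pair makes that concrete:

    #include <cassert>
    #include <cstdint>

    // packDS/unpackDS are hypothetical; they model the memrix contract that
    // getMemRIXEncoding relies on: the field holds the displacement >> 2.
    static int16_t packDS(int64_t Disp) {
      assert((Disp & 3) == 0 && Disp >= -32768 && Disp <= 32764);
      return (int16_t)(Disp >> 2);
    }
    static int64_t unpackDS(int16_t DS) { return (int64_t)DS * 4; }

    int main() {
      assert(unpackDS(packDS(32764)) == 32764);
      assert(unpackDS(packDS(-32768)) == -32768);
    }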
@@ -530,7 +566,7 @@ def pred : Operand<OtherVT> {
def iaddr : ComplexPattern<iPTR, 2, "SelectAddrImm", [], []>;
def xaddr : ComplexPattern<iPTR, 2, "SelectAddrIdx", [], []>;
def xoaddr : ComplexPattern<iPTR, 2, "SelectAddrIdxOnly",[], []>;
-def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmShift", [], []>; // "std"
+def ixaddr : ComplexPattern<iPTR, 2, "SelectAddrImmX4", [], []>; // "std"
// The address in a single register. This is used with the SjLj
// pseudo-instructions.
@@ -749,6 +785,20 @@ multiclass XForm_26r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
}
}
+multiclass XForm_28r<bits<6> opcode, bits<10> xo, dag OOL, dag IOL,
+ string asmbase, string asmstr, InstrItinClass itin,
+ list<dag> pattern> {
+ let BaseName = asmbase in {
+ def NAME : XForm_28<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(" ", asmstr)), itin,
+ pattern>, RecFormRel;
+ let Defs = [CR1] in
+ def o : XForm_28<opcode, xo, OOL, IOL,
+ !strconcat(asmbase, !strconcat(". ", asmstr)), itin,
+ []>, isDOT, RecFormRel;
+ }
+}
+
multiclass AForm_1r<bits<6> opcode, bits<5> xo, dag OOL, dag IOL,
string asmbase, string asmstr, InstrItinClass itin,
list<dag> pattern> {
@@ -860,7 +910,7 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7 in {
let isCodeGenOnly = 1 in
def BCCTR : XLForm_2_br<19, 528, 0, (outs), (ins pred:$cond),
- "b${cond:cc}ctr ${cond:reg}", BrB, []>;
+ "b${cond:cc}ctr${cond:pm} ${cond:reg}", BrB, []>;
}
}
@@ -873,6 +923,8 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
def B : IForm<18, 0, 0, (outs), (ins directbrtarget:$dst),
"b $dst", BrB,
[(br bb:$dst)]>;
+ def BA : IForm<18, 1, 0, (outs), (ins absdirectbrtarget:$dst),
+ "ba $dst", BrB, []>;
}
// BCC represents an arbitrary conditional branch on a predicate.
@@ -880,18 +932,29 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
// a two-value operand where a dag node expects two operands. :(
let isCodeGenOnly = 1 in {
def BCC : BForm<16, 0, 0, (outs), (ins pred:$cond, condbrtarget:$dst),
- "b${cond:cc} ${cond:reg}, $dst"
+ "b${cond:cc}${cond:pm} ${cond:reg}, $dst"
/*[(PPCcondbranch crrc:$crS, imm:$opc, bb:$dst)]*/>;
+ def BCCA : BForm<16, 1, 0, (outs), (ins pred:$cond, abscondbrtarget:$dst),
+ "b${cond:cc}a${cond:pm} ${cond:reg}, $dst">;
+
let isReturn = 1, Uses = [LR, RM] in
def BCLR : XLForm_2_br<19, 16, 0, (outs), (ins pred:$cond),
- "b${cond:cc}lr ${cond:reg}", BrB, []>;
+ "b${cond:cc}lr${cond:pm} ${cond:reg}", BrB, []>;
+ }
- let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
- def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
+ let isReturn = 1, Defs = [CTR], Uses = [CTR, LR, RM] in {
+ def BDZLR : XLForm_2_ext<19, 16, 18, 0, 0, (outs), (ins),
"bdzlr", BrB, []>;
- def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
+ def BDNZLR : XLForm_2_ext<19, 16, 16, 0, 0, (outs), (ins),
"bdnzlr", BrB, []>;
- }
+ def BDZLRp : XLForm_2_ext<19, 16, 27, 0, 0, (outs), (ins),
+ "bdzlr+", BrB, []>;
+ def BDNZLRp: XLForm_2_ext<19, 16, 25, 0, 0, (outs), (ins),
+ "bdnzlr+", BrB, []>;
+ def BDZLRm : XLForm_2_ext<19, 16, 26, 0, 0, (outs), (ins),
+ "bdzlr-", BrB, []>;
+ def BDNZLRm: XLForm_2_ext<19, 16, 24, 0, 0, (outs), (ins),
+ "bdnzlr-", BrB, []>;
}
let Defs = [CTR], Uses = [CTR] in {
@@ -899,6 +962,26 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7 in {
"bdz $dst">;
def BDNZ : BForm_1<16, 16, 0, 0, (outs), (ins condbrtarget:$dst),
"bdnz $dst">;
+ def BDZA : BForm_1<16, 18, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdza $dst">;
+ def BDNZA : BForm_1<16, 16, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdnza $dst">;
+ def BDZp : BForm_1<16, 27, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz+ $dst">;
+ def BDNZp: BForm_1<16, 25, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz+ $dst">;
+ def BDZAp : BForm_1<16, 27, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdza+ $dst">;
+ def BDNZAp: BForm_1<16, 25, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdnza+ $dst">;
+ def BDZm : BForm_1<16, 26, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdz- $dst">;
+ def BDNZm: BForm_1<16, 24, 0, 0, (outs), (ins condbrtarget:$dst),
+ "bdnz- $dst">;
+ def BDZAm : BForm_1<16, 26, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdza- $dst">;
+ def BDNZAm: BForm_1<16, 24, 1, 0, (outs), (ins abscondbrtarget:$dst),
+ "bdnza- $dst">;
}
}
@@ -915,8 +998,15 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
let Uses = [RM] in {
def BL : IForm<18, 0, 1, (outs), (ins calltarget:$func),
"bl $func", BrB, []>; // See Pat patterns below.
- def BLA : IForm<18, 1, 1, (outs), (ins aaddr:$func),
+ def BLA : IForm<18, 1, 1, (outs), (ins abscalltarget:$func),
"bla $func", BrB, [(PPCcall (i32 imm:$func))]>;
+
+ let isCodeGenOnly = 1 in {
+ def BCCL : BForm<16, 0, 1, (outs), (ins pred:$cond, condbrtarget:$dst),
+ "b${cond:cc}l${cond:pm} ${cond:reg}, $dst">;
+ def BCCLA : BForm<16, 1, 1, (outs), (ins pred:$cond, abscondbrtarget:$dst),
+ "b${cond:cc}la${cond:pm} ${cond:reg}, $dst">;
+ }
}
let Uses = [CTR, RM] in {
def BCTRL : XLForm_2_ext<19, 528, 20, 0, 1, (outs), (ins),
@@ -925,7 +1015,55 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
let isCodeGenOnly = 1 in
def BCCTRL : XLForm_2_br<19, 528, 1, (outs), (ins pred:$cond),
- "b${cond:cc}ctrl ${cond:reg}", BrB, []>;
+ "b${cond:cc}ctrl${cond:pm} ${cond:reg}", BrB, []>;
+ }
+ let Uses = [LR, RM] in {
+ def BLRL : XLForm_2_ext<19, 16, 20, 0, 1, (outs), (ins),
+ "blrl", BrB, []>;
+
+ let isCodeGenOnly = 1 in
+ def BCLRL : XLForm_2_br<19, 16, 1, (outs), (ins pred:$cond),
+ "b${cond:cc}lrl${cond:pm} ${cond:reg}", BrB, []>;
+ }
+ let Defs = [CTR], Uses = [CTR, RM] in {
+ def BDZL : BForm_1<16, 18, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdzl $dst">;
+ def BDNZL : BForm_1<16, 16, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdnzl $dst">;
+ def BDZLA : BForm_1<16, 18, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdzla $dst">;
+ def BDNZLA : BForm_1<16, 16, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdnzla $dst">;
+ def BDZLp : BForm_1<16, 27, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdzl+ $dst">;
+ def BDNZLp: BForm_1<16, 25, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdnzl+ $dst">;
+ def BDZLAp : BForm_1<16, 27, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdzla+ $dst">;
+ def BDNZLAp: BForm_1<16, 25, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdnzla+ $dst">;
+ def BDZLm : BForm_1<16, 26, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdzl- $dst">;
+ def BDNZLm: BForm_1<16, 24, 0, 1, (outs), (ins condbrtarget:$dst),
+ "bdnzl- $dst">;
+ def BDZLAm : BForm_1<16, 26, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdzla- $dst">;
+ def BDNZLAm: BForm_1<16, 24, 1, 1, (outs), (ins abscondbrtarget:$dst),
+ "bdnzla- $dst">;
+ }
+ let Defs = [CTR], Uses = [CTR, LR, RM] in {
+ def BDZLRL : XLForm_2_ext<19, 16, 18, 0, 1, (outs), (ins),
+ "bdzlrl", BrB, []>;
+ def BDNZLRL : XLForm_2_ext<19, 16, 16, 0, 1, (outs), (ins),
+ "bdnzlrl", BrB, []>;
+ def BDZLRLp : XLForm_2_ext<19, 16, 27, 0, 1, (outs), (ins),
+ "bdzlrl+", BrB, []>;
+ def BDNZLRLp: XLForm_2_ext<19, 16, 25, 0, 1, (outs), (ins),
+ "bdnzlrl+", BrB, []>;
+ def BDZLRLm : XLForm_2_ext<19, 16, 26, 0, 1, (outs), (ins),
+ "bdzlrl-", BrB, []>;
+ def BDNZLRLm: XLForm_2_ext<19, 16, 24, 0, 1, (outs), (ins),
+ "bdnzlrl-", BrB, []>;
}
}
@@ -937,7 +1075,7 @@ def TCRETURNdi :Pseudo< (outs),
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [RM] in
-def TCRETURNai :Pseudo<(outs), (ins aaddr:$func, i32imm:$offset),
+def TCRETURNai :Pseudo<(outs), (ins abscalltarget:$func, i32imm:$offset),
"#TC_RETURNa $func $offset",
[(PPCtc_return (i32 imm:$func), imm:$offset)]>;
@@ -954,23 +1092,22 @@ let isTerminator = 1, isBarrier = 1, PPC970_Unit = 7, isBranch = 1,
def TAILBCTR : XLForm_2_ext<19, 528, 20, 0, 0, (outs), (ins), "bctr", BrB, []>,
Requires<[In32BitMode]>;
-
-
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
def TAILB : IForm<18, 0, 0, (outs), (ins calltarget:$dst),
"b $dst", BrB,
[]>;
-}
-
let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, PPC970_Unit = 7,
isBarrier = 1, isCall = 1, isReturn = 1, Uses = [RM] in
-def TAILBA : IForm<18, 0, 0, (outs), (ins aaddr:$dst),
+def TAILBA : IForm<18, 0, 0, (outs), (ins abscalltarget:$dst),
"ba $dst", BrB,
[]>;
+}
+
let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
+ let Defs = [CTR] in
def EH_SjLj_SetJmp32 : Pseudo<(outs gprc:$dst), (ins memr:$buf),
"#EH_SJLJ_SETJMP32",
[(set i32:$dst, (PPCeh_sjlj_setjmp addr:$buf))]>,
@@ -987,6 +1124,12 @@ let isBranch = 1, isTerminator = 1 in {
"#EH_SjLj_Setup\t$dst", []>;
}
+// System call.
+let PPC970_Unit = 7 in {
+ def SC : SCForm<17, 1, (outs), (ins i32imm:$lev),
+ "sc $lev", BrB, [(PPCsc (i32 imm:$lev))]>;
+}
+
// DCB* instructions.
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst),
"dcba $dst", LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
@@ -1110,6 +1253,15 @@ def STWCX : XForm_1<31, 150, (outs), (ins gprc:$rS, memrr:$dst),
let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
def TRAP : XForm_24<31, 4, (outs), (ins), "trap", LdStLoad, [(trap)]>;
+def TWI : DForm_base<3, (outs), (ins u5imm:$to, gprc:$rA, s16imm:$imm),
+ "twi $to, $rA, $imm", IntTrapW, []>;
+def TW : XForm_1<31, 4, (outs), (ins u5imm:$to, gprc:$rA, gprc:$rB),
+ "tw $to, $rA, $rB", IntTrapW, []>;
+def TDI : DForm_base<2, (outs), (ins u5imm:$to, g8rc:$rA, s16imm:$imm),
+ "tdi $to, $rA, $imm", IntTrapD, []>;
+def TD : XForm_1<31, 68, (outs), (ins u5imm:$to, g8rc:$rA, g8rc:$rB),
+ "td $to, $rA, $rB", IntTrapD, []>;
+
//===----------------------------------------------------------------------===//
// PPC32 Load Instructions.
//
@@ -1250,6 +1402,10 @@ def LFIWZX : XForm_25<31, 887, (outs f8rc:$frD), (ins memrr:$src),
[(set f64:$frD, (PPClfiwzx xoaddr:$src))]>;
}
+// Load Multiple
+def LMW : DForm_1<46, (outs gprc:$rD), (ins memri:$src),
+ "lmw $rD, $src", LdStLMW, []>;
+
//===----------------------------------------------------------------------===//
// PPC32 Store Instructions.
//
@@ -1380,50 +1536,54 @@ def : Pat<(pre_store f32:$rS, iPTR:$ptrreg, iPTR:$ptroff),
def : Pat<(pre_store f64:$rS, iPTR:$ptrreg, iPTR:$ptroff),
(STFDUX $rS, $ptrreg, $ptroff)>;
-def SYNC : XForm_24_sync<31, 598, (outs), (ins),
- "sync", LdStSync,
- [(int_ppc_sync)]>;
+// Store Multiple
+def STMW : DForm_1<47, (outs), (ins gprc:$rS, memri:$dst),
+ "stmw $rS, $dst", LdStLMW, []>;
+
+def SYNC : XForm_24_sync<31, 598, (outs), (ins i32imm:$L),
+ "sync $L", LdStSync, []>;
+def : Pat<(int_ppc_sync), (SYNC 0)>;
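
Modelling the L field lets one mnemonic family share an encoding; the values below are the architected sync/lwsync words, derived from the XForm_24_sync layout (opcode in Inst{0-5}, L in Inst{9-10}, xo in Inst{21-30}):

    #include <cassert>
    #include <cstdint>

    // encodeSync is hypothetical; it transcribes the XForm_24_sync layout.
    static uint32_t encodeSync(unsigned L) {
      return (31u << 26) | ((L & 3u) << 21) | (598u << 1);
    }

    int main() {
      assert(encodeSync(0) == 0x7C0004ACu);   // sync   (heavyweight)
      assert(encodeSync(1) == 0x7C2004ACu);   // lwsync (lightweight)
    }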
//===----------------------------------------------------------------------===//
// PPC32 Arithmetic Instructions.
//
let PPC970_Unit = 1 in { // FXU Operations.
-def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$imm),
+def ADDI : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$imm),
"addi $rD, $rA, $imm", IntSimple,
- [(set i32:$rD, (add i32:$rA, immSExt16:$imm))]>;
+ [(set i32:$rD, (add i32:$rA, imm32SExt16:$imm))]>;
let BaseName = "addic" in {
let Defs = [CARRY] in
def ADDIC : DForm_2<12, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"addic $rD, $rA, $imm", IntGeneral,
- [(set i32:$rD, (addc i32:$rA, immSExt16:$imm))]>,
+ [(set i32:$rD, (addc i32:$rA, imm32SExt16:$imm))]>,
RecFormRel, PPC970_DGroup_Cracked;
let Defs = [CARRY, CR0] in
def ADDICo : DForm_2<13, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"addic. $rD, $rA, $imm", IntGeneral,
[]>, isDOT, RecFormRel;
}
-def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolHi:$imm),
+def ADDIS : DForm_2<15, (outs gprc:$rD), (ins gprc_nor0:$rA, s17imm:$imm),
"addis $rD, $rA, $imm", IntSimple,
[(set i32:$rD, (add i32:$rA, imm16ShiftedSExt:$imm))]>;
let isCodeGenOnly = 1 in
-def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, symbolLo:$sym),
+def LA : DForm_2<14, (outs gprc:$rD), (ins gprc_nor0:$rA, s16imm:$sym),
"la $rD, $sym($rA)", IntGeneral,
[(set i32:$rD, (add i32:$rA,
(PPClo tglobaladdr:$sym, 0)))]>;
def MULLI : DForm_2< 7, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"mulli $rD, $rA, $imm", IntMulLI,
- [(set i32:$rD, (mul i32:$rA, immSExt16:$imm))]>;
+ [(set i32:$rD, (mul i32:$rA, imm32SExt16:$imm))]>;
let Defs = [CARRY] in
def SUBFIC : DForm_2< 8, (outs gprc:$rD), (ins gprc:$rA, s16imm:$imm),
"subfic $rD, $rA, $imm", IntGeneral,
- [(set i32:$rD, (subc immSExt16:$imm, i32:$rA))]>;
+ [(set i32:$rD, (subc imm32SExt16:$imm, i32:$rA))]>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in {
- def LI : DForm_2_r0<14, (outs gprc:$rD), (ins symbolLo:$imm),
+ def LI : DForm_2_r0<14, (outs gprc:$rD), (ins s16imm:$imm),
"li $rD, $imm", IntSimple,
- [(set i32:$rD, immSExt16:$imm)]>;
- def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins symbolHi:$imm),
+ [(set i32:$rD, imm32SExt16:$imm)]>;
+ def LIS : DForm_2_r0<15, (outs gprc:$rD), (ins s17imm:$imm),
"lis $rD, $imm", IntSimple,
[(set i32:$rD, imm16ShiftedSExt:$imm)]>;
}
@@ -1532,6 +1692,9 @@ let isCompare = 1, neverHasSideEffects = 1 in {
let Uses = [RM] in {
let neverHasSideEffects = 1 in {
+ defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB),
+ "fctiw", "$frD, $frB", FPGeneral,
+ []>;
defm FCTIWZ : XForm_26r<63, 15, (outs f8rc:$frD), (ins f8rc:$frB),
"fctiwz", "$frD, $frB", FPGeneral,
[(set f64:$frD, (PPCfctiwz f64:$frB))]>;
@@ -1540,23 +1703,13 @@ let Uses = [RM] in {
"frsp", "$frD, $frB", FPGeneral,
[(set f32:$frD, (fround f64:$frB))]>;
- // The frin -> nearbyint mapping is valid only in fast-math mode.
let Interpretation64Bit = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
"frin", "$frD, $frB", FPGeneral,
- [(set f64:$frD, (fnearbyint f64:$frB))]>;
+ [(set f64:$frD, (frnd f64:$frB))]>;
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
"frin", "$frD, $frB", FPGeneral,
- [(set f32:$frD, (fnearbyint f32:$frB))]>;
- }
-
- // These pseudos expand to rint but also set FE_INEXACT when the result does
- // not equal the argument.
- let usesCustomInserter = 1, Defs = [RM] in { // FIXME: Model FPSCR!
- def FRINDrint : Pseudo<(outs f8rc:$frD), (ins f8rc:$frB),
- "#FRINDrint", [(set f64:$frD, (frint f64:$frB))]>;
- def FRINSrint : Pseudo<(outs f4rc:$frD), (ins f4rc:$frB),
- "#FRINSrint", [(set f32:$frD, (frint f32:$frB))]>;
+ [(set f32:$frD, (frnd f32:$frB))]>;
}
let neverHasSideEffects = 1 in {
@@ -1626,6 +1779,14 @@ defm FNEGD : XForm_26r<63, 40, (outs f8rc:$frD), (ins f8rc:$frB),
"fneg", "$frD, $frB", FPGeneral,
[(set f64:$frD, (fneg f64:$frB))]>;
+defm FCPSGNS : XForm_28r<63, 8, (outs f4rc:$frD), (ins f4rc:$frA, f4rc:$frB),
+ "fcpsgn", "$frD, $frA, $frB", FPGeneral,
+ [(set f32:$frD, (fcopysign f32:$frB, f32:$frA))]>;
+let Interpretation64Bit = 1 in
+defm FCPSGND : XForm_28r<63, 8, (outs f8rc:$frD), (ins f8rc:$frA, f8rc:$frB),
+ "fcpsgn", "$frD, $frA, $frB", FPGeneral,
+ [(set f64:$frD, (fcopysign f64:$frB, f64:$frA))]>;
+
// Reciprocal estimates.
defm FRE : XForm_26r<63, 24, (outs f8rc:$frD), (ins f8rc:$frB),
"fre", "$frD, $frB", FPGeneral,
@@ -1648,15 +1809,37 @@ def MCRF : XLForm_3<19, 0, (outs crrc:$BF), (ins crrc:$BFA),
"mcrf $BF, $BFA", BrMCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+def CRAND : XLForm_1<19, 257, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crand $CRD, $CRA, $CRB", BrCR, []>;
+
+def CRNAND : XLForm_1<19, 225, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crnand $CRD, $CRA, $CRB", BrCR, []>;
+
+def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "cror $CRD, $CRA, $CRB", BrCR, []>;
+
+def CRXOR : XLForm_1<19, 193, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crxor $CRD, $CRA, $CRB", BrCR, []>;
+
+def CRNOR : XLForm_1<19, 33, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crnor $CRD, $CRA, $CRB", BrCR, []>;
+
def CREQV : XLForm_1<19, 289, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "creqv $CRD, $CRA, $CRB", BrCR,
- []>;
+ "creqv $CRD, $CRA, $CRB", BrCR, []>;
+
+def CRANDC : XLForm_1<19, 129, (outs crbitrc:$CRD),
+ (ins crbitrc:$CRA, crbitrc:$CRB),
+ "crandc $CRD, $CRA, $CRB", BrCR, []>;
-def CROR : XLForm_1<19, 449, (outs crbitrc:$CRD),
+def CRORC : XLForm_1<19, 417, (outs crbitrc:$CRD),
(ins crbitrc:$CRA, crbitrc:$CRB),
- "cror $CRD, $CRA, $CRB", BrCR,
- []>;
+ "crorc $CRD, $CRA, $CRB", BrCR, []>;
let isCodeGenOnly = 1 in {
def CRSET : XLForm_1_ext<19, 289, (outs crbitrc:$dst), (ins),
@@ -1680,6 +1863,15 @@ def CR6UNSET: XLForm_1_ext<19, 193, (outs), (ins),
// XFX-Form instructions. Instructions that deal with SPRs.
//
+
+def MFSPR : XFXForm_1<31, 339, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mfspr $RT, $SPR", SprMFSPR>;
+def MTSPR : XFXForm_1<31, 467, (outs), (ins i32imm:$SPR, gprc:$RT),
+ "mtspr $SPR, $RT", SprMTSPR>;
+
+def MFTB : XFXForm_1<31, 371, (outs gprc:$RT), (ins i32imm:$SPR),
+ "mftb $RT, $SPR", SprMFTB>, Deprecated<DeprecatedMFTB>;
+
let Uses = [CTR] in {
def MFCTR : XFXForm_1_ext<31, 339, 9, (outs gprc:$rT), (ins),
"mfctr $rT", SprMFSPR>,
@@ -1690,6 +1882,12 @@ def MTCTR : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
"mtctr $rS", SprMTSPR>,
PPC970_DGroup_First, PPC970_Unit_FXU;
}
+let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [CTR] in {
+let Pattern = [(int_ppc_mtctr i32:$rS)] in
+def MTCTRloop : XFXForm_7_ext<31, 467, 9, (outs), (ins gprc:$rS),
+ "mtctr $rS", SprMTSPR>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+}
let Defs = [LR] in {
def MTLR : XFXForm_7_ext<31, 467, 8, (outs), (ins gprc:$rS),
@@ -1702,17 +1900,17 @@ def MFLR : XFXForm_1_ext<31, 339, 8, (outs gprc:$rT), (ins),
PPC970_DGroup_First, PPC970_Unit_FXU;
}
-// Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed like
-// a GPR on the PPC970. As such, copies in and out have the same performance
-// characteristics as an OR instruction.
-def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS),
- "mtspr 256, $rS", IntGeneral>,
- PPC970_DGroup_Single, PPC970_Unit_FXU;
-def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins),
- "mfspr $rT, 256", IntGeneral>,
- PPC970_DGroup_First, PPC970_Unit_FXU;
-
let isCodeGenOnly = 1 in {
+ // Move to/from VRSAVE: despite being a SPR, the VRSAVE register is renamed
+ // like a GPR on the PPC970. As such, copies in and out have the same
+ // performance characteristics as an OR instruction.
+ def MTVRSAVE : XFXForm_7_ext<31, 467, 256, (outs), (ins gprc:$rS),
+ "mtspr 256, $rS", IntGeneral>,
+ PPC970_DGroup_Single, PPC970_Unit_FXU;
+ def MFVRSAVE : XFXForm_1_ext<31, 339, 256, (outs gprc:$rT), (ins),
+ "mfspr $rT, 256", IntGeneral>,
+ PPC970_DGroup_First, PPC970_Unit_FXU;
+
def MTVRSAVEv : XFXForm_7_ext<31, 467, 256,
(outs VRSAVERC:$reg), (ins gprc:$rS),
"mtspr 256, $rS", IntGeneral>,
@@ -1736,34 +1934,23 @@ def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
"#RESTORE_VRSAVE", []>;
let neverHasSideEffects = 1 in {
-def MTCRF : XFXForm_5<31, 144, (outs crbitm:$FXM), (ins gprc:$rS),
- "mtcrf $FXM, $rS", BrMCRX>,
- PPC970_MicroCode, PPC970_Unit_CRU;
+def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST),
+ "mtocrf $FXM, $ST", BrMCRX>,
+ PPC970_DGroup_First, PPC970_Unit_CRU;
-// This is a pseudo for MFCR, which implicitly uses all 8 of its subregisters;
-// declaring that here gives the local register allocator problems with this:
-// vreg = MCRF CR0
-// MFCR <kill of whatever preg got assigned to vreg>
-// while not declaring it breaks DeadMachineInstructionElimination.
-// As it turns out, in all cases where we currently use this,
-// we're only interested in one subregister of it. Represent this in the
-// instruction to keep the register allocator from becoming confused.
-//
-// FIXME: Make this a real Pseudo instruction when the JIT switches to MC.
-let isCodeGenOnly = 1 in
-def MFCRpseud: XFXForm_3<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
- "#MFCRpseud", SprMFCR>,
+def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
+ "mtcrf $FXM, $rS", BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", SprMFCR>,
PPC970_DGroup_First, PPC970_Unit_CRU;
-} // neverHasSideEffects = 1
-let neverHasSideEffects = 1 in
def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
"mfcr $rT", SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+} // neverHasSideEffects = 1
// Pseudo instruction to perform FADD in round-to-zero mode.
let usesCustomInserter = 1, Uses = [RM] in {
@@ -2004,7 +2191,7 @@ def : Pat<(or i32:$in, imm:$imm),
def : Pat<(xor i32:$in, imm:$imm),
(XORIS (XORI $in, (LO16 imm:$imm)), (HI16 imm:$imm))>;
// SUBFIC
-def : Pat<(sub immSExt16:$imm, i32:$in),
+def : Pat<(sub imm32SExt16:$imm, i32:$in),
(SUBFIC $in, imm:$imm)>;
// SHL/SRL
@@ -2097,7 +2284,7 @@ def : Pat<(f64 (extloadf32 xaddr:$src)),
def : Pat<(f64 (fextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
-def : Pat<(atomic_fence (imm), (imm)), (SYNC)>;
+def : Pat<(atomic_fence (imm), (imm)), (SYNC 0)>;
// Additional FNMSUB patterns: -a*c + b == -(a*c - b)
def : Pat<(fma (fneg f64:$A), f64:$C, f64:$B),
@@ -2109,6 +2296,12 @@ def : Pat<(fma (fneg f32:$A), f32:$C, f32:$B),
def : Pat<(fma f32:$A, (fneg f32:$C), f32:$B),
(FNMSUBS $A, $C, $B)>;
+// FCOPYSIGN's operand types need not agree.
+def : Pat<(fcopysign f64:$frB, f32:$frA),
+ (FCPSGND (COPY_TO_REGCLASS $frA, F8RC), $frB)>;
+def : Pat<(fcopysign f32:$frB, f64:$frA),
+ (FCPSGNS (COPY_TO_REGCLASS $frA, F4RC), $frB)>;
+
include "PPCInstrAltivec.td"
include "PPCInstr64Bit.td"
@@ -2123,6 +2316,41 @@ def ISYNC : XLForm_2_ext<19, 150, 0, 0, 0, (outs), (ins),
def ICBI : XForm_1a<31, 982, (outs), (ins memrr:$src),
"icbi $src", LdStICBI, []>;
+def EIEIO : XForm_24_eieio<31, 854, (outs), (ins),
+ "eieio", LdStLoad, []>;
+
+def WAIT : XForm_24_sync<31, 62, (outs), (ins i32imm:$L),
+ "wait $L", LdStLoad, []>;
+
+def MTMSR: XForm_mtmsr<31, 146, (outs), (ins gprc:$RS, i32imm:$L),
+ "mtmsr $RS, $L", SprMTMSR>;
+
+def MFMSR : XForm_rs<31, 83, (outs gprc:$RT), (ins),
+ "mfmsr $RT", SprMFMSR, []>;
+
+def MTMSRD : XForm_mtmsr<31, 178, (outs), (ins gprc:$RS, i32imm:$L),
+ "mtmsrd $RS, $L", SprMTMSRD>;
+
+def SLBIE : XForm_16b<31, 434, (outs), (ins gprc:$RB),
+ "slbie $RB", SprSLBIE, []>;
+
+def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB),
+ "slbmte $RS, $RB", SprSLBMTE, []>;
+
+def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
+ "slbmfee $RT, $RB", SprSLBMFEE, []>;
+
+def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", SprSLBIA, []>;
+
+def TLBSYNC : XForm_0<31, 566, (outs), (ins),
+ "tlbsync", SprTLBSYNC, []>;
+
+def TLBIEL : XForm_16b<31, 274, (outs), (ins gprc:$RB),
+ "tlbiel $RB", SprTLBIEL, []>;
+
+def TLBIE : XForm_26<31, 306, (outs), (ins gprc:$RS, gprc:$RB),
+ "tlbie $RB,$RS", SprTLBIE, []>;
+
//===----------------------------------------------------------------------===//
// PowerPC Assembler Instruction Aliases
//
@@ -2143,66 +2371,339 @@ class PPCAsmPseudo<string asm, dag iops>
let isPseudo = 1;
}
-def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"sc", (SC 0)>;
+
+def : InstAlias<"sync", (SYNC 0)>;
+def : InstAlias<"msync", (SYNC 0)>;
+def : InstAlias<"lwsync", (SYNC 1)>;
+def : InstAlias<"ptesync", (SYNC 2)>;
+
+def : InstAlias<"wait", (WAIT 0)>;
+def : InstAlias<"waitrsv", (WAIT 1)>;
+def : InstAlias<"waitimpl", (WAIT 2)>;
+
+def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
+def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
+def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
+def : InstAlias<"crnot $bx, $by", (CRNOR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
+
+def : InstAlias<"mtxer $Rx", (MTSPR 1, gprc:$Rx)>;
+def : InstAlias<"mfxer $Rx", (MFSPR gprc:$Rx, 1)>;
+def : InstAlias<"mftb $Rx", (MFTB gprc:$Rx, 268)>;
+def : InstAlias<"mftbu $Rx", (MFTB gprc:$Rx, 269)>;
+
+def : InstAlias<"xnop", (XORI R0, R0, 0)>;
+
+def : InstAlias<"mr $rA, $rB", (OR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"mr. $rA, $rB", (OR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def : InstAlias<"not $rA, $rB", (NOR8 g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+def : InstAlias<"not. $rA, $rB", (NOR8o g8rc:$rA, g8rc:$rB, g8rc:$rB)>;
+
+def : InstAlias<"mtcr $rA", (MTCRF8 255, g8rc:$rA)>;
+
+def LAx : PPCAsmPseudo<"la $rA, $addr", (ins gprc:$rA, memri:$addr)>;
+
+def SUBI : PPCAsmPseudo<"subi $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIS : PPCAsmPseudo<"subis $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBIC : PPCAsmPseudo<"subic $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+def SUBICo : PPCAsmPseudo<"subic. $rA, $rB, $imm",
+ (ins gprc:$rA, gprc:$rB, s16imm:$imm)>;
+
+def : InstAlias<"sub $rA, $rB, $rC", (SUBF8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"sub. $rA, $rB, $rC", (SUBF8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc $rA, $rB, $rC", (SUBFC8 g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+def : InstAlias<"subc. $rA, $rB, $rC", (SUBFC8o g8rc:$rA, g8rc:$rC, g8rc:$rB)>;
+
+def : InstAlias<"mtmsrd $RS", (MTMSRD gprc:$RS, 0)>;
+def : InstAlias<"mtmsr $RS", (MTMSR gprc:$RS, 0)>;
+
+def : InstAlias<"mfsprg $RT, 0", (MFSPR gprc:$RT, 272)>;
+def : InstAlias<"mfsprg $RT, 1", (MFSPR gprc:$RT, 273)>;
+def : InstAlias<"mfsprg $RT, 2", (MFSPR gprc:$RT, 274)>;
+def : InstAlias<"mfsprg $RT, 3", (MFSPR gprc:$RT, 275)>;
+
+def : InstAlias<"mfsprg0 $RT", (MFSPR gprc:$RT, 272)>;
+def : InstAlias<"mfsprg1 $RT", (MFSPR gprc:$RT, 273)>;
+def : InstAlias<"mfsprg2 $RT", (MFSPR gprc:$RT, 274)>;
+def : InstAlias<"mfsprg3 $RT", (MFSPR gprc:$RT, 275)>;
+
+def : InstAlias<"mtsprg 0, $RT", (MTSPR 272, gprc:$RT)>;
+def : InstAlias<"mtsprg 1, $RT", (MTSPR 273, gprc:$RT)>;
+def : InstAlias<"mtsprg 2, $RT", (MTSPR 274, gprc:$RT)>;
+def : InstAlias<"mtsprg 3, $RT", (MTSPR 275, gprc:$RT)>;
+
+def : InstAlias<"mtsprg0 $RT", (MTSPR 272, gprc:$RT)>;
+def : InstAlias<"mtsprg1 $RT", (MTSPR 273, gprc:$RT)>;
+def : InstAlias<"mtsprg2 $RT", (MTSPR 274, gprc:$RT)>;
+def : InstAlias<"mtsprg3 $RT", (MTSPR 275, gprc:$RT)>;
+
+def : InstAlias<"mtasr $RS", (MTSPR 280, gprc:$RS)>;
+
+def : InstAlias<"mfdec $RT", (MFSPR gprc:$RT, 22)>;
+def : InstAlias<"mtdec $RT", (MTSPR 22, gprc:$RT)>;
+
+def : InstAlias<"mfpvr $RT", (MFSPR gprc:$RT, 287)>;
+
+def : InstAlias<"mfsdr1 $RT", (MFSPR gprc:$RT, 25)>;
+def : InstAlias<"mtsdr1 $RT", (MTSPR 25, gprc:$RT)>;
+
+def : InstAlias<"mfsrr0 $RT", (MFSPR gprc:$RT, 26)>;
+def : InstAlias<"mfsrr1 $RT", (MFSPR gprc:$RT, 27)>;
+def : InstAlias<"mtsrr0 $RT", (MTSPR 26, gprc:$RT)>;
+def : InstAlias<"mtsrr1 $RT", (MTSPR 27, gprc:$RT)>;
+
+def : InstAlias<"tlbie $RB", (TLBIE R0, gprc:$RB)>;
+
+def EXTLWI : PPCAsmPseudo<"extlwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def EXTLWIo : PPCAsmPseudo<"extlwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def EXTRWI : PPCAsmPseudo<"extrwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def EXTRWIo : PPCAsmPseudo<"extrwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSLWI : PPCAsmPseudo<"inslwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSLWIo : PPCAsmPseudo<"inslwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSRWI : PPCAsmPseudo<"insrwi $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def INSRWIo : PPCAsmPseudo<"insrwi. $rA, $rS, $n, $b",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n, u5imm:$b)>;
+def ROTRWI : PPCAsmPseudo<"rotrwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def ROTRWIo : PPCAsmPseudo<"rotrwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def SLWI : PPCAsmPseudo<"slwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SLWIo : PPCAsmPseudo<"slwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
def SRWI : PPCAsmPseudo<"srwi $rA, $rS, $n",
(ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def SRWIo : PPCAsmPseudo<"srwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def CLRRWI : PPCAsmPseudo<"clrrwi $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def CLRRWIo : PPCAsmPseudo<"clrrwi. $rA, $rS, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$n)>;
+def CLRLSLWI : PPCAsmPseudo<"clrlslwi $rA, $rS, $b, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
+def CLRLSLWIo : PPCAsmPseudo<"clrlslwi. $rA, $rS, $b, $n",
+ (ins gprc:$rA, gprc:$rS, u5imm:$b, u5imm:$n)>;
+
+def : InstAlias<"rotlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, u5imm:$n, 0, 31)>;
+def : InstAlias<"rotlw $rA, $rS, $rB", (RLWNM gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
+def : InstAlias<"rotlw. $rA, $rS, $rB", (RLWNMo gprc:$rA, gprc:$rS, gprc:$rB, 0, 31)>;
+def : InstAlias<"clrlwi $rA, $rS, $n", (RLWINM gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
+def : InstAlias<"clrlwi. $rA, $rS, $n", (RLWINMo gprc:$rA, gprc:$rS, 0, u5imm:$n, 31)>;
+
+def EXTLDI : PPCAsmPseudo<"extldi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def EXTLDIo : PPCAsmPseudo<"extldi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def EXTRDI : PPCAsmPseudo<"extrdi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def EXTRDIo : PPCAsmPseudo<"extrdi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def INSRDI : PPCAsmPseudo<"insrdi $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def INSRDIo : PPCAsmPseudo<"insrdi. $rA, $rS, $n, $b",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n, u6imm:$b)>;
+def ROTRDI : PPCAsmPseudo<"rotrdi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def ROTRDIo : PPCAsmPseudo<"rotrdi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def SLDI : PPCAsmPseudo<"sldi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def SLDIo : PPCAsmPseudo<"sldi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
def SRDI : PPCAsmPseudo<"srdi $rA, $rS, $n",
(ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
-
-def : InstAlias<"blt $cc, $dst", (BCC 12, crrc:$cc, condbrtarget:$dst)>;
-def : InstAlias<"bgt $cc, $dst", (BCC 44, crrc:$cc, condbrtarget:$dst)>;
-def : InstAlias<"beq $cc, $dst", (BCC 76, crrc:$cc, condbrtarget:$dst)>;
-def : InstAlias<"bun $cc, $dst", (BCC 108, crrc:$cc, condbrtarget:$dst)>;
-def : InstAlias<"bso $cc, $dst", (BCC 108, crrc:$cc, condbrtarget:$dst)>;
-def : InstAlias<"bge $cc, $dst", (BCC 4, crrc:$cc, condbrtarget:$dst)>;
-def : InstAlias<"bnl $cc, $dst", (BCC 4, crrc:$cc, condbrtarget:$dst)>;
-def : InstAlias<"ble $cc, $dst", (BCC 36, crrc:$cc, condbrtarget:$dst)>;
-def : InstAlias<"bng $cc, $dst", (BCC 36, crrc:$cc, condbrtarget:$dst)>;
-def : InstAlias<"bne $cc, $dst", (BCC 68, crrc:$cc, condbrtarget:$dst)>;
-def : InstAlias<"bnu $cc, $dst", (BCC 100, crrc:$cc, condbrtarget:$dst)>;
-def : InstAlias<"bns $cc, $dst", (BCC 100, crrc:$cc, condbrtarget:$dst)>;
-
-def : InstAlias<"bltlr $cc", (BCLR 12, crrc:$cc)>;
-def : InstAlias<"bgtlr $cc", (BCLR 44, crrc:$cc)>;
-def : InstAlias<"beqlr $cc", (BCLR 76, crrc:$cc)>;
-def : InstAlias<"bunlr $cc", (BCLR 108, crrc:$cc)>;
-def : InstAlias<"bsolr $cc", (BCLR 108, crrc:$cc)>;
-def : InstAlias<"bgelr $cc", (BCLR 4, crrc:$cc)>;
-def : InstAlias<"bnllr $cc", (BCLR 4, crrc:$cc)>;
-def : InstAlias<"blelr $cc", (BCLR 36, crrc:$cc)>;
-def : InstAlias<"bnglr $cc", (BCLR 36, crrc:$cc)>;
-def : InstAlias<"bnelr $cc", (BCLR 68, crrc:$cc)>;
-def : InstAlias<"bnulr $cc", (BCLR 100, crrc:$cc)>;
-def : InstAlias<"bnslr $cc", (BCLR 100, crrc:$cc)>;
-
-def : InstAlias<"bltctr $cc", (BCCTR 12, crrc:$cc)>;
-def : InstAlias<"bgtctr $cc", (BCCTR 44, crrc:$cc)>;
-def : InstAlias<"beqctr $cc", (BCCTR 76, crrc:$cc)>;
-def : InstAlias<"bunctr $cc", (BCCTR 108, crrc:$cc)>;
-def : InstAlias<"bsoctr $cc", (BCCTR 108, crrc:$cc)>;
-def : InstAlias<"bgectr $cc", (BCCTR 4, crrc:$cc)>;
-def : InstAlias<"bnlctr $cc", (BCCTR 4, crrc:$cc)>;
-def : InstAlias<"blectr $cc", (BCCTR 36, crrc:$cc)>;
-def : InstAlias<"bngctr $cc", (BCCTR 36, crrc:$cc)>;
-def : InstAlias<"bnectr $cc", (BCCTR 68, crrc:$cc)>;
-def : InstAlias<"bnuctr $cc", (BCCTR 100, crrc:$cc)>;
-def : InstAlias<"bnsctr $cc", (BCCTR 100, crrc:$cc)>;
-
-def : InstAlias<"bltctrl $cc", (BCCTRL 12, crrc:$cc)>;
-def : InstAlias<"bgtctrl $cc", (BCCTRL 44, crrc:$cc)>;
-def : InstAlias<"beqctrl $cc", (BCCTRL 76, crrc:$cc)>;
-def : InstAlias<"bunctrl $cc", (BCCTRL 108, crrc:$cc)>;
-def : InstAlias<"bsoctrl $cc", (BCCTRL 108, crrc:$cc)>;
-def : InstAlias<"bgectrl $cc", (BCCTRL 4, crrc:$cc)>;
-def : InstAlias<"bnlctrl $cc", (BCCTRL 4, crrc:$cc)>;
-def : InstAlias<"blectrl $cc", (BCCTRL 36, crrc:$cc)>;
-def : InstAlias<"bngctrl $cc", (BCCTRL 36, crrc:$cc)>;
-def : InstAlias<"bnectrl $cc", (BCCTRL 68, crrc:$cc)>;
-def : InstAlias<"bnuctrl $cc", (BCCTRL 100, crrc:$cc)>;
-def : InstAlias<"bnsctrl $cc", (BCCTRL 100, crrc:$cc)>;
+def SRDIo : PPCAsmPseudo<"srdi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def CLRRDI : PPCAsmPseudo<"clrrdi $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def CLRRDIo : PPCAsmPseudo<"clrrdi. $rA, $rS, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$n)>;
+def CLRLSLDI : PPCAsmPseudo<"clrlsldi $rA, $rS, $b, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
+def CLRLSLDIo : PPCAsmPseudo<"clrlsldi. $rA, $rS, $b, $n",
+ (ins g8rc:$rA, g8rc:$rS, u6imm:$b, u6imm:$n)>;
+
+def : InstAlias<"rotldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
+def : InstAlias<"rotldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, u6imm:$n, 0)>;
+def : InstAlias<"rotld $rA, $rS, $rB", (RLDCL g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
+def : InstAlias<"rotld. $rA, $rS, $rB", (RLDCLo g8rc:$rA, g8rc:$rS, gprc:$rB, 0)>;
+def : InstAlias<"clrldi $rA, $rS, $n", (RLDICL g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+def : InstAlias<"clrldi. $rA, $rS, $n", (RLDICLo g8rc:$rA, g8rc:$rS, 0, u6imm:$n)>;
+
+// These generic branch instruction forms are used by the assembly parser only.
+// Defs and Uses are conservative, since we don't know the BO value.
+let PPC970_Unit = 7 in {
+ let Defs = [CTR], Uses = [CTR, RM] in {
+ def gBC : BForm_3<16, 0, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
+ "bc $bo, $bi, $dst">;
+ def gBCA : BForm_3<16, 1, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
+ "bca $bo, $bi, $dst">;
+ }
+ let Defs = [LR, CTR], Uses = [CTR, RM] in {
+ def gBCL : BForm_3<16, 0, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, condbrtarget:$dst),
+ "bcl $bo, $bi, $dst">;
+ def gBCLA : BForm_3<16, 1, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
+ "bcla $bo, $bi, $dst">;
+ }
+ let Defs = [CTR], Uses = [CTR, LR, RM] in
+ def gBCLR : XLForm_2<19, 16, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bclr $bo, $bi, $bh", BrB, []>;
+ let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
+ def gBCLRL : XLForm_2<19, 16, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bclrl $bo, $bi, $bh", BrB, []>;
+ let Defs = [CTR], Uses = [CTR, LR, RM] in
+ def gBCCTR : XLForm_2<19, 528, 0, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bcctr $bo, $bi, $bh", BrB, []>;
+ let Defs = [LR, CTR], Uses = [CTR, LR, RM] in
+ def gBCCTRL : XLForm_2<19, 528, 1, (outs),
+ (ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
+ "bcctrl $bo, $bi, $bh", BrB, []>;
+}
+def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>;
+def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>;
+def : InstAlias<"bcctr $bo, $bi", (gBCCTR u5imm:$bo, crbitrc:$bi, 0)>;
+def : InstAlias<"bcctrl $bo, $bi", (gBCCTRL u5imm:$bo, crbitrc:$bi, 0)>;
+
+multiclass BranchSimpleMnemonic1<string name, string pm, int bo> {
+ def : InstAlias<"b"#name#pm#" $bi, $dst", (gBC bo, crbitrc:$bi, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"a"#pm#" $bi, $dst", (gBCA bo, crbitrc:$bi, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"lr"#pm#" $bi", (gBCLR bo, crbitrc:$bi, 0)>;
+ def : InstAlias<"b"#name#"l"#pm#" $bi, $dst", (gBCL bo, crbitrc:$bi, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"la"#pm#" $bi, $dst", (gBCLA bo, crbitrc:$bi, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"lrl"#pm#" $bi", (gBCLRL bo, crbitrc:$bi, 0)>;
+}
+multiclass BranchSimpleMnemonic2<string name, string pm, int bo>
+ : BranchSimpleMnemonic1<name, pm, bo> {
+ def : InstAlias<"b"#name#"ctr"#pm#" $bi", (gBCCTR bo, crbitrc:$bi, 0)>;
+ def : InstAlias<"b"#name#"ctrl"#pm#" $bi", (gBCCTRL bo, crbitrc:$bi, 0)>;
+}
+defm : BranchSimpleMnemonic2<"t", "", 12>;
+defm : BranchSimpleMnemonic2<"f", "", 4>;
+defm : BranchSimpleMnemonic2<"t", "-", 14>;
+defm : BranchSimpleMnemonic2<"f", "-", 6>;
+defm : BranchSimpleMnemonic2<"t", "+", 15>;
+defm : BranchSimpleMnemonic2<"f", "+", 7>;
+defm : BranchSimpleMnemonic1<"dnzt", "", 8>;
+defm : BranchSimpleMnemonic1<"dnzf", "", 0>;
+defm : BranchSimpleMnemonic1<"dzt", "", 10>;
+defm : BranchSimpleMnemonic1<"dzf", "", 2>;
+
+multiclass BranchExtendedMnemonicPM<string name, string pm, int bibo> {
+ def : InstAlias<"b"#name#pm#" $cc, $dst",
+ (BCC bibo, crrc:$cc, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#pm#" $dst",
+ (BCC bibo, CR0, condbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"a"#pm#" $cc, $dst",
+ (BCCA bibo, crrc:$cc, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"a"#pm#" $dst",
+ (BCCA bibo, CR0, abscondbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"lr"#pm#" $cc",
+ (BCLR bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"lr"#pm,
+ (BCLR bibo, CR0)>;
+
+ def : InstAlias<"b"#name#"ctr"#pm#" $cc",
+ (BCCTR bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"ctr"#pm,
+ (BCCTR bibo, CR0)>;
+
+ def : InstAlias<"b"#name#"l"#pm#" $cc, $dst",
+ (BCCL bibo, crrc:$cc, condbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"l"#pm#" $dst",
+ (BCCL bibo, CR0, condbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"la"#pm#" $cc, $dst",
+ (BCCLA bibo, crrc:$cc, abscondbrtarget:$dst)>;
+ def : InstAlias<"b"#name#"la"#pm#" $dst",
+ (BCCLA bibo, CR0, abscondbrtarget:$dst)>;
+
+ def : InstAlias<"b"#name#"lrl"#pm#" $cc",
+ (BCLRL bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"lrl"#pm,
+ (BCLRL bibo, CR0)>;
+
+ def : InstAlias<"b"#name#"ctrl"#pm#" $cc",
+ (BCCTRL bibo, crrc:$cc)>;
+ def : InstAlias<"b"#name#"ctrl"#pm,
+ (BCCTRL bibo, CR0)>;
+}
+multiclass BranchExtendedMnemonic<string name, int bibo> {
+ defm : BranchExtendedMnemonicPM<name, "", bibo>;
+ defm : BranchExtendedMnemonicPM<name, "-", !add(bibo, 2)>;
+ defm : BranchExtendedMnemonicPM<name, "+", !add(bibo, 3)>;
+}
+defm : BranchExtendedMnemonic<"lt", 12>;
+defm : BranchExtendedMnemonic<"gt", 44>;
+defm : BranchExtendedMnemonic<"eq", 76>;
+defm : BranchExtendedMnemonic<"un", 108>;
+defm : BranchExtendedMnemonic<"so", 108>;
+defm : BranchExtendedMnemonic<"ge", 4>;
+defm : BranchExtendedMnemonic<"nl", 4>;
+defm : BranchExtendedMnemonic<"le", 36>;
+defm : BranchExtendedMnemonic<"ng", 36>;
+defm : BranchExtendedMnemonic<"ne", 68>;
+defm : BranchExtendedMnemonic<"nu", 100>;
+defm : BranchExtendedMnemonic<"ns", 100>;
+
+def : InstAlias<"cmpwi $rA, $imm", (CMPWI CR0, gprc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmpw $rA, $rB", (CMPW CR0, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmplwi $rA, $imm", (CMPLWI CR0, gprc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmplw $rA, $rB", (CMPLW CR0, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmpdi $rA, $imm", (CMPDI CR0, g8rc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmpd $rA, $rB", (CMPD CR0, g8rc:$rA, g8rc:$rB)>;
+def : InstAlias<"cmpldi $rA, $imm", (CMPLDI CR0, g8rc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmpld $rA, $rB", (CMPLD CR0, g8rc:$rA, g8rc:$rB)>;
+
+def : InstAlias<"cmpi $bf, 0, $rA, $imm", (CMPWI crrc:$bf, gprc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmp $bf, 0, $rA, $rB", (CMPW crrc:$bf, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmpli $bf, 0, $rA, $imm", (CMPLWI crrc:$bf, gprc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmpl $bf, 0, $rA, $rB", (CMPLW crrc:$bf, gprc:$rA, gprc:$rB)>;
+def : InstAlias<"cmpi $bf, 1, $rA, $imm", (CMPDI crrc:$bf, g8rc:$rA, s16imm:$imm)>;
+def : InstAlias<"cmp $bf, 1, $rA, $rB", (CMPD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
+def : InstAlias<"cmpli $bf, 1, $rA, $imm", (CMPLDI crrc:$bf, g8rc:$rA, u16imm:$imm)>;
+def : InstAlias<"cmpl $bf, 1, $rA, $rB", (CMPLD crrc:$bf, g8rc:$rA, g8rc:$rB)>;
+
+multiclass TrapExtendedMnemonic<string name, int to> {
+ def : InstAlias<"td"#name#"i $rA, $imm", (TDI to, g8rc:$rA, s16imm:$imm)>;
+ def : InstAlias<"td"#name#" $rA, $rB", (TD to, g8rc:$rA, g8rc:$rB)>;
+ def : InstAlias<"tw"#name#"i $rA, $imm", (TWI to, gprc:$rA, s16imm:$imm)>;
+ def : InstAlias<"tw"#name#" $rA, $rB", (TW to, gprc:$rA, gprc:$rB)>;
+}
+defm : TrapExtendedMnemonic<"lt", 16>;
+defm : TrapExtendedMnemonic<"le", 20>;
+defm : TrapExtendedMnemonic<"eq", 4>;
+defm : TrapExtendedMnemonic<"ge", 12>;
+defm : TrapExtendedMnemonic<"gt", 8>;
+defm : TrapExtendedMnemonic<"nl", 12>;
+defm : TrapExtendedMnemonic<"ne", 24>;
+defm : TrapExtendedMnemonic<"ng", 20>;
+defm : TrapExtendedMnemonic<"llt", 2>;
+defm : TrapExtendedMnemonic<"lle", 6>;
+defm : TrapExtendedMnemonic<"lge", 5>;
+defm : TrapExtendedMnemonic<"lgt", 1>;
+defm : TrapExtendedMnemonic<"lnl", 5>;
+defm : TrapExtendedMnemonic<"lng", 6>;
+defm : TrapExtendedMnemonic<"u", 31>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
index cfcd749..5e3a48d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -71,8 +71,13 @@ static void EmitBranchToAt(uint64_t At, uint64_t To, bool isCall, bool is64Bit){
extern "C" void PPC32CompilationCallback();
extern "C" void PPC64CompilationCallback();
-#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
- !(defined(__ppc64__) || defined(__FreeBSD__))
+// The first clause of the preprocessor directive looks wrong, but it is
+// necessary when compiling this code on non-PowerPC hosts.
+#if (!defined(__ppc__) && !defined(__powerpc__)) || defined(__powerpc64__) || defined(__ppc64__)
+void PPC32CompilationCallback() {
+ llvm_unreachable("This is not a 32bit PowerPC, you can't execute this!");
+}
+#elif !defined(__ELF__)
// CompilationCallback stub - We can't use a C function with inline assembly in
// it, because the prolog/epilog inserted by GCC won't work for us. Instead,
// write our own wrapper, which does things our way, so we have complete control
@@ -137,8 +142,8 @@ asm(
"bctr\n"
);
-#elif defined(__PPC__) && !defined(__ppc64__)
-// Linux & FreeBSD / PPC 32 support
+#else
+// ELF PPC 32 support
// CompilationCallback stub - We can't use a C function with inline assembly in
// it, because we the prolog/epilog inserted by GCC won't work for us. Instead,
@@ -197,15 +202,14 @@ asm(
"mtlr 0\n"
"bctr\n"
);
-#else
-void PPC32CompilationCallback() {
- llvm_unreachable("This is not a power pc, you can't execute this!");
-}
#endif
-#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
- defined(__ppc64__)
-#ifdef __ELF__
+#if !defined(__powerpc64__) && !defined(__ppc64__)
+void PPC64CompilationCallback() {
+ llvm_unreachable("This is not a 64bit PowerPC, you can't execute this!");
+}
+#else
+# ifdef __ELF__
asm(
".text\n"
".align 2\n"
@@ -219,13 +223,13 @@ asm(
".align 4\n"
".type PPC64CompilationCallback,@function\n"
".L.PPC64CompilationCallback:\n"
-#else
+# else
asm(
".text\n"
".align 2\n"
".globl _PPC64CompilationCallback\n"
"_PPC64CompilationCallback:\n"
-#endif
+# endif
// Make space for 8 ints r[3-10] and 13 doubles f[1-13].
// FIXME: need to save v[0-19] for altivec?
// Set up a proper stack frame
@@ -258,12 +262,12 @@ asm(
"ld 5, 280(1)\n" // stub's frame
"ld 4, 16(5)\n" // stub's lr
"li 5, 1\n" // 1 == 64 bit
-#ifdef __ELF__
+# ifdef __ELF__
"bl LLVMPPCCompilationCallback\n"
"nop\n"
-#else
+# else
"bl _LLVMPPCCompilationCallback\n"
-#endif
+# endif
"mtctr 3\n"
// Restore all int arg registers
"ld 10, 272(1)\n" "ld 9, 264(1)\n"
@@ -285,10 +289,6 @@ asm(
// XXX: any special TOC handling in the ELF case for JIT?
"bctr\n"
);
-#else
-void PPC64CompilationCallback() {
- llvm_unreachable("This is not a power pc, you can't execute this!");
-}
#endif
extern "C" {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index f8cf3a5..f61c8bf 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "MCTargetDesc/PPCMCExpr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
@@ -68,7 +69,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
if (MO.isGlobal()) {
StubSym =
MachineModuleInfoImpl::
- StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AP.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
} else {
Name.erase(Name.end()-5, Name.end());
@@ -94,7 +95,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
if (StubSym.getPointer() == 0) {
assert(MO.isGlobal() && "Extern symbol not handled yet");
StubSym = MachineModuleInfoImpl::
- StubValueTy(AP.Mang->getSymbol(MO.getGlobal()),
+ StubValueTy(AP.getSymbol(MO.getGlobal()),
!MO.getGlobal()->hasInternalLinkage());
}
return Sym;
@@ -111,31 +112,26 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
unsigned access = MO.getTargetFlags() & PPCII::MO_ACCESS_MASK;
switch (access) {
- case PPCII::MO_HA16: RefKind = isDarwin ?
- MCSymbolRefExpr::VK_PPC_DARWIN_HA16 :
- MCSymbolRefExpr::VK_PPC_GAS_HA16;
- break;
- case PPCII::MO_LO16: RefKind = isDarwin ?
- MCSymbolRefExpr::VK_PPC_DARWIN_LO16 :
- MCSymbolRefExpr::VK_PPC_GAS_LO16;
- break;
- case PPCII::MO_TPREL16_HA: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_HA;
- break;
- case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
- break;
- case PPCII::MO_DTPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_DTPREL16_LO;
- break;
- case PPCII::MO_TLSLD16_LO: RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO;
- break;
- case PPCII::MO_TOC16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TOC16_LO;
- break;
- }
+ case PPCII::MO_TPREL_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_TPREL_LO;
+ break;
+ case PPCII::MO_TPREL_HA:
+ RefKind = MCSymbolRefExpr::VK_PPC_TPREL_HA;
+ break;
+ case PPCII::MO_DTPREL_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_DTPREL_LO;
+ break;
+ case PPCII::MO_TLSLD_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO;
+ break;
+ case PPCII::MO_TOC_LO:
+ RefKind = MCSymbolRefExpr::VK_PPC_TOC_LO;
+ break;
+ case PPCII::MO_TLS:
+ RefKind = MCSymbolRefExpr::VK_PPC_TLS;
+ break;
+ }
- // FIXME: This isn't right, but we don't have a good way to express this in
- // the MC Level, see below.
- if (MO.getTargetFlags() & PPCII::MO_PIC_FLAG)
- RefKind = MCSymbolRefExpr::VK_None;
-
const MCExpr *Expr = MCSymbolRefExpr::Create(Symbol, RefKind, Ctx);
if (!MO.isJTI() && MO.getOffset())
@@ -149,10 +145,18 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
const MCExpr *PB = MCSymbolRefExpr::Create(MF->getPICBaseSymbol(), Ctx);
Expr = MCBinaryExpr::CreateSub(Expr, PB, Ctx);
- // FIXME: We have no way to make the result be VK_PPC_LO16/VK_PPC_HA16,
- // since it is not a symbol!
}
-
+
+ // Add ha16() / lo16() markers if required.
+ switch (access) {
+ case PPCII::MO_LO:
+ Expr = PPCMCExpr::CreateLo(Expr, isDarwin, Ctx);
+ break;
+ case PPCII::MO_HA:
+ Expr = PPCMCExpr::CreateHa(Expr, isDarwin, Ctx);
+ break;
+ }
+
return MCOperand::CreateExpr(Expr);
}
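
The net effect of the GetSymbolRef() change is that the half-word modifiers
are now target expressions wrapped around the whole operand, so they survive
even when the operand is a symbol difference rather than a bare symbol. A
minimal fragment using the CreateHa/CreateLo calls introduced above (it
assumes an MCSymbol *Symbol and MCContext &Ctx in scope; the printed forms
are my reading of the ELF and Darwin syntaxes):

    // Attach high-adjusted / low halves to an arbitrary MC expression.
    const MCExpr *Sym =
        MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
    const MCExpr *Hi = PPCMCExpr::CreateHa(Sym, /*isDarwin=*/false, Ctx);
    const MCExpr *Lo = PPCMCExpr::CreateLo(Sym, /*isDarwin=*/false, Ctx);
    // ELF prints these as "sym@ha" / "sym@l"; with isDarwin=true they
    // print as "ha16(sym)" / "lo16(sym)".
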
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 40d1f3a..33f843d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -32,6 +32,9 @@ class PPCFunctionInfo : public MachineFunctionInfo {
///
int ReturnAddrSaveIndex;
+ /// Frame index where the old base pointer is stored.
+ int BasePointerSaveIndex;
+
/// MustSaveLR - Indicates whether LR is defined (or clobbered) in the current
/// function. This is only valid after the initial scan of the function by
/// PEI.
@@ -93,6 +96,7 @@ public:
explicit PPCFunctionInfo(MachineFunction &MF)
: FramePointerSaveIndex(0),
ReturnAddrSaveIndex(0),
+ BasePointerSaveIndex(0),
HasSpills(false),
HasNonRISpills(false),
SpillsCR(false),
@@ -113,6 +117,9 @@ public:
int getReturnAddrSaveIndex() const { return ReturnAddrSaveIndex; }
void setReturnAddrSaveIndex(int idx) { ReturnAddrSaveIndex = idx; }
+ int getBasePointerSaveIndex() const { return BasePointerSaveIndex; }
+ void setBasePointerSaveIndex(int Idx) { BasePointerSaveIndex = Idx; }
+
unsigned getMinReservedArea() const { return MinReservedArea; }
void setMinReservedArea(unsigned size) { MinReservedArea = size; }
@@ -160,7 +167,7 @@ public:
int getCRSpillFrameIndex() const { return CRSpillFrameIndex; }
void setCRSpillFrameIndex(int idx) { CRSpillFrameIndex = idx; }
- const SmallVector<unsigned, 3> &
+ const SmallVectorImpl<unsigned> &
getMustSaveCRs() const { return MustSaveCRs; }
void addMustSaveCR(unsigned Reg) { MustSaveCRs.push_back(Reg); }
};
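
BasePointerSaveIndex follows the pattern of the existing FramePointerSaveIndex:
frame lowering creates a fixed object for the old base pointer and records its
index here. A hypothetical caller (BPOffset, isPPC64, MF and MFI are
placeholders for variables in scope, not names from the patch):

    // Hypothetical: record where the prologue spilled the old base pointer.
    PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
    int BPSI = MFI->CreateFixedObject(isPPC64 ? 8 : 4, BPOffset,
                                      /*Immutable=*/true);
    FI->setBasePointerSaveIndex(BPSI);
    // Epilogue emission later retrieves it:
    int Idx = FI->getBasePointerSaveIndex();
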
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 2be6324..19ccbfc 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -48,12 +48,19 @@
using namespace llvm;
-PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
- const TargetInstrInfo &tii)
+static cl::opt<bool>
+EnableBasePointer("ppc-use-base-pointer", cl::Hidden, cl::init(true),
+ cl::desc("Enable use of a base pointer for complex stack frames"));
+
+static cl::opt<bool>
+AlwaysBasePointer("ppc-always-use-base-pointer", cl::Hidden, cl::init(false),
+ cl::desc("Force the use of a base pointer in every function"));
+
+PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST)
: PPCGenRegisterInfo(ST.isPPC64() ? PPC::LR8 : PPC::LR,
ST.isPPC64() ? 0 : 1,
ST.isPPC64() ? 0 : 1),
- Subtarget(ST), TII(tii) {
+ Subtarget(ST) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -62,6 +69,7 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
ImmToIdxMap[PPC::STH] = PPC::STHX; ImmToIdxMap[PPC::STW] = PPC::STWX;
ImmToIdxMap[PPC::STFS] = PPC::STFSX; ImmToIdxMap[PPC::STFD] = PPC::STFDX;
ImmToIdxMap[PPC::ADDI] = PPC::ADD4;
+ ImmToIdxMap[PPC::LWA_32] = PPC::LWAX_32;
// 64-bit
ImmToIdxMap[PPC::LHA8] = PPC::LHAX8; ImmToIdxMap[PPC::LBZ8] = PPC::LBZX8;
@@ -92,32 +100,41 @@ PPCRegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
const uint16_t*
PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? CSR_Darwin64_SaveList :
- CSR_Darwin32_SaveList;
-
- return Subtarget.isPPC64() ? CSR_SVR464_SaveList : CSR_SVR432_SaveList;
+ return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
+ CSR_Darwin64_Altivec_SaveList :
+ CSR_Darwin64_SaveList) :
+ (Subtarget.hasAltivec() ?
+ CSR_Darwin32_Altivec_SaveList :
+ CSR_Darwin32_SaveList);
+
+ return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
+ CSR_SVR464_Altivec_SaveList :
+ CSR_SVR464_SaveList) :
+ (Subtarget.hasAltivec() ?
+ CSR_SVR432_Altivec_SaveList :
+ CSR_SVR432_SaveList);
}
const uint32_t*
PPCRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (Subtarget.isDarwinABI())
- return Subtarget.isPPC64() ? CSR_Darwin64_RegMask :
- CSR_Darwin32_RegMask;
-
- return Subtarget.isPPC64() ? CSR_SVR464_RegMask : CSR_SVR432_RegMask;
+ return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
+ CSR_Darwin64_Altivec_RegMask :
+ CSR_Darwin64_RegMask) :
+ (Subtarget.hasAltivec() ?
+ CSR_Darwin32_Altivec_RegMask :
+ CSR_Darwin32_RegMask);
+
+ return Subtarget.isPPC64() ? (Subtarget.hasAltivec() ?
+ CSR_SVR464_Altivec_RegMask :
+ CSR_SVR464_RegMask) :
+ (Subtarget.hasAltivec() ?
+ CSR_SVR432_Altivec_RegMask :
+ CSR_SVR432_RegMask);
}
const uint32_t*
PPCRegisterInfo::getNoPreservedMask() const {
- // The naming here is inverted: The CSR_NoRegs_Altivec has the
- // Altivec registers masked so that they're not saved and restored around
- // instructions with this preserved mask.
-
- if (!Subtarget.hasAltivec())
- return CSR_NoRegs_Altivec_RegMask;
-
- if (Subtarget.isDarwin())
- return CSR_NoRegs_Darwin_RegMask;
return CSR_NoRegs_RegMask;
}
@@ -136,11 +153,24 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(PPC::FP);
Reserved.set(PPC::FP8);
+ // The BP register is also not really a register, but is the representation
+ // of the base pointer register used by setjmp.
+ Reserved.set(PPC::BP);
+ Reserved.set(PPC::BP8);
+
+ // The counter registers must be reserved so that counter-based loops can
+ // be correctly formed (and the mtctr instructions are not DCE'd).
+ Reserved.set(PPC::CTR);
+ Reserved.set(PPC::CTR8);
+
Reserved.set(PPC::R1);
Reserved.set(PPC::LR);
Reserved.set(PPC::LR8);
Reserved.set(PPC::RM);
+ if (!Subtarget.isDarwinABI() || !Subtarget.hasAltivec())
+ Reserved.set(PPC::VRSAVE);
+
// The SVR4 ABI reserves r2 and r13
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::R2); // System-reserved register
@@ -157,6 +187,9 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (PPCFI->needsFP(MF))
Reserved.set(PPC::X31);
+ if (hasBasePointer(MF))
+ Reserved.set(PPC::X30);
+
// The 64-bit SVR4 ABI reserves r2 for the TOC pointer.
if (Subtarget.isSVR4ABI()) {
Reserved.set(PPC::X2);
@@ -166,6 +199,15 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (PPCFI->needsFP(MF))
Reserved.set(PPC::R31);
+ if (hasBasePointer(MF))
+ Reserved.set(PPC::R30);
+
+ // Reserve Altivec registers when Altivec is unavailable.
+ if (!Subtarget.hasAltivec())
+ for (TargetRegisterClass::iterator I = PPC::VRRCRegClass.begin(),
+ IE = PPC::VRRCRegClass.end(); I != IE; ++I)
+ Reserved.set(*I);
+
return Reserved;
}
@@ -214,6 +256,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
MachineFunction &MF = *MBB.getParent();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
// Determine whether 64-bit pointers are used.
bool LP64 = Subtarget.isPPC64();
DebugLoc dl = MI.getDebugLoc();
@@ -226,8 +270,8 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Get stack alignments.
unsigned TargetAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
- if (MaxAlign > TargetAlign)
- report_fatal_error("Dynamic alloca with large aligns not supported");
+ assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
+ "Maximum call-frame size not sufficiently aligned");
// Determine the previous frame's address. If FrameSize can't be
// represented as 16 bits or we need special alignment, then we load the
@@ -252,40 +296,62 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
.addImm(0)
.addReg(PPC::R1);
}
-
+
+ bool KillNegSizeReg = MI.getOperand(1).isKill();
+ unsigned NegSizeReg = MI.getOperand(1).getReg();
+
// Grow the stack and update the stack pointer link, then determine the
// address of new allocated space.
if (LP64) {
+ if (MaxAlign > TargetAlign) {
+ unsigned UnalNegSizeReg = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC);
+
+ // Unfortunately, there is no andi, only andi., and we can't insert that
+ // here because we might clobber cr0 while it is live.
+ BuildMI(MBB, II, dl, TII.get(PPC::LI8), NegSizeReg)
+ .addImm(~(MaxAlign-1));
+
+ unsigned NegSizeReg1 = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(G8RC);
+ BuildMI(MBB, II, dl, TII.get(PPC::AND8), NegSizeReg)
+ .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
+ .addReg(NegSizeReg1, RegState::Kill);
+ KillNegSizeReg = true;
+ }
+
BuildMI(MBB, II, dl, TII.get(PPC::STDUX), PPC::X1)
.addReg(Reg, RegState::Kill)
.addReg(PPC::X1)
- .addReg(MI.getOperand(1).getReg());
- if (!MI.getOperand(1).isKill())
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
- .addReg(PPC::X1)
- .addImm(maxCallFrameSize);
- else
- // Implicitly kill the register.
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
- .addReg(PPC::X1)
- .addImm(maxCallFrameSize)
- .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI8), MI.getOperand(0).getReg())
+ .addReg(PPC::X1)
+ .addImm(maxCallFrameSize);
} else {
+ if (MaxAlign > TargetAlign) {
+ unsigned UnalNegSizeReg = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC);
+
+ // Unfortunately, there is no andi, only andi., and we can't insert that
+ // here because we might clobber cr0 while it is live.
+ BuildMI(MBB, II, dl, TII.get(PPC::LI), NegSizeReg)
+ .addImm(~(MaxAlign-1));
+
+ unsigned NegSizeReg1 = NegSizeReg;
+ NegSizeReg = MF.getRegInfo().createVirtualRegister(GPRC);
+ BuildMI(MBB, II, dl, TII.get(PPC::AND), NegSizeReg)
+ .addReg(UnalNegSizeReg, getKillRegState(KillNegSizeReg))
+ .addReg(NegSizeReg1, RegState::Kill);
+ KillNegSizeReg = true;
+ }
+
BuildMI(MBB, II, dl, TII.get(PPC::STWUX), PPC::R1)
.addReg(Reg, RegState::Kill)
.addReg(PPC::R1)
- .addReg(MI.getOperand(1).getReg());
-
- if (!MI.getOperand(1).isKill())
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
- .addReg(PPC::R1)
- .addImm(maxCallFrameSize);
- else
- // Implicitly kill the register.
- BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
- .addReg(PPC::R1)
- .addImm(maxCallFrameSize)
- .addReg(MI.getOperand(1).getReg(), RegState::ImplicitKill);
+ .addReg(NegSizeReg, getKillRegState(KillNegSizeReg));
+ BuildMI(MBB, II, dl, TII.get(PPC::ADDI), MI.getOperand(0).getReg())
+ .addReg(PPC::R1)
+ .addImm(maxCallFrameSize);
}
// Discard the DYNALLOC instruction.
@@ -307,6 +373,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -317,8 +384,8 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
unsigned SrcReg = MI.getOperand(0).getReg();
// We need to store the CR in the low 4-bits of the saved value. First, issue
- // an MFCRpsued to save all of the CRBits and, if needed, kill the SrcReg.
- BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFCR8pseud : PPC::MFCRpseud), Reg)
+ // an MFOCRF to save all of the CRBits and, if needed, kill the SrcReg.
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MFOCRF8 : PPC::MFOCRF), Reg)
.addReg(SrcReg, getKillRegState(MI.getOperand(0).isKill()));
// If the saved register wasn't CR0, shift the bits left so that they are in
@@ -350,6 +417,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
bool LP64 = Subtarget.isPPC64();
@@ -377,7 +445,7 @@ void PPCRegisterInfo::lowerCRRestore(MachineBasicBlock::iterator II,
.addImm(31);
}
- BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTCRF8 : PPC::MTCRF), DestReg)
+ BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::MTOCRF8 : PPC::MTOCRF), DestReg)
.addReg(Reg, RegState::Kill);
// Discard the pseudo instruction.
@@ -391,6 +459,7 @@ void PPCRegisterInfo::lowerVRSAVESpilling(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -415,6 +484,7 @@ void PPCRegisterInfo::lowerVRSAVERestore(MachineBasicBlock::iterator II,
// Get the instruction's basic block.
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
@@ -454,9 +524,8 @@ PPCRegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
return false;
}
-// Figure out if the offset in the instruction is shifted right two bits. This
-// is true for instructions like "STD", which the machine implicitly adds two
-// low zeros to.
+// Figure out if the offset in the instruction must be a multiple of 4.
+// This is true for instructions like "STD".
static bool usesIXAddr(const MachineInstr &MI) {
unsigned OpC = MI.getOpcode();
@@ -464,6 +533,7 @@ static bool usesIXAddr(const MachineInstr &MI) {
default:
return false;
case PPC::LWA:
+ case PPC::LWA_32:
case PPC::LD:
case PPC::STD:
return true;
@@ -493,9 +563,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
// Get the basic block's function.
MachineFunction &MF = *MBB.getParent();
+ // Get the instruction info.
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
// Get the frame info.
MachineFrameInfo *MFI = MF.getFrameInfo();
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
DebugLoc dl = MI.getDebugLoc();
unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
@@ -533,12 +604,8 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Replace the FrameIndex with base register with GPR1 (SP) or GPR31 (FP).
-
- bool is64Bit = Subtarget.isPPC64();
- MI.getOperand(FIOperandNum).ChangeToRegister(TFI->hasFP(MF) ?
- (is64Bit ? PPC::X31 : PPC::R31) :
- (is64Bit ? PPC::X1 : PPC::R1),
- false);
+ MI.getOperand(FIOperandNum).ChangeToRegister(
+ FrameIndex < 0 ? getBaseRegister(MF) : getFrameRegister(MF), false);
// Figure out if the offset in the instruction must be a multiple of 4.
bool isIXAddr = usesIXAddr(MI);
@@ -549,10 +616,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Now add the frame object offset to the offset from r1.
int Offset = MFI->getObjectOffset(FrameIndex);
- if (!isIXAddr)
- Offset += MI.getOperand(OffsetOperandNo).getImm();
- else
- Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
// If we're not using a Frame Pointer that has been set to the value of the
// SP before having the stack size subtracted from it, then add the stack size
@@ -560,8 +624,10 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Naked functions have stack size 0, although getStackSize may not reflect that
// because we didn't call all the pieces that compute it for naked functions.
if (!MF.getFunction()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked))
- Offset += MFI->getStackSize();
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::Naked)) {
+ if (!(hasBasePointer(MF) && FrameIndex < 0))
+ Offset += MFI->getStackSize();
+ }
// If we can, encode the offset directly into the instruction. If this is a
// normal PPC "ri" instruction, any 16-bit value can be safely encoded. If
@@ -569,11 +635,9 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// clear can be encoded. This is extremely uncommon, because normally you
// only "std" to a stack slot that is at least 4-byte aligned, but it can
// happen in invalid code.
- if (OpC == PPC::DBG_VALUE || // DBG_VALUE is always Reg+Imm
- (!noImmForm &&
- isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0))) {
- if (isIXAddr)
- Offset >>= 2; // The actual encoded value has the low two bits zero.
+ assert(OpC != PPC::DBG_VALUE &&
+ "This should be handle in a target independent way");
+ if (!noImmForm && isInt<16>(Offset) && (!isIXAddr || (Offset & 3) == 0)) {
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
return;
}
@@ -581,6 +645,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset doesn't fit into a single register, scavenge one to build the
// offset in.
+ bool is64Bit = Subtarget.isPPC64();
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *RC = is64Bit ? G8RC : GPRC;
@@ -626,12 +691,42 @@ unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return TFI->hasFP(MF) ? PPC::X31 : PPC::X1;
}
-unsigned PPCRegisterInfo::getEHExceptionRegister() const {
- return !Subtarget.isPPC64() ? PPC::R3 : PPC::X3;
+unsigned PPCRegisterInfo::getBaseRegister(const MachineFunction &MF) const {
+ if (!hasBasePointer(MF))
+ return getFrameRegister(MF);
+
+ return Subtarget.isPPC64() ? PPC::X30 : PPC::R30;
}
-unsigned PPCRegisterInfo::getEHHandlerRegister() const {
- return !Subtarget.isPPC64() ? PPC::R4 : PPC::X4;
+bool PPCRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
+ if (!EnableBasePointer)
+ return false;
+ if (AlwaysBasePointer)
+ return true;
+
+ // If we need to realign the stack, then the stack pointer can no longer
+ // serve as an offset into the caller's stack space. As a result, we need a
+ // base pointer.
+ return needsStackRealignment(MF);
+}
+
+bool PPCRegisterInfo::canRealignStack(const MachineFunction &MF) const {
+ if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
+ return false;
+
+ return true;
+}
+
+bool PPCRegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
+ unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment();
+ bool requiresRealignment =
+ ((MFI->getMaxAlignment() > StackAlign) ||
+ F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
+ Attribute::StackAlignment));
+
+ return requiresRealignment && canRealignStack(MF);
}
/// Returns true if the instruction's frame index
@@ -650,11 +745,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
}
unsigned OffsetOperandNo = getOffsetONFromFION(*MI, FIOperandNum);
-
- if (!usesIXAddr(*MI))
- Offset += MI->getOperand(OffsetOperandNo).getImm();
- else
- Offset += MI->getOperand(OffsetOperandNo).getImm() << 2;
+ Offset += MI->getOperand(OffsetOperandNo).getImm();
// It's the load/store FI references that cause issues, as it can be difficult
// to materialize the offset if it won't fit in the literal field. Estimate
@@ -711,9 +802,10 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
if (Ins != MBB->end())
DL = Ins->getDebugLoc();
+ const MachineFunction &MF = *MBB->getParent();
+ const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- const MachineFunction &MF = *MBB->getParent();
MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF));
BuildMI(*MBB, Ins, DL, MCID, BaseReg)
@@ -734,17 +826,7 @@ PPCRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
MI.getOperand(FIOperandNum).ChangeToRegister(BaseReg, false);
unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
-
- bool isIXAddr = usesIXAddr(MI);
- if (!isIXAddr)
- Offset += MI.getOperand(OffsetOperandNo).getImm();
- else
- Offset += MI.getOperand(OffsetOperandNo).getImm() << 2;
-
- // Figure out if the offset in the instruction is shifted right two bits.
- if (isIXAddr)
- Offset >>= 2; // The actual encoded value has the low two bits zero.
-
+ Offset += MI.getOperand(OffsetOperandNo).getImm();
MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset);
}
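
One detail of the lowerDynamicAlloc() change is worth spelling out: the
allocation size arrives negated (the stack grows downward), and ANDing a
two's-complement negative with ~(MaxAlign-1) rounds toward minus infinity,
so the adjustment can only grow and always stays aligned. A standalone
check (plain C++, not from the patch):

    // AND with ~(MaxAlign-1) on a negated size keeps the stack adjustment
    // MaxAlign-aligned while never shrinking the allocation.
    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t MaxAlign = 64;
      int64_t NegSize = -100;                      // request: 100 bytes
      int64_t Aligned = NegSize & ~(MaxAlign - 1); // the LI + AND sequence
      assert(Aligned == -128);                     // grew to 128, 64-aligned
      assert(-Aligned % MaxAlign == 0);
      return 0;
    }
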
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 7a48b4b..dd3bb40 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -29,9 +29,8 @@ class Type;
class PPCRegisterInfo : public PPCGenRegisterInfo {
DenseMap<unsigned, unsigned> ImmToIdxMap;
const PPCSubtarget &Subtarget;
- const TargetInstrInfo &TII;
public:
- PPCRegisterInfo(const PPCSubtarget &SubTarget, const TargetInstrInfo &tii);
+ PPCRegisterInfo(const PPCSubtarget &SubTarget);
/// getPointerRegClass - Return the register class to use to hold pointers.
/// This is used for addressing modes.
@@ -93,9 +92,11 @@ public:
// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const;
- // Exception handling queries.
- unsigned getEHExceptionRegister() const;
- unsigned getEHHandlerRegister() const;
+ // Base pointer (stack realignment) support.
+ unsigned getBaseRegister(const MachineFunction &MF) const;
+ bool hasBasePointer(const MachineFunction &MF) const;
+ bool canRealignStack(const MachineFunction &MF) const;
+ bool needsStackRealignment(const MachineFunction &MF) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index 57a25f5..d566e2c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -11,11 +11,11 @@
//===----------------------------------------------------------------------===//
let Namespace = "PPC" in {
-def sub_lt : SubRegIndex;
-def sub_gt : SubRegIndex;
-def sub_eq : SubRegIndex;
-def sub_un : SubRegIndex;
-def sub_32 : SubRegIndex;
+def sub_lt : SubRegIndex<1>;
+def sub_gt : SubRegIndex<1, 1>;
+def sub_eq : SubRegIndex<1, 2>;
+def sub_un : SubRegIndex<1, 3>;
+def sub_32 : SubRegIndex<32>;
}
@@ -94,6 +94,10 @@ def ZERO8 : GP8<ZERO, "0">;
def FP : GPR<0 /* arbitrary */, "**FRAME POINTER**">;
def FP8 : GP8<FP, "**FRAME POINTER**">;
+// Representations of the base pointer used by setjmp.
+def BP : GPR<0 /* arbitrary */, "**BASE POINTER**">;
+def BP8 : GP8<BP, "**BASE POINTER**">;
+
// Condition register bits
def CR0LT : CRBIT< 0, "0">;
def CR0GT : CRBIT< 1, "1">;
@@ -150,7 +154,7 @@ def CTR : SPR<9, "ctr">, DwarfRegNum<[-2, 66]>;
def CTR8 : SPR<9, "ctr">, DwarfRegNum<[66, -2]>;
// VRsave register
-def VRSAVE: SPR<256, "VRsave">, DwarfRegNum<[109]>;
+def VRSAVE: SPR<256, "vrsave">, DwarfRegNum<[109]>;
// Carry bit. In the architecture this is really bit 0 of the XER register
// (which really is SPR register 1); this is the only bit interesting to a
@@ -172,11 +176,11 @@ def RM: SPR<512, "**ROUNDING MODE**">;
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
(sequence "R%u", 30, 13),
- R31, R0, R1, FP)>;
+ R31, R0, R1, FP, BP)>;
def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
(sequence "X%u", 30, 14),
- X31, X13, X0, X1, FP8)>;
+ X31, X13, X0, X1, FP8, BP8)>;
// For some instructions r0 is special (representing the value 0 instead of
// the value in the r0 register), and we use these register subclasses to
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
index 660c0c3..92ba69c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -108,6 +108,14 @@ def VecPerm : InstrItinClass;
def VecFPRound : InstrItinClass;
def VecVSL : InstrItinClass;
def VecVSR : InstrItinClass;
+def SprMTMSRD : InstrItinClass;
+def SprSLIE : InstrItinClass;
+def SprSLBIE : InstrItinClass;
+def SprSLBMTE : InstrItinClass;
+def SprSLBMFEE : InstrItinClass;
+def SprSLBIA : InstrItinClass;
+def SprTLBIEL : InstrItinClass;
+def SprTLBIE : InstrItinClass;
//===----------------------------------------------------------------------===//
// Processor instruction itineraries.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
index 8d5838e..1612cd2 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td
@@ -14,39 +14,8 @@
//===----------------------------------------------------------------------===//
// Functional units on the PowerPC A2 chip sets
//
-def IU0to3_0 : FuncUnit; // Fetch unit 1 to 4 slot 1
-def IU0to3_1 : FuncUnit; // Fetch unit 1 to 4 slot 2
-def IU0to3_2 : FuncUnit; // Fetch unit 1 to 4 slot 3
-def IU0to3_3 : FuncUnit; // Fetch unit 1 to 4 slot 4
-def IU4_0 : FuncUnit; // Instruction buffer slot 1
-def IU4_1 : FuncUnit; // Instruction buffer slot 2
-def IU4_2 : FuncUnit; // Instruction buffer slot 3
-def IU4_3 : FuncUnit; // Instruction buffer slot 4
-def IU4_4 : FuncUnit; // Instruction buffer slot 5
-def IU4_5 : FuncUnit; // Instruction buffer slot 6
-def IU4_6 : FuncUnit; // Instruction buffer slot 7
-def IU4_7 : FuncUnit; // Instruction buffer slot 8
-def IU5 : FuncUnit; // Dependency resolution
-def IU6 : FuncUnit; // Instruction issue
-def RF0 : FuncUnit;
-def XRF1 : FuncUnit;
-def XEX1 : FuncUnit; // Execution stage 1 for the XU pipeline
-def XEX2 : FuncUnit; // Execution stage 2 for the XU pipeline
-def XEX3 : FuncUnit; // Execution stage 3 for the XU pipeline
-def XEX4 : FuncUnit; // Execution stage 4 for the XU pipeline
-def XEX5 : FuncUnit; // Execution stage 5 for the XU pipeline
-def XEX6 : FuncUnit; // Execution stage 6 for the XU pipeline
-def FRF1 : FuncUnit;
-def FEX1 : FuncUnit; // Execution stage 1 for the FU pipeline
-def FEX2 : FuncUnit; // Execution stage 2 for the FU pipeline
-def FEX3 : FuncUnit; // Execution stage 3 for the FU pipeline
-def FEX4 : FuncUnit; // Execution stage 4 for the FU pipeline
-def FEX5 : FuncUnit; // Execution stage 5 for the FU pipeline
-def FEX6 : FuncUnit; // Execution stage 6 for the FU pipeline
-
-def CR_Bypass : Bypass; // The bypass for condition regs.
-//def GPR_Bypass : Bypass; // The bypass for general-purpose regs.
-//def FPR_Bypass : Bypass; // The bypass for floating-point regs.
+def XU : FuncUnit; // XU pipeline
+def FU : FuncUnit; // FI pipeline
//
// This file defines the itinerary class data for the PPC A2 processor.
@@ -55,699 +24,119 @@ def CR_Bypass : Bypass; // The bypass for condition regs.
def PPCA2Itineraries : ProcessorItineraries<
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3,
- IU4_0, IU4_1, IU4_2, IU4_3, IU4_4, IU4_5, IU4_6, IU4_7,
- IU5, IU6, RF0, XRF1, XEX1, XEX2, XEX3, XEX4, XEX5, XEX6,
- FRF1, FEX1, FEX2, FEX3, FEX4, FEX5, FEX6],
- [CR_Bypass, GPR_Bypass, FPR_Bypass], [
- InstrItinData<IntSimple , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntGeneral , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntCompare , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntDivW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<38, [XEX6]>],
- [53, 7, 7],
- [NoBypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMFFS , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMTFSB0 , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulHWU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntMulLI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotate , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotateD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntRotateDI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntShift , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<IntTrapD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<BrB , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<BrCR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, CR_Bypass, CR_Bypass]>,
- InstrItinData<BrMCRX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7, 7],
- [CR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStDCBA , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 11],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStDCBF , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 11],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStDCBI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 11],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLoad , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLoadUpd , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStStore , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStStoreUpd, [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStICBI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTFD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<LdStSTFDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<LdStLFD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLFDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7, 7],
- [FPR_Bypass, GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStLHA , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLHAU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLMW , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [14, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStLWARX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [26, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTDU , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [13, 7],
- [GPR_Bypass, GPR_Bypass]>,
- InstrItinData<LdStSTDCX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [26, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSTWCX , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<13, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [26, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<LdStSync , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<12, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>]>,
- InstrItinData<SprISYNC , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
- InstrItinData<SprMFSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [GPR_Bypass, NoBypass]>,
- InstrItinData<SprMTMSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprTLBSYNC , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>]>,
- InstrItinData<SprMFCR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [10, 7],
- [GPR_Bypass, CR_Bypass]>,
- InstrItinData<SprMFMSR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [GPR_Bypass, NoBypass]>,
- InstrItinData<SprMFSPR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMFTB , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSPR , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<1, [XEX6]>],
- [15, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprMTSRIN , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprRFI , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<SprSC , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [XRF1]>,
- InstrStage<1, [XEX1]>, InstrStage<1, [XEX2]>,
- InstrStage<1, [XEX3]>, InstrStage<1, [XEX4]>,
- InstrStage<1, [XEX5]>, InstrStage<14, [XEX6]>],
- [29, 7],
- [NoBypass, GPR_Bypass]>,
- InstrItinData<FPGeneral , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7, 7],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPAddSub , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7, 7],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPCompare , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [13, 7, 7],
- [CR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivD , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<71, [FRF1], 0>,
- InstrStage<71, [FEX1], 0>,
- InstrStage<71, [FEX2], 0>,
- InstrStage<71, [FEX3], 0>,
- InstrStage<71, [FEX4], 0>,
- InstrStage<71, [FEX5], 0>,
- InstrStage<71, [FEX6]>],
- [86, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPDivS , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<58, [FRF1], 0>,
- InstrStage<58, [FEX1], 0>,
- InstrStage<58, [FEX2], 0>,
- InstrStage<58, [FEX3], 0>,
- InstrStage<58, [FEX4], 0>,
- InstrStage<58, [FEX5], 0>,
- InstrStage<58, [FEX6]>],
- [73, 7, 7],
- [NoBypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPSqrt , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<68, [FRF1], 0>,
- InstrStage<68, [FEX1], 0>,
- InstrStage<68, [FEX2], 0>,
- InstrStage<68, [FEX3], 0>,
- InstrStage<68, [FEX4], 0>,
- InstrStage<68, [FEX5], 0>,
- InstrStage<68, [FEX6]>],
- [86, 7], // FIXME: should be [86, 7] for double
- // and [82, 7] for single. Likewise,
- // the FEX? cycle count should be 68
- // for double and 64 for single.
- [NoBypass, FPR_Bypass]>,
- InstrItinData<FPFused , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7, 7, 7],
- [FPR_Bypass, FPR_Bypass, FPR_Bypass, FPR_Bypass]>,
- InstrItinData<FPRes , [InstrStage<4,
- [IU0to3_0, IU0to3_1, IU0to3_2, IU0to3_3]>,
- InstrStage<1, [IU4_0, IU4_1, IU4_2, IU4_3,
- IU4_4, IU4_5, IU4_6, IU4_7]>,
- InstrStage<1, [IU5]>, InstrStage<1, [IU6]>,
- InstrStage<1, [RF0]>, InstrStage<1, [FRF1]>,
- InstrStage<1, [FEX1]>, InstrStage<1, [FEX2]>,
- InstrStage<1, [FEX3]>, InstrStage<1, [FEX4]>,
- InstrStage<1, [FEX5]>, InstrStage<1, [FEX6]>],
- [15, 7],
- [FPR_Bypass, FPR_Bypass]>
+ [XU, FU], [], [
+ InstrItinData<IntSimple , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<IntGeneral , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntCompare , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntDivW , [InstrStage<1, [XU]>],
+ [39, 1, 1]>,
+ InstrItinData<IntDivD , [InstrStage<1, [XU]>],
+ [71, 1, 1]>,
+ InstrItinData<IntMulHW , [InstrStage<1, [XU]>],
+ [5, 1, 1]>,
+ InstrItinData<IntMulHWU , [InstrStage<1, [XU]>],
+ [5, 1, 1]>,
+ InstrItinData<IntMulLI , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<IntRotate , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntRotateD , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntRotateDI , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntShift , [InstrStage<1, [XU]>],
+ [2, 1, 1]>,
+ InstrItinData<IntTrapW , [InstrStage<1, [XU]>],
+ [2, 1]>,
+ InstrItinData<IntTrapD , [InstrStage<1, [XU]>],
+ [2, 1]>,
+ InstrItinData<BrB , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<BrCR , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<BrMCR , [InstrStage<1, [XU]>],
+ [5, 1, 1]>,
+ InstrItinData<BrMCRX , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStDCBA , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStDCBF , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStDCBI , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStLoad , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<LdStLoadUpd , [InstrStage<1, [XU]>],
+ [6, 8, 1, 1]>,
+ InstrItinData<LdStLDU , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<LdStStore , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStStoreUpd, [InstrStage<1, [XU]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<LdStICBI, [InstrStage<1, [XU]>],
+ [16, 1, 1]>,
+ InstrItinData<LdStSTFD , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStSTFDU , [InstrStage<1, [XU]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<LdStLFD , [InstrStage<1, [XU]>],
+ [7, 1, 1]>,
+ InstrItinData<LdStLFDU , [InstrStage<1, [XU]>],
+ [7, 9, 1, 1]>,
+ InstrItinData<LdStLHA , [InstrStage<1, [XU]>],
+ [6, 1, 1]>,
+ InstrItinData<LdStLHAU , [InstrStage<1, [XU]>],
+ [6, 8, 1, 1]>,
+ InstrItinData<LdStLWARX , [InstrStage<1, [XU]>],
+ [82, 1, 1]>, // L2 latency
+ InstrItinData<LdStSTD , [InstrStage<1, [XU]>],
+ [1, 1, 1]>,
+ InstrItinData<LdStSTDU , [InstrStage<1, [XU]>],
+ [2, 1, 1, 1]>,
+ InstrItinData<LdStSTDCX , [InstrStage<1, [XU]>],
+ [82, 1, 1]>, // L2 latency
+ InstrItinData<LdStSTWCX , [InstrStage<1, [XU]>],
+ [82, 1, 1]>, // L2 latency
+ InstrItinData<LdStSync , [InstrStage<1, [XU]>],
+ [6]>,
+ InstrItinData<SprISYNC , [InstrStage<1, [XU]>],
+ [16]>,
+ InstrItinData<SprMTMSR , [InstrStage<1, [XU]>],
+ [16, 1]>,
+ InstrItinData<SprMFCR , [InstrStage<1, [XU]>],
+ [6, 1]>,
+ InstrItinData<SprMFMSR , [InstrStage<1, [XU]>],
+ [4, 1]>,
+ InstrItinData<SprMFSPR , [InstrStage<1, [XU]>],
+ [6, 1]>,
+ InstrItinData<SprMFTB , [InstrStage<1, [XU]>],
+ [4, 1]>,
+ InstrItinData<SprMTSPR , [InstrStage<1, [XU]>],
+ [6, 1]>,
+ InstrItinData<SprRFI , [InstrStage<1, [XU]>],
+ [16]>,
+ InstrItinData<SprSC , [InstrStage<1, [XU]>],
+ [16]>,
+ InstrItinData<FPGeneral , [InstrStage<1, [FU]>],
+ [6, 1, 1]>,
+ InstrItinData<FPAddSub , [InstrStage<1, [FU]>],
+ [6, 1, 1]>,
+ InstrItinData<FPCompare , [InstrStage<1, [FU]>],
+ [5, 1, 1]>,
+ InstrItinData<FPDivD , [InstrStage<1, [FU]>],
+ [72, 1, 1]>,
+ InstrItinData<FPDivS , [InstrStage<1, [FU]>],
+ [59, 1, 1]>,
+ InstrItinData<FPSqrt , [InstrStage<1, [FU]>],
+ [69, 1, 1]>,
+ InstrItinData<FPFused , [InstrStage<1, [FU]>],
+ [6, 1, 1, 1]>,
+ InstrItinData<FPRes , [InstrStage<1, [FU]>],
+ [6, 1]>
]>;
// ===---------------------------------------------------------------------===//
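
The rewritten A2 model above drops the stage-by-stage pipeline description in favor of two issue units (XU and FU) plus per-operand latency lists; schedulers recover those latencies through LLVM's itinerary API rather than by walking stages. A hedged sketch of that query path (the itinerary-class index would normally come from MCInstrDesc::getSchedClass()):

#include "llvm/MC/MCInstrItineraries.h"

// Sketch, assuming ItinData was built from PPCA2Itineraries. For an
// IntMulLI-class instruction the [6, 1, 1] list above maps operand 0
// (the def) to cycle 6, so this returns 6; the uses read at cycle 1.
int defCycleFor(const llvm::InstrItineraryData &ItinData,
                unsigned ItinClass) {
  return ItinData.getOperandCycle(ItinClass, /*OperandIdx=*/0);
}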
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
index 9bb779a..c189b9e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE500mc.td
@@ -36,6 +36,8 @@ def CFX_0 : FuncUnit; // CFX pipeline
def LSU_0 : FuncUnit; // LSU pipeline
def FPU_0 : FuncUnit; // FPU pipeline
+def CR_Bypass : Bypass;
+
def PPCE500mcItineraries : ProcessorItineraries<
[DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, LSU_0, FPU_0],
[CR_Bypass, GPR_Bypass, FPR_Bypass], [
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
index d7e11ac..7a24d20 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleE5500.td
@@ -39,6 +39,7 @@ def CFX_1 : FuncUnit; // CFX pipeline stage 1
// def LSU_0 : FuncUnit; // LSU pipeline
// def FPU_0 : FuncUnit; // FPU pipeline
+// def CR_Bypass : Bypass;
def PPCE5500Itineraries : ProcessorItineraries<
[DIS0, DIS1, SFX0, SFX1, BU, CFX_DivBypass, CFX_0, CFX_1,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
index a8f2b3f..7231ab1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -14,7 +14,11 @@
#include "PPCSubtarget.h"
#include "PPC.h"
#include "PPCRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Function.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
@@ -29,32 +33,70 @@ using namespace llvm;
PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit)
: PPCGenSubtargetInfo(TT, CPU, FS)
- , StackAlignment(16)
- , DarwinDirective(PPC::DIR_NONE)
- , HasMFOCRF(false)
- , Has64BitSupport(false)
- , Use64BitRegs(false)
, IsPPC64(is64Bit)
- , HasAltivec(false)
- , HasQPX(false)
- , HasFSQRT(false)
- , HasFRE(false)
- , HasFRES(false)
- , HasFRSQRTE(false)
- , HasFRSQRTES(false)
- , HasRecipPrec(false)
- , HasSTFIWX(false)
- , HasLFIWAX(false)
- , HasFPRND(false)
- , HasFPCVT(false)
- , HasISEL(false)
- , HasPOPCNTD(false)
- , HasLDBRX(false)
- , IsBookE(false)
- , HasLazyResolverStubs(false)
- , IsJITCodeModel(false)
, TargetTriple(TT) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+}
+
+/// SetJITMode - This is called to inform the subtarget info that we are
+/// producing code for the JIT.
+void PPCSubtarget::SetJITMode() {
+ // JIT mode doesn't want lazy resolver stubs, it knows exactly where
+ // everything is. This matters for PPC64, which codegens in PIC mode without
+ // stubs.
+ HasLazyResolverStubs = false;
+
+ // Calls to external functions need to use indirect calls
+ IsJITCodeModel = true;
+}
+
+void PPCSubtarget::resetSubtargetFeatures(const MachineFunction *MF) {
+ AttributeSet FnAttrs = MF->getFunction()->getAttributes();
+ Attribute CPUAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-cpu");
+ Attribute FSAttr = FnAttrs.getAttribute(AttributeSet::FunctionIndex,
+ "target-features");
+ std::string CPU =
+ !CPUAttr.hasAttribute(Attribute::None) ? CPUAttr.getValueAsString() : "";
+ std::string FS =
+ !FSAttr.hasAttribute(Attribute::None) ? FSAttr.getValueAsString() : "";
+ if (!FS.empty()) {
+ initializeEnvironment();
+ resetSubtargetFeatures(CPU, FS);
+ }
+}
+void PPCSubtarget::initializeEnvironment() {
+ StackAlignment = 16;
+ DarwinDirective = PPC::DIR_NONE;
+ HasMFOCRF = false;
+ Has64BitSupport = false;
+ Use64BitRegs = false;
+ HasAltivec = false;
+ HasQPX = false;
+ HasFCPSGN = false;
+ HasFSQRT = false;
+ HasFRE = false;
+ HasFRES = false;
+ HasFRSQRTE = false;
+ HasFRSQRTES = false;
+ HasRecipPrec = false;
+ HasSTFIWX = false;
+ HasLFIWAX = false;
+ HasFPRND = false;
+ HasFPCVT = false;
+ HasISEL = false;
+ HasPOPCNTD = false;
+ HasLDBRX = false;
+ IsBookE = false;
+ DeprecatedMFTB = false;
+ DeprecatedDST = false;
+ HasLazyResolverStubs = false;
+ IsJITCodeModel = false;
+}
+
+void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Determine default and user specified characteristics
std::string CPUName = CPU;
if (CPUName.empty())
@@ -72,7 +114,7 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
std::string FullFS = FS;
// If we are generating code for ppc64, verify that options make sense.
- if (is64Bit) {
+ if (IsPPC64) {
Has64BitSupport = true;
// Silently force 64-bit register use on ppc64.
Use64BitRegs = true;
@@ -99,21 +141,11 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
// is enabled because external functions will assume this alignment.
if (hasQPX() || isBGQ())
StackAlignment = 32;
-}
-
-/// SetJITMode - This is called to inform the subtarget info that we are
-/// producing code for the JIT.
-void PPCSubtarget::SetJITMode() {
- // JIT mode doesn't want lazy resolver stubs, it knows exactly where
- // everything is. This matters for PPC64, which codegens in PIC mode without
- // stubs.
- HasLazyResolverStubs = false;
- // Calls to external functions need to use indirect calls
- IsJITCodeModel = true;
+ // Determine endianness.
+ IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le);
}
-
/// hasLazyResolverStub - Return true if accesses to the specified global have
/// to go through a dyld lazy resolution stub. This means that an extra load
/// is required to get the address of the global.
@@ -135,14 +167,7 @@ bool PPCSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- // FIXME: It would be best to use TargetSubtargetInfo::ANTIDEP_ALL here,
- // but we can't because we can't reassign the cr registers. There is a
- // dependence between the cr register and the RLWINM instruction used
- // to extract its value which the anti-dependency breaker can't currently
- // see. Maybe we should make a late-expanded pseudo to encode this dependency.
- // (the relevant code is in PPCDAGToDAGISel::SelectSETCC)
-
- Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
+ Mode = TargetSubtargetInfo::ANTIDEP_ALL;
CriticalPathRCs.clear();
@@ -151,9 +176,44 @@ bool PPCSubtarget::enablePostRAScheduler(
else
CriticalPathRCs.push_back(&PPC::GPRCRegClass);
- CriticalPathRCs.push_back(&PPC::F8RCRegClass);
- CriticalPathRCs.push_back(&PPC::VRRCRegClass);
-
return OptLevel >= CodeGenOpt::Default;
}
+// Embedded cores need aggressive scheduling.
+static bool needsAggressiveScheduling(unsigned Directive) {
+ switch (Directive) {
+ default: return false;
+ case PPC::DIR_440:
+ case PPC::DIR_A2:
+ case PPC::DIR_E500mc:
+ case PPC::DIR_E5500:
+ return true;
+ }
+}
+
+bool PPCSubtarget::enableMachineScheduler() const {
+ // Enable MI scheduling for the embedded cores.
+ // FIXME: Enable this for all cores (some additional modeling
+ // may be necessary).
+ return needsAggressiveScheduling(DarwinDirective);
+}
+
+void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const {
+ if (needsAggressiveScheduling(DarwinDirective)) {
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+ }
+
+ // Spilling is generally expensive on all PPC cores, so always enable
+ // register-pressure tracking.
+ Policy.ShouldTrackPressure = true;
+}
+
+bool PPCSubtarget::useAA() const {
+ // Use AA during code generation for the embedded cores.
+ return needsAggressiveScheduling(DarwinDirective);
+}
+
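
With initializeEnvironment()/resetSubtargetFeatures() split out above, codegen can honor per-function "target-cpu" and "target-features" string attributes instead of a single module-wide feature set. A hedged sketch of attaching those strings through the IR API (the feature string "+qpx" is illustrative, and AttrBuilder's string-attribute signatures are assumed to match this LLVM version):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Tags F with the strings that resetSubtargetFeatures(const
// MachineFunction *) above reads back via AttributeSet::getAttribute().
void tagForA2(llvm::Function &F) {
  llvm::AttrBuilder B;
  B.addAttribute("target-cpu", "a2");        // CPU name, as with -mcpu=a2
  B.addAttribute("target-features", "+qpx"); // assumed feature string
  F.addAttributes(llvm::AttributeSet::FunctionIndex,
                  llvm::AttributeSet::get(F.getContext(),
                                          llvm::AttributeSet::FunctionIndex,
                                          B));
}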
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 65b4d21..c863a6e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -76,6 +76,8 @@ protected:
bool IsPPC64;
bool HasAltivec;
bool HasQPX;
+ bool HasVSX;
+ bool HasFCPSGN;
bool HasFSQRT;
bool HasFRE, HasFRES, HasFRSQRTE, HasFRSQRTES;
bool HasRecipPrec;
@@ -87,8 +89,11 @@ protected:
bool HasPOPCNTD;
bool HasLDBRX;
bool IsBookE;
+ bool DeprecatedMFTB;
+ bool DeprecatedDST;
bool HasLazyResolverStubs;
bool IsJITCodeModel;
+ bool IsLittleEndian;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
@@ -128,7 +133,7 @@ public:
// documentation are wrong; these are correct (i.e. "what gcc does").
if (isPPC64() && isSVR4ABI()) {
if (TargetTriple.getOS() == llvm::Triple::FreeBSD)
- return "E-p:64:64-f64:64:64-i64:64:64-f128:64:64-v128:128:128-n32:64";
+ return "E-p:64:64-f64:64:64-i64:64:64-v128:128:128-n32:64";
else
return "E-p:64:64-f64:64:64-i64:64:64-f128:128:128-v128:128:128-n32:64";
}
@@ -137,6 +142,13 @@ public:
: "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
}
+ /// \brief Reset the features for the PowerPC target.
+ virtual void resetSubtargetFeatures(const MachineFunction *MF);
+private:
+ void initializeEnvironment();
+ void resetSubtargetFeatures(StringRef CPU, StringRef FS);
+
+public:
/// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
///
bool isPPC64() const { return IsPPC64; }
@@ -159,7 +171,11 @@ public:
// isJITCodeModel - True if we're generating code for the JIT
bool isJITCodeModel() const { return IsJITCodeModel; }
+ // isLittleEndian - True if generating little-endian code
+ bool isLittleEndian() const { return IsLittleEndian; }
+
// Specific obvious features.
+ bool hasFCPSGN() const { return HasFCPSGN; }
bool hasFSQRT() const { return HasFSQRT; }
bool hasFRE() const { return HasFRE; }
bool hasFRES() const { return HasFRES; }
@@ -177,6 +193,8 @@ public:
bool hasPOPCNTD() const { return HasPOPCNTD; }
bool hasLDBRX() const { return HasLDBRX; }
bool isBookE() const { return IsBookE; }
+ bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
+ bool isDeprecatedDST() const { return DeprecatedDST; }
const Triple &getTargetTriple() const { return TargetTriple; }
@@ -194,6 +212,14 @@ public:
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const;
+
+ // Scheduling customization.
+ bool enableMachineScheduler() const;
+ void overrideSchedPolicy(MachineSchedPolicy &Policy,
+ MachineInstr *begin,
+ MachineInstr *end,
+ unsigned NumRegionInstrs) const;
+ bool useAA() const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 14dc794..9acefe5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -30,6 +30,7 @@ extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
+ RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
}
PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
@@ -48,6 +49,7 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT,
// The binutils for the BG/P are too old for CFI.
if (Subtarget.isBGP())
setMCUseCFI(false);
+ initAsmInfo();
}
void PPC32TargetMachine::anchor() { }
@@ -90,7 +92,7 @@ public:
return *getPPCTargetMachine().getSubtargetImpl();
}
- virtual bool addPreRegAlloc();
+ virtual bool addPreISel();
virtual bool addILPOpts();
virtual bool addInstSelector();
virtual bool addPreSched2();
@@ -102,9 +104,9 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
return new PPCPassConfig(this, PM);
}
-bool PPCPassConfig::addPreRegAlloc() {
+bool PPCPassConfig::addPreISel() {
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
- addPass(createPPCCTRLoops());
+ addPass(createPPCCTRLoops(getPPCTargetMachine()));
return false;
}
@@ -121,6 +123,12 @@ bool PPCPassConfig::addILPOpts() {
bool PPCPassConfig::addInstSelector() {
// Install an instruction selector.
addPass(createPPCISelDag(getPPCTargetMachine()));
+
+#ifndef NDEBUG
+ if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
+ addPass(createPPCCTRLoopsVerify());
+#endif
+
return false;
}
@@ -155,7 +163,7 @@ void PPCTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
// Add first the target-independent BasicTTI pass, then our PPC pass. This
// allows the PPC pass to delegate to the target independent layer when
// appropriate.
- PM.add(createBasicTargetTransformInfoPass(getTargetLowering()));
+ PM.add(createBasicTargetTransformInfoPass(this));
PM.add(createPPCTargetTransformInfoPass(this));
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
new file mode 100644
index 0000000..ec1e606
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -0,0 +1,67 @@
+//===-- PPCTargetObjectFile.cpp - PPC Object Info -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCTargetObjectFile.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/Mangler.h"
+
+using namespace llvm;
+
+void
+PPC64LinuxTargetObjectFile::
+Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
+
+const MCSection * PPC64LinuxTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+
+ const MCSection *DefaultSection =
+ TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
+
+ if (DefaultSection != ReadOnlySection)
+ return DefaultSection;
+
+ // Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI
+ // when we have a constant that contains global relocations. This is
+ // necessary because of this ABI's handling of pointers to functions in
+ // a shared library. The address of a function is actually the address
+ // of a function descriptor, which resides in the .opd section. Generated
+ // code uses the descriptor directly rather than going via the GOT as some
+ // other ABIs do, which means that initialized function pointers must
+ // reference the descriptor. The linker must convert copy relocs of
+ // pointers to functions in shared libraries into dynamic relocations,
+ // because of an ordering problem with initialization of copy relocs and
+ // PLT entries. The dynamic relocation will be initialized by the dynamic
+ // linker, so we must use DataRelROSection instead of ReadOnlySection.
+ // For more information, see the description of ELIMINATE_COPY_RELOCS in
+ // GNU ld.
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+
+ if (GVar && GVar->isConstant() &&
+ (GVar->getInitializer()->getRelocationInfo() ==
+ Constant::GlobalRelocations))
+ return DataRelROSection;
+
+ return DefaultSection;
+}
+
+const MCExpr *PPC64LinuxTargetObjectFile::
+getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
+ const MCExpr *Expr =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_PPC_DTPREL, getContext());
+ return MCBinaryExpr::CreateAdd(Expr,
+ MCConstantExpr::Create(0x8000, getContext()),
+ getContext());
+}
+
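
The SelectSectionForGlobal() override above exists for cases like the following: a constant global whose initializer refers to a function in a shared library. Under the 64-bit SVR4 ABI that reference is really to a function descriptor in .opd, so the variable must land in .data.rel.ro (relocated by the dynamic linker, then mapped read-only) rather than .rodata. A small sketch, with invented names:

// 'callback' is a constant, but its initializer carries a global
// relocation (Constant::GlobalRelocations in the test above), so on
// PPC64 SVR4 it is redirected from ReadOnlySection to DataRelROSection.
extern void handler(int);        // imagine this is defined in a .so
typedef void (*fn_t)(int);
const fn_t callback = &handler;  // constant with a global relocation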
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
new file mode 100644
index 0000000..262c522
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -0,0 +1,35 @@
+//===-- PPCTargetObjectFile.h - PPC Object Info -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_PPC_TARGETOBJECTFILE_H
+#define LLVM_TARGET_PPC_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+
+ /// PPC64LinuxTargetObjectFile - This implementation is used for
+ /// 64-bit PowerPC Linux.
+ class PPC64LinuxTargetObjectFile : public TargetLoweringObjectFileELF {
+
+ virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ virtual const MCSection *
+ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const;
+
+ /// \brief Describe a TLS variable address within debug info.
+ virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const;
+ };
+
+} // end namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
new file mode 100644
index 0000000..e876be1
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
@@ -0,0 +1,23 @@
+//===-- PPCTargetStreamer.h - PPC Target Streamer ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCTARGETSTREAMER_H
+#define PPCTARGETSTREAMER_H
+
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+class PPCTargetStreamer : public MCTargetStreamer {
+public:
+ virtual ~PPCTargetStreamer();
+ virtual void emitTCEntry(const MCSymbol &S) = 0;
+};
+}
+
+#endif
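
PPCTargetStreamer gives the assembly and object streamers a shared hook for emitting TOC entries. A hypothetical textual implementation might look like the following; the ".tc S[TC],S" form mirrors what PPC64 assembly uses for TOC-indirect accesses, but the in-tree streamers live in the MC layer and this is a sketch only:

#include "PPCTargetStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"

namespace {
// Sketch: print a TOC entry for S as ".tc S[TC],S".
class SketchPPCAsmStreamer : public llvm::PPCTargetStreamer {
  llvm::raw_ostream &OS;
public:
  explicit SketchPPCAsmStreamer(llvm::raw_ostream &OS) : OS(OS) {}
  virtual void emitTCEntry(const llvm::MCSymbol &S) {
    OS << "\t.tc " << S.getName() << "[TC]," << S.getName() << '\n';
  }
};
} // end anonymous namespace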
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 2504ba7..8879630 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -77,6 +77,7 @@ public:
/// \name Scalar TTI Implementations
/// @{
virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
+ virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const;
/// @}
@@ -129,6 +130,14 @@ PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const {
return PSK_Software;
}
+void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const {
+ if (ST->getDarwinDirective() == PPC::DIR_A2) {
+ // The A2 is in-order with a deep pipeline, and concatenation unrolling
+ // helps expose latency-hiding opportunities to the instruction scheduler.
+ UP.Partial = UP.Runtime = true;
+ }
+}
+
unsigned PPCTTI::getNumberOfRegisters(bool Vector) const {
if (Vector && !ST->hasAltivec())
return 0;
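
The getUnrollingPreferences() hook above enables partial and runtime unrolling on the A2, whose deep in-order pipeline benefits from longer straight-line bodies. For a loop whose trip count is unknown at compile time, the unroller may then emit an unrolled main loop plus a scalar remainder; plain source shown for illustration, the transform itself happens on IR:

// When compiled for the A2 (e.g. -mcpu=a2), UP.Partial/UP.Runtime allow
// this loop to become an unrolled-by-N body plus a remainder loop.
void scale(float *a, int n, float s) {
  for (int i = 0; i < n; ++i)
    a[i] *= s;
}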
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index fa44331..5727dbc 100644
--- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -12,7 +12,7 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::ThePPC32Target, llvm::ThePPC64Target;
+Target llvm::ThePPC32Target, llvm::ThePPC64Target, llvm::ThePPC64LETarget;
extern "C" void LLVMInitializePowerPCTargetInfo() {
RegisterTarget<Triple::ppc, /*HasJIT=*/true>
@@ -20,4 +20,7 @@ extern "C" void LLVMInitializePowerPCTargetInfo() {
RegisterTarget<Triple::ppc64, /*HasJIT=*/true>
Y(ThePPC64Target, "ppc64", "PowerPC 64");
+
+ RegisterTarget<Triple::ppc64le, /*HasJIT=*/true>
+ Z(ThePPC64LETarget, "ppc64le", "PowerPC 64 LE");
}