Diffstat (limited to 'contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp')
-rw-r--r-- | contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 595
1 file changed, 473 insertions(+), 122 deletions(-)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index 4c78992..c0c6055 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -12,12 +12,13 @@ //===----------------------------------------------------------------------===// #include "AArch64InstrInfo.h" +#include "AArch64MachineFunctionInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -51,9 +52,6 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AArch64GenInstrInfo.inc" -static const MachineMemOperand::Flags MOSuppressPair = - MachineMemOperand::MOTargetFlag1; - static cl::opt<unsigned> TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)")); @@ -369,7 +367,7 @@ void AArch64InstrInfo::instantiateCondBranch( // Folded compare-and-branch // Note that we use addOperand instead of addReg to keep the flags. const MachineInstrBuilder MIB = - BuildMI(&MBB, DL, get(Cond[1].getImm())).addOperand(Cond[2]); + BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]); if (Cond.size() > 3) MIB.addImm(Cond[3].getImm()); MIB.addMBB(TBB); @@ -762,6 +760,128 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const { llvm_unreachable("Unknown opcode to check as cheap as a move!"); } +bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const { + switch (MI.getOpcode()) { + default: + return false; + + case AArch64::ADDWrs: + case AArch64::ADDXrs: + case AArch64::ADDSWrs: + case AArch64::ADDSXrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + if (ShiftVal == 0) + return true; + return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5; + } + + case AArch64::ADDWrx: + case AArch64::ADDXrx: + case AArch64::ADDXrx64: + case AArch64::ADDSWrx: + case AArch64::ADDSXrx: + case AArch64::ADDSXrx64: { + unsigned Imm = MI.getOperand(3).getImm(); + switch (AArch64_AM::getArithExtendType(Imm)) { + default: + return false; + case AArch64_AM::UXTB: + case AArch64_AM::UXTH: + case AArch64_AM::UXTW: + case AArch64_AM::UXTX: + return AArch64_AM::getArithShiftValue(Imm) <= 4; + } + } + + case AArch64::SUBWrs: + case AArch64::SUBSWrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + return ShiftVal == 0 || + (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31); + } + + case AArch64::SUBXrs: + case AArch64::SUBSXrs: { + unsigned Imm = MI.getOperand(3).getImm(); + unsigned ShiftVal = AArch64_AM::getShiftValue(Imm); + return ShiftVal == 0 || + (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63); + } + + case AArch64::SUBWrx: + case AArch64::SUBXrx: + case AArch64::SUBXrx64: + case AArch64::SUBSWrx: + case AArch64::SUBSXrx: + case AArch64::SUBSXrx64: { + unsigned Imm = MI.getOperand(3).getImm(); + switch (AArch64_AM::getArithExtendType(Imm)) { + default: + return false; + case AArch64_AM::UXTB: + case AArch64_AM::UXTH: + case AArch64_AM::UXTW: + case AArch64_AM::UXTX: + return AArch64_AM::getArithShiftValue(Imm) == 0; + } + } + + case 
AArch64::LDRBBroW: + case AArch64::LDRBBroX: + case AArch64::LDRBroW: + case AArch64::LDRBroX: + case AArch64::LDRDroW: + case AArch64::LDRDroX: + case AArch64::LDRHHroW: + case AArch64::LDRHHroX: + case AArch64::LDRHroW: + case AArch64::LDRHroX: + case AArch64::LDRQroW: + case AArch64::LDRQroX: + case AArch64::LDRSBWroW: + case AArch64::LDRSBWroX: + case AArch64::LDRSBXroW: + case AArch64::LDRSBXroX: + case AArch64::LDRSHWroW: + case AArch64::LDRSHWroX: + case AArch64::LDRSHXroW: + case AArch64::LDRSHXroX: + case AArch64::LDRSWroW: + case AArch64::LDRSWroX: + case AArch64::LDRSroW: + case AArch64::LDRSroX: + case AArch64::LDRWroW: + case AArch64::LDRWroX: + case AArch64::LDRXroW: + case AArch64::LDRXroX: + case AArch64::PRFMroW: + case AArch64::PRFMroX: + case AArch64::STRBBroW: + case AArch64::STRBBroX: + case AArch64::STRBroW: + case AArch64::STRBroX: + case AArch64::STRDroW: + case AArch64::STRDroX: + case AArch64::STRHHroW: + case AArch64::STRHHroX: + case AArch64::STRHroW: + case AArch64::STRHroX: + case AArch64::STRQroW: + case AArch64::STRQroX: + case AArch64::STRSroW: + case AArch64::STRSroX: + case AArch64::STRWroW: + case AArch64::STRWroX: + case AArch64::STRXroW: + case AArch64::STRXroX: { + unsigned IsSigned = MI.getOperand(3).getImm(); + return !IsSigned; + } + } +} + bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg, unsigned &SubIdx) const { @@ -913,7 +1033,7 @@ static bool UpdateOperandRegClass(MachineInstr &Instr) { /// \brief Return the opcode that does not set flags when possible - otherwise /// return the original opcode. The caller is responsible to do the actual /// substitution and legality checking. -static unsigned convertFlagSettingOpcode(const MachineInstr &MI) { +static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) { // Don't convert all compare instructions, because for some the zero register // encoding becomes the sp register. 
bool MIDefinesZeroReg = false; @@ -1022,7 +1142,7 @@ bool AArch64InstrInfo::optimizeCompareInstr( return true; } unsigned Opc = CmpInstr.getOpcode(); - unsigned NewOpc = convertFlagSettingOpcode(CmpInstr); + unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr); if (NewOpc == Opc) return false; const MCInstrDesc &MCID = get(NewOpc); @@ -1159,6 +1279,7 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { case AArch64CC::HI: // Z clear and C set case AArch64CC::LS: // Z set or C clear UsedFlags.Z = true; + LLVM_FALLTHROUGH; case AArch64CC::HS: // C set case AArch64CC::LO: // C clear UsedFlags.C = true; @@ -1177,6 +1298,7 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) { case AArch64CC::GT: // Z clear, N and V the same case AArch64CC::LE: // Z set, N and V differ UsedFlags.Z = true; + LLVM_FALLTHROUGH; case AArch64CC::GE: // N and V the same case AArch64CC::LT: // N and V differ UsedFlags.N = true; @@ -1299,16 +1421,16 @@ bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const { .addMemOperand(*MI.memoperands_begin()); } else if (TM.getCodeModel() == CodeModel::Large) { BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg) - .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48); + .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0); BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32); + .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16); BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC).addImm(16); + .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC).addImm(32); BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg) .addReg(Reg, RegState::Kill) - .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC).addImm(0); + .addGlobalAddress(GV, 0, AArch64II::MO_G3).addImm(48); BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg) .addReg(Reg, RegState::Kill) .addImm(0) @@ -1345,14 +1467,6 @@ bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) const { case AArch64::BICSXrs: case AArch64::BICWrs: case AArch64::BICXrs: - case AArch64::CRC32Brr: - case AArch64::CRC32CBrr: - case AArch64::CRC32CHrr: - case AArch64::CRC32CWrr: - case AArch64::CRC32CXrr: - case AArch64::CRC32Hrr: - case AArch64::CRC32Wrr: - case AArch64::CRC32Xrr: case AArch64::EONWrs: case AArch64::EONXrs: case AArch64::EORWrs: @@ -1598,6 +1712,13 @@ void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) const { (*MI.memoperands_begin())->setFlags(MOSuppressPair); } +/// Check all MachineMemOperands for a hint that the load/store is strided. +bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) const { + return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) { + return MMO->getFlags() & MOStridedAccess; + }); +} + bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) const { switch (Opc) { default: @@ -1691,16 +1812,59 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( } else return false; - // Offset is calculated as the immediate operand multiplied by the scaling factor. - // Unscaled instructions have scaling factor set to 1. + // Get the scaling factor for the instruction and set the width for the + // instruction. unsigned Scale = 0; - switch (LdSt.getOpcode()) { + int64_t Dummy1, Dummy2; + + // If this returns false, then it's an instruction we don't want to handle. + if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2)) + return false; + + // Compute the offset. 
Offset is calculated as the immediate operand + // multiplied by the scaling factor. Unscaled instructions have scaling factor + // set to 1. + if (LdSt.getNumExplicitOperands() == 3) { + BaseReg = LdSt.getOperand(1).getReg(); + Offset = LdSt.getOperand(2).getImm() * Scale; + } else { + assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); + BaseReg = LdSt.getOperand(2).getReg(); + Offset = LdSt.getOperand(3).getImm() * Scale; + } + return true; +} + +MachineOperand& +AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const { + assert(LdSt.mayLoadOrStore() && "Expected a memory operation."); + MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands()-1); + assert(OfsOp.isImm() && "Offset operand wasn't immediate."); + return OfsOp; +} + +bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale, + unsigned &Width, int64_t &MinOffset, + int64_t &MaxOffset) const { + switch (Opcode) { + // Not a memory operation or something we want to handle. default: + Scale = Width = 0; + MinOffset = MaxOffset = 0; return false; + case AArch64::STRWpost: + case AArch64::LDRWpost: + Width = 32; + Scale = 4; + MinOffset = -256; + MaxOffset = 255; + break; case AArch64::LDURQi: case AArch64::STURQi: Width = 16; Scale = 1; + MinOffset = -256; + MaxOffset = 255; break; case AArch64::LDURXi: case AArch64::LDURDi: @@ -1708,6 +1872,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( case AArch64::STURDi: Width = 8; Scale = 1; + MinOffset = -256; + MaxOffset = 255; break; case AArch64::LDURWi: case AArch64::LDURSi: @@ -1716,6 +1882,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( case AArch64::STURSi: Width = 4; Scale = 1; + MinOffset = -256; + MaxOffset = 255; break; case AArch64::LDURHi: case AArch64::LDURHHi: @@ -1725,6 +1893,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( case AArch64::STURHHi: Width = 2; Scale = 1; + MinOffset = -256; + MaxOffset = 255; break; case AArch64::LDURBi: case AArch64::LDURBBi: @@ -1734,6 +1904,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( case AArch64::STURBBi: Width = 1; Scale = 1; + MinOffset = -256; + MaxOffset = 255; break; case AArch64::LDPQi: case AArch64::LDNPQi: @@ -1741,10 +1913,14 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( case AArch64::STNPQi: Scale = 16; Width = 32; + MinOffset = -64; + MaxOffset = 63; break; case AArch64::LDRQui: case AArch64::STRQui: Scale = Width = 16; + MinOffset = 0; + MaxOffset = 4095; break; case AArch64::LDPXi: case AArch64::LDPDi: @@ -1756,12 +1932,16 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( case AArch64::STNPDi: Scale = 8; Width = 16; + MinOffset = -64; + MaxOffset = 63; break; case AArch64::LDRXui: case AArch64::LDRDui: case AArch64::STRXui: case AArch64::STRDui: Scale = Width = 8; + MinOffset = 0; + MaxOffset = 4095; break; case AArch64::LDPWi: case AArch64::LDPSi: @@ -1773,6 +1953,8 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( case AArch64::STNPSi: Scale = 4; Width = 8; + MinOffset = -64; + MaxOffset = 63; break; case AArch64::LDRWui: case AArch64::LDRSui: @@ -1780,29 +1962,27 @@ bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth( case AArch64::STRWui: case AArch64::STRSui: Scale = Width = 4; + MinOffset = 0; + MaxOffset = 4095; break; case AArch64::LDRHui: case AArch64::LDRHHui: case AArch64::STRHui: case AArch64::STRHHui: Scale = Width = 2; + MinOffset = 0; + MaxOffset = 4095; break; case AArch64::LDRBui: case AArch64::LDRBBui: case AArch64::STRBui: case AArch64::STRBBui: Scale = Width = 1; + MinOffset = 0; + 
MaxOffset = 4095; break; } - if (LdSt.getNumExplicitOperands() == 3) { - BaseReg = LdSt.getOperand(1).getReg(); - Offset = LdSt.getOperand(2).getImm() * Scale; - } else { - assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands"); - BaseReg = LdSt.getOperand(2).getReg(); - Offset = LdSt.getOperand(3).getImm() * Scale; - } return true; } @@ -1903,88 +2083,6 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt, return Offset1 + 1 == Offset2; } -bool AArch64InstrInfo::shouldScheduleAdjacent( - const MachineInstr &First, const MachineInstr &Second) const { - if (Subtarget.hasArithmeticBccFusion()) { - // Fuse CMN, CMP, TST followed by Bcc. - unsigned SecondOpcode = Second.getOpcode(); - if (SecondOpcode == AArch64::Bcc) { - switch (First.getOpcode()) { - default: - return false; - case AArch64::ADDSWri: - case AArch64::ADDSWrr: - case AArch64::ADDSXri: - case AArch64::ADDSXrr: - case AArch64::ANDSWri: - case AArch64::ANDSWrr: - case AArch64::ANDSXri: - case AArch64::ANDSXrr: - case AArch64::SUBSWri: - case AArch64::SUBSWrr: - case AArch64::SUBSXri: - case AArch64::SUBSXrr: - case AArch64::BICSWrr: - case AArch64::BICSXrr: - return true; - case AArch64::ADDSWrs: - case AArch64::ADDSXrs: - case AArch64::ANDSWrs: - case AArch64::ANDSXrs: - case AArch64::SUBSWrs: - case AArch64::SUBSXrs: - case AArch64::BICSWrs: - case AArch64::BICSXrs: - // Shift value can be 0 making these behave like the "rr" variant... - return !hasShiftedReg(Second); - } - } - } - if (Subtarget.hasArithmeticCbzFusion()) { - // Fuse ALU operations followed by CBZ/CBNZ. - unsigned SecondOpcode = Second.getOpcode(); - if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX || - SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) { - switch (First.getOpcode()) { - default: - return false; - case AArch64::ADDWri: - case AArch64::ADDWrr: - case AArch64::ADDXri: - case AArch64::ADDXrr: - case AArch64::ANDWri: - case AArch64::ANDWrr: - case AArch64::ANDXri: - case AArch64::ANDXrr: - case AArch64::EORWri: - case AArch64::EORWrr: - case AArch64::EORXri: - case AArch64::EORXrr: - case AArch64::ORRWri: - case AArch64::ORRWrr: - case AArch64::ORRXri: - case AArch64::ORRXrr: - case AArch64::SUBWri: - case AArch64::SUBWrr: - case AArch64::SUBXri: - case AArch64::SUBXrr: - return true; - case AArch64::ADDWrs: - case AArch64::ADDXrs: - case AArch64::ANDWrs: - case AArch64::ANDXrs: - case AArch64::SUBWrs: - case AArch64::SUBXrs: - case AArch64::BICWrs: - case AArch64::BICXrs: - // Shift value can be 0 making these behave like the "rr" variant... 
- return !hasShiftedReg(Second); - } - } - } - return false; -} - MachineInstr *AArch64InstrInfo::emitFrameIndexDebugValue( MachineFunction &MF, int FrameIx, uint64_t Offset, const MDNode *Var, const MDNode *Expr, const DebugLoc &DL) const { @@ -2339,7 +2437,7 @@ void AArch64InstrInfo::storeRegToStackSlot( PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align); unsigned Opc = 0; bool Offset = true; - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::STRBui; @@ -2443,7 +2541,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( unsigned Opc = 0; bool Offset = true; - switch (RC->getSize()) { + switch (TRI->getSpillSize(*RC)) { case 1: if (AArch64::FPR8RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRBui; @@ -2668,7 +2766,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( }; if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) { - assert(getRegClass(DstReg)->getSize() == getRegClass(SrcReg)->getSize() && + assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) == + TRI.getRegSizeInBits(*getRegClass(SrcReg)) && "Mismatched register size in non subreg COPY"); if (IsSpill) storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex, @@ -2754,7 +2853,8 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl( } if (FillRC) { - assert(getRegClass(SrcReg)->getSize() == FillRC->getSize() && + assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) == + TRI.getRegSizeInBits(*FillRC) && "Mismatched regclass size on folded subreg COPY"); loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI); MachineInstr &LoadMI = *--InsertPt; @@ -3044,7 +3144,7 @@ bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, return false; } -void AArch64InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { +void AArch64InstrInfo::getNoop(MCInst &NopInst) const { NopInst.setOpcode(AArch64::HINT); NopInst.addOperand(MCOperand::createImm(0)); } @@ -3224,7 +3324,7 @@ static bool getMaddPatterns(MachineInstr &Root, // When NZCV is live bail out. if (Cmp_NZCV == -1) return false; - unsigned NewOpc = convertFlagSettingOpcode(Root); + unsigned NewOpc = convertToNonFlagSettingOpc(Root); // When opcode can't change bail out. // CHECKME: do we miss any cases for opcode conversion? 
if (NewOpc == Opc) @@ -3444,6 +3544,10 @@ static bool getFMAPatterns(MachineInstr &Root, Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2); Found = true; } + if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) { + Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1); + Found = true; + } break; case AArch64::FSUBDrr: if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) { @@ -3458,6 +3562,10 @@ static bool getFMAPatterns(MachineInstr &Root, Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2); Found = true; } + if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) { + Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1); + Found = true; + } break; case AArch64::FSUBv2f32: if (canCombineWithFMUL(MBB, Root.getOperand(2), @@ -3512,6 +3620,8 @@ AArch64InstrInfo::isThroughputPattern(MachineCombinerPattern Pattern) const { case MachineCombinerPattern::FMULADDD_OP2: case MachineCombinerPattern::FMULSUBD_OP1: case MachineCombinerPattern::FMULSUBD_OP2: + case MachineCombinerPattern::FNMULSUBS_OP1: + case MachineCombinerPattern::FNMULSUBD_OP1: case MachineCombinerPattern::FMLAv1i32_indexed_OP1: case MachineCombinerPattern::FMLAv1i32_indexed_OP2: case MachineCombinerPattern::FMLAv1i64_indexed_OP1: @@ -3565,12 +3675,17 @@ enum class FMAInstKind { Default, Indexed, Accumulator }; /// F|MUL I=A,B,0 /// F|ADD R,I,C /// ==> F|MADD R,A,B,C +/// \param MF Containing MachineFunction +/// \param MRI Register information +/// \param TII Target information /// \param Root is the F|ADD instruction /// \param [out] InsInstrs is a vector of machine instructions and will /// contain the generated madd instruction /// \param IdxMulOpd is index of operand in Root that is the result of /// the F|MUL. In the example above IdxMulOpd is 1. /// \param MaddOpc the opcode fo the f|madd instruction +/// \param RC Register class of operands +/// \param kind of fma instruction (addressing mode) to be generated static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, @@ -3629,6 +3744,9 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, /// ADD R,I,Imm /// ==> ORR V, ZR, Imm /// ==> MADD R,A,B,V +/// \param MF Containing MachineFunction +/// \param MRI Register information +/// \param TII Target information /// \param Root is the ADD instruction /// \param [out] InsInstrs is a vector of machine instructions and will /// contain the generated madd instruction @@ -3637,6 +3755,7 @@ genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, /// \param MaddOpc the opcode fo the madd instruction /// \param VR is a virtual register that holds the value of an ADD operand /// (V in the example above). 
+/// \param RC Register class of operands static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs, @@ -3793,7 +3912,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MachineInstrBuilder MIB1 = BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR) .addReg(ZeroReg) - .addOperand(Root.getOperand(2)); + .add(Root.getOperand(2)); InsInstrs.push_back(MIB1); InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0)); MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC); @@ -4013,6 +4132,24 @@ void AArch64InstrInfo::genAlternativeCodeSequence( MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); break; } + + case MachineCombinerPattern::FNMULSUBS_OP1: + case MachineCombinerPattern::FNMULSUBD_OP1: { + // FNMUL I=A,B,0 + // FSUB R,I,C + // ==> FNMADD R,A,B,C // = -A*B - C + // --- Create(FNMADD); + if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) { + Opc = AArch64::FNMADDSrrr; + RC = &AArch64::FPR32RegClass; + } else { + Opc = AArch64::FNMADDDrrr; + RC = &AArch64::FPR64RegClass; + } + MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC); + break; + } + case MachineCombinerPattern::FMULSUBS_OP2: case MachineCombinerPattern::FMULSUBD_OP2: { // FMUL I=A,B,0 @@ -4028,6 +4165,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence( } MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC); break; + } case MachineCombinerPattern::FMLSv1i32_indexed_OP2: Opc = AArch64::FMLSv1i32_indexed; @@ -4084,7 +4222,6 @@ void AArch64InstrInfo::genAlternativeCodeSequence( FMAInstKind::Accumulator); } break; - } } // end switch (Pattern) // Record MUL and ADD/SUB for deletion DelInstrs.push_back(MUL); @@ -4094,26 +4231,36 @@ void AArch64InstrInfo::genAlternativeCodeSequence( /// \brief Replace csincr-branch sequence by simple conditional branch /// /// Examples: -/// 1. +/// 1. \code /// csinc w9, wzr, wzr, <condition code> /// tbnz w9, #0, 0x44 +/// \endcode /// to +/// \code /// b.<inverted condition code> +/// \endcode /// -/// 2. +/// 2. \code /// csinc w9, wzr, wzr, <condition code> /// tbz w9, #0, 0x44 +/// \endcode /// to +/// \code /// b.<condition code> +/// \endcode /// /// Replace compare and branch sequence by TBZ/TBNZ instruction when the /// compare's constant operand is power of 2. /// /// Examples: +/// \code /// and w8, w8, #0x400 /// cbnz w8, L1 +/// \endcode /// to +/// \code /// tbnz w8, #10, L1 +/// \endcode /// /// \param MI Conditional Branch /// \return True when the simple conditional branch is generated @@ -4286,3 +4433,207 @@ AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { {MO_TLS, "aarch64-tls"}}; return makeArrayRef(TargetFlags); } + +ArrayRef<std::pair<MachineMemOperand::Flags, const char *>> +AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const { + static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] = + {{MOSuppressPair, "aarch64-suppress-pair"}, + {MOStridedAccess, "aarch64-strided-access"}}; + return makeArrayRef(TargetFlags); +} + +unsigned AArch64InstrInfo::getOutliningBenefit(size_t SequenceSize, + size_t Occurrences, + bool CanBeTailCall) const { + unsigned NotOutlinedSize = SequenceSize * Occurrences; + unsigned OutlinedSize; + + // Is this candidate something we can outline as a tail call? + if (CanBeTailCall) { + // If yes, then we just outline the sequence and replace each of its + // occurrences with a branch instruction. 
+ OutlinedSize = SequenceSize + Occurrences; + } else { + // If no, then we outline the sequence (SequenceSize), add a return (+1), + // and replace each occurrence with a save/restore to LR and a call + // (3 * Occurrences) + OutlinedSize = (SequenceSize + 1) + (3 * Occurrences); + } + + // Return the number of instructions saved by outlining this sequence. + return NotOutlinedSize > OutlinedSize ? NotOutlinedSize - OutlinedSize : 0; +} + +bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const { + return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); +} + +AArch64GenInstrInfo::MachineOutlinerInstrType +AArch64InstrInfo::getOutliningType(MachineInstr &MI) const { + + MachineFunction *MF = MI.getParent()->getParent(); + AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>(); + + // Don't outline LOHs. + if (FuncInfo->getLOHRelated().count(&MI)) + return MachineOutlinerInstrType::Illegal; + + // Don't allow debug values to impact outlining type. + if (MI.isDebugValue() || MI.isIndirectDebugValue()) + return MachineOutlinerInstrType::Invisible; + + // Is this a terminator for a basic block? + if (MI.isTerminator()) { + + // Is this the end of a function? + if (MI.getParent()->succ_empty()) + return MachineOutlinerInstrType::Legal; + + // It's not, so don't outline it. + return MachineOutlinerInstrType::Illegal; + } + + // Don't outline positions. + if (MI.isPosition()) + return MachineOutlinerInstrType::Illegal; + + // Make sure none of the operands are un-outlinable. + for (const MachineOperand &MOP : MI.operands()) + if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || + MOP.isTargetIndex()) + return MachineOutlinerInstrType::Illegal; + + // Don't outline anything that uses the link register. + if (MI.modifiesRegister(AArch64::LR, &RI) || + MI.readsRegister(AArch64::LR, &RI)) + return MachineOutlinerInstrType::Illegal; + + // Does this use the stack? + if (MI.modifiesRegister(AArch64::SP, &RI) || + MI.readsRegister(AArch64::SP, &RI)) { + + // Is it a memory operation? + if (MI.mayLoadOrStore()) { + unsigned Base; // Filled with the base regiser of MI. + int64_t Offset; // Filled with the offset of MI. + unsigned DummyWidth; + + // Does it allow us to offset the base register and is the base SP? + if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) || + Base != AArch64::SP) + return MachineOutlinerInstrType::Illegal; + + // Find the minimum/maximum offset for this instruction and check if + // fixing it up would be in range. + int64_t MinOffset, MaxOffset; + unsigned DummyScale; + getMemOpInfo(MI.getOpcode(), DummyScale, DummyWidth, MinOffset, + MaxOffset); + + // TODO: We should really test what happens if an instruction overflows. + // This is tricky to test with IR tests, but when the outliner is moved + // to a MIR test, it really ought to be checked. + if (Offset + 16 < MinOffset || Offset + 16 > MaxOffset) + return MachineOutlinerInstrType::Illegal; + + // It's in range, so we can outline it. + return MachineOutlinerInstrType::Legal; + } + + // We can't fix it up, so don't outline it. + return MachineOutlinerInstrType::Illegal; + } + + return MachineOutlinerInstrType::Legal; +} + +void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const { + for (MachineInstr &MI : MBB) { + unsigned Base, Width; + int64_t Offset; + + // Is this a load or store with an immediate offset with SP as the base? 
+ if (!MI.mayLoadOrStore() || + !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) || + Base != AArch64::SP) + continue; + + // It is, so we have to fix it up. + unsigned Scale; + int64_t Dummy1, Dummy2; + + MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI); + assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!"); + getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2); + assert(Scale != 0 && "Unexpected opcode!"); + + // We've pushed the return address to the stack, so add 16 to the offset. + // This is safe, since we already checked if it would overflow when we + // checked if this instruction was legal to outline. + int64_t NewImm = (Offset + 16)/Scale; + StackOffsetOperand.setImm(NewImm); + } +} + +void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB, + MachineFunction &MF, + bool IsTailCall) const { + + // If this is a tail call outlined function, then there's already a return. + if (IsTailCall) + return; + + // It's not a tail call, so we have to insert the return ourselves. + MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET)) + .addReg(AArch64::LR, RegState::Undef); + MBB.insert(MBB.end(), ret); + + // Walk over the basic block and fix up all the stack accesses. + fixupPostOutline(MBB); +} + +void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB, + MachineFunction &MF, + bool IsTailCall) const {} + +MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( + Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, + MachineFunction &MF, bool IsTailCall) const { + + // Are we tail calling? + if (IsTailCall) { + // If yes, then we can just branch to the label. + It = MBB.insert(It, + BuildMI(MF, DebugLoc(), get(AArch64::B)) + .addGlobalAddress(M.getNamedValue(MF.getName()))); + return It; + } + + // We're not tail calling, so we have to save LR before the call and restore + // it after. + MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) + .addReg(AArch64::SP, RegState::Define) + .addReg(AArch64::LR) + .addReg(AArch64::SP) + .addImm(-16); + It = MBB.insert(It, STRXpre); + It++; + + // Insert the call. + It = MBB.insert(It, + BuildMI(MF, DebugLoc(), get(AArch64::BL)) + .addGlobalAddress(M.getNamedValue(MF.getName()))); + + It++; + + // Restore the link register. + MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost)) + .addReg(AArch64::SP, RegState::Define) + .addReg(AArch64::LR) + .addReg(AArch64::SP) + .addImm(16); + It = MBB.insert(It, LDRXpost); + + return It; +} + |