Diffstat (limited to 'lib/Target/X86/X86InstrInfo.cpp')
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 642
1 file changed, 271 insertions(+), 371 deletions(-)
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 34e12ca..ce471ea 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -784,7 +784,9 @@ static bool isFrameLoadOpcode(int Opcode) {
   case X86::MOV8rm:
   case X86::MOV16rm:
   case X86::MOV32rm:
+  case X86::MOV32rm_TC:
   case X86::MOV64rm:
+  case X86::MOV64rm_TC:
   case X86::LD_Fp64m:
   case X86::MOVSSrm:
   case X86::MOVSDrm:
@@ -805,7 +807,9 @@ static bool isFrameStoreOpcode(int Opcode) {
   case X86::MOV8mr:
   case X86::MOV16mr:
   case X86::MOV32mr:
+  case X86::MOV32mr_TC:
   case X86::MOV64mr:
+  case X86::MOV64mr_TC:
   case X86::ST_FpP64m:
   case X86::MOVSSmr:
   case X86::MOVSDmr:
@@ -863,7 +867,7 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                           int &FrameIndex) const {
   if (isFrameStoreOpcode(MI->getOpcode()))
     if (isFrameOperand(MI, 0, FrameIndex))
-      return MI->getOperand(X86AddrNumOperands).getReg();
+      return MI->getOperand(X86::AddrNumOperands).getReg();
   return 0;
 }
 
@@ -1064,14 +1068,9 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  unsigned DestReg, unsigned SubIdx,
                                  const MachineInstr *Orig,
-                                 const TargetRegisterInfo *TRI) const {
+                                 const TargetRegisterInfo &TRI) const {
   DebugLoc DL = Orig->getDebugLoc();
 
-  if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
-    DestReg = TRI->getSubReg(DestReg, SubIdx);
-    SubIdx = 0;
-  }
-
   // MOV32r0 etc. are implemented with xor which clobbers condition code.
   // Re-materialize them as movri instructions to avoid side effects.
   bool Clone = true;
@@ -1098,14 +1097,13 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
   if (Clone) {
     MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
-    MI->getOperand(0).setReg(DestReg);
     MBB.insert(I, MI);
   } else {
-    BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
+    BuildMI(MBB, I, DL, get(Opc)).addOperand(Orig->getOperand(0)).addImm(0);
   }
 
   MachineInstr *NewMI = prior(I);
-  NewMI->getOperand(0).setSubReg(SubIdx);
+  NewMI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
 }
 
 /// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
@@ -1151,10 +1149,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   // least on modern x86 machines).
   BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
   MachineInstr *InsMI =
-    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
-    .addReg(leaInReg)
-    .addReg(Src, getKillRegState(isKill))
-    .addImm(X86::sub_16bit);
+    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
+    .addReg(leaInReg, RegState::Define, X86::sub_16bit)
+    .addReg(Src, getKillRegState(isKill));
 
   MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
                                     get(Opc), leaOutReg);
@@ -1165,20 +1162,20 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
   case X86::SHL16ri: {
     unsigned ShAmt = MI->getOperand(2).getImm();
     MIB.addReg(0).addImm(1 << ShAmt)
-       .addReg(leaInReg, RegState::Kill).addImm(0);
+       .addReg(leaInReg, RegState::Kill).addImm(0).addReg(0);
     break;
   }
   case X86::INC16r:
   case X86::INC64_16r:
-    addLeaRegOffset(MIB, leaInReg, true, 1);
+    addRegOffset(MIB, leaInReg, true, 1);
     break;
   case X86::DEC16r:
   case X86::DEC64_16r:
-    addLeaRegOffset(MIB, leaInReg, true, -1);
+    addRegOffset(MIB, leaInReg, true, -1);
     break;
   case X86::ADD16ri:
   case X86::ADD16ri8:
-    addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+    addRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
     break;
   case X86::ADD16rr: {
     unsigned Src2 = MI->getOperand(2).getReg();
@@ -1195,10 +1192,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
       // well be shifting and then extracting the lower 16-bits.
       BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
       InsMI2 =
-        BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
-        .addReg(leaInReg2)
-        .addReg(Src2, getKillRegState(isKill2))
-        .addImm(X86::sub_16bit);
+        BuildMI(*MFI, MIB, MI->getDebugLoc(), get(TargetOpcode::COPY))
+        .addReg(leaInReg2, RegState::Define, X86::sub_16bit)
+        .addReg(Src2, getKillRegState(isKill2));
       addRegReg(MIB, leaInReg, true, leaInReg2, true);
     }
     if (LV && isKill2 && InsMI2)
@@ -1209,10 +1205,9 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
 
   MachineInstr *NewMI = MIB;
   MachineInstr *ExtMI =
-    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(TargetOpcode::COPY))
     .addReg(Dest, RegState::Define | getDeadRegState(isDead))
-    .addReg(leaOutReg, RegState::Kill)
-    .addImm(X86::sub_16bit);
+    .addReg(leaOutReg, RegState::Kill, X86::sub_16bit);
 
   if (LV) {
     // Update live variables
@@ -1283,7 +1278,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       .addReg(Dest, RegState::Define | getDeadRegState(isDead))
       .addReg(0).addImm(1 << ShAmt)
       .addReg(Src, getKillRegState(isKill))
-      .addImm(0);
+      .addImm(0).addReg(0);
     break;
   }
   case X86::SHL32ri: {
@@ -1297,7 +1292,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
       .addReg(Dest, RegState::Define | getDeadRegState(isDead))
       .addReg(0).addImm(1 << ShAmt)
-      .addReg(Src, getKillRegState(isKill)).addImm(0);
+      .addReg(Src, getKillRegState(isKill)).addImm(0).addReg(0);
     break;
   }
   case X86::SHL16ri: {
@@ -1313,7 +1308,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       .addReg(Dest, RegState::Define | getDeadRegState(isDead))
       .addReg(0).addImm(1 << ShAmt)
      .addReg(Src, getKillRegState(isKill))
-      .addImm(0);
+      .addImm(0).addReg(0);
     break;
   }
   default: {
@@ -1331,7 +1326,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
     unsigned Opc = MIOpc == X86::INC64r ?
       X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
-    NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+    NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                             .addReg(Dest, RegState::Define |
                                     getDeadRegState(isDead)),
                             Src, isKill, 1);
@@ -1353,7 +1348,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
     unsigned Opc = MIOpc == X86::DEC64r ?
       X86::LEA64r : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
-    NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+    NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                             .addReg(Dest, RegState::Define |
                                     getDeadRegState(isDead)),
                             Src, isKill, -1);
@@ -1401,7 +1396,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   case X86::ADD64ri32:
   case X86::ADD64ri8:
     assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-    NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+    NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
                             .addReg(Dest, RegState::Define |
                                     getDeadRegState(isDead)),
                             Src, isKill, MI->getOperand(2).getImm());
@@ -1410,7 +1405,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   case X86::ADD32ri8: {
     assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
     unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
-    NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+    NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                             .addReg(Dest, RegState::Define |
                                     getDeadRegState(isDead)),
                             Src, isKill, MI->getOperand(2).getImm());
@@ -1421,7 +1416,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     if (DisableLEA16)
       return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
     assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-    NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+    NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                             .addReg(Dest, RegState::Define |
                                     getDeadRegState(isDead)),
                             Src, isKill, MI->getOperand(2).getImm());
@@ -1845,9 +1840,8 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
 unsigned
 X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                            MachineBasicBlock *FBB,
-                           const SmallVectorImpl<MachineOperand> &Cond) const {
-  // FIXME this should probably have a DebugLoc operand
-  DebugLoc dl;
+                           const SmallVectorImpl<MachineOperand> &Cond,
+                           DebugLoc DL) const {
   // Shouldn't be a fall through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 1 || Cond.size() == 0) &&
@@ -1856,7 +1850,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   if (Cond.empty()) {
     // Unconditional branch?
     assert(!FBB && "Unconditional branch with multiple successors!");
-    BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(TBB);
+    BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(TBB);
     return 1;
   }
 
@@ -1866,27 +1860,27 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   switch (CC) {
   case X86::COND_NP_OR_E:
     // Synthesize NP_OR_E with two branches.
-    BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
+    BuildMI(&MBB, DL, get(X86::JNP_4)).addMBB(TBB);
     ++Count;
-    BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
+    BuildMI(&MBB, DL, get(X86::JE_4)).addMBB(TBB);
     ++Count;
     break;
   case X86::COND_NE_OR_P:
     // Synthesize NE_OR_P with two branches.
-    BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
+    BuildMI(&MBB, DL, get(X86::JNE_4)).addMBB(TBB);
     ++Count;
-    BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
+    BuildMI(&MBB, DL, get(X86::JP_4)).addMBB(TBB);
     ++Count;
     break;
   default: {
     unsigned Opc = GetCondBranchFromCond(CC);
-    BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
+    BuildMI(&MBB, DL, get(Opc)).addMBB(TBB);
     ++Count;
   }
   }
   if (FBB) {
     // Two-way Conditional branch. Insert the second branch.
-    BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB);
+    BuildMI(&MBB, DL, get(X86::JMP_4)).addMBB(FBB);
     ++Count;
   }
   return Count;
@@ -1897,237 +1891,153 @@ static bool isHReg(unsigned Reg) {
   return X86::GR8_ABCD_HRegClass.contains(Reg);
 }
 
-bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
-                                MachineBasicBlock::iterator MI,
-                                unsigned DestReg, unsigned SrcReg,
-                                const TargetRegisterClass *DestRC,
-                                const TargetRegisterClass *SrcRC,
-                                DebugLoc DL) const {
-
-  // Determine if DstRC and SrcRC have a common superclass in common.
-  const TargetRegisterClass *CommonRC = DestRC;
-  if (DestRC == SrcRC)
-    /* Source and destination have the same register class. */;
-  else if (CommonRC->hasSuperClass(SrcRC))
-    CommonRC = SrcRC;
-  else if (!DestRC->hasSubClass(SrcRC)) {
-    // Neither of GR64_NOREX or GR64_NOSP is a superclass of the other,
-    // but we want to copy them as GR64. Similarly, for GR32_NOREX and
-    // GR32_NOSP, copy as GR32.
-    if (SrcRC->hasSuperClass(&X86::GR64RegClass) &&
-        DestRC->hasSuperClass(&X86::GR64RegClass))
-      CommonRC = &X86::GR64RegClass;
-    else if (SrcRC->hasSuperClass(&X86::GR32RegClass) &&
-             DestRC->hasSuperClass(&X86::GR32RegClass))
-      CommonRC = &X86::GR32RegClass;
+void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
+                               MachineBasicBlock::iterator MI, DebugLoc DL,
+                               unsigned DestReg, unsigned SrcReg,
+                               bool KillSrc) const {
+  // First deal with the normal symmetric copies.
+  unsigned Opc = 0;
+  if (X86::GR64RegClass.contains(DestReg, SrcReg))
+    Opc = X86::MOV64rr;
+  else if (X86::GR32RegClass.contains(DestReg, SrcReg))
+    Opc = X86::MOV32rr;
+  else if (X86::GR16RegClass.contains(DestReg, SrcReg))
+    Opc = X86::MOV16rr;
+  else if (X86::GR8RegClass.contains(DestReg, SrcReg)) {
+    // Copying to or from a physical H register on x86-64 requires a NOREX
+    // move. Otherwise use a normal move.
+    if ((isHReg(DestReg) || isHReg(SrcReg)) &&
+        TM.getSubtarget<X86Subtarget>().is64Bit())
+      Opc = X86::MOV8rr_NOREX;
     else
-      CommonRC = 0;
-  }
-
-  if (CommonRC) {
-    unsigned Opc;
-    if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) {
-      Opc = X86::MOV64rr;
-    } else if (CommonRC == &X86::GR32RegClass ||
-               CommonRC == &X86::GR32_NOSPRegClass) {
-      Opc = X86::MOV32rr;
-    } else if (CommonRC == &X86::GR16RegClass) {
-      Opc = X86::MOV16rr;
-    } else if (CommonRC == &X86::GR8RegClass) {
-      // Copying to or from a physical H register on x86-64 requires a NOREX
-      // move. Otherwise use a normal move.
-      if ((isHReg(DestReg) || isHReg(SrcReg)) &&
-          TM.getSubtarget<X86Subtarget>().is64Bit())
-        Opc = X86::MOV8rr_NOREX;
-      else
-        Opc = X86::MOV8rr;
-    } else if (CommonRC == &X86::GR64_ABCDRegClass) {
-      Opc = X86::MOV64rr;
-    } else if (CommonRC == &X86::GR32_ABCDRegClass) {
-      Opc = X86::MOV32rr;
-    } else if (CommonRC == &X86::GR16_ABCDRegClass) {
-      Opc = X86::MOV16rr;
-    } else if (CommonRC == &X86::GR8_ABCD_LRegClass) {
       Opc = X86::MOV8rr;
-    } else if (CommonRC == &X86::GR8_ABCD_HRegClass) {
-      if (TM.getSubtarget<X86Subtarget>().is64Bit())
-        Opc = X86::MOV8rr_NOREX;
-      else
-        Opc = X86::MOV8rr;
-    } else if (CommonRC == &X86::GR64_NOREXRegClass ||
-               CommonRC == &X86::GR64_NOREX_NOSPRegClass) {
-      Opc = X86::MOV64rr;
-    } else if (CommonRC == &X86::GR32_NOREXRegClass) {
-      Opc = X86::MOV32rr;
-    } else if (CommonRC == &X86::GR16_NOREXRegClass) {
-      Opc = X86::MOV16rr;
-    } else if (CommonRC == &X86::GR8_NOREXRegClass) {
-      Opc = X86::MOV8rr;
-    } else if (CommonRC == &X86::GR64_TCRegClass) {
-      Opc = X86::MOV64rr_TC;
-    } else if (CommonRC == &X86::GR32_TCRegClass) {
-      Opc = X86::MOV32rr_TC;
-    } else if (CommonRC == &X86::RFP32RegClass) {
-      Opc = X86::MOV_Fp3232;
-    } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) {
-      Opc = X86::MOV_Fp6464;
-    } else if (CommonRC == &X86::RFP80RegClass) {
-      Opc = X86::MOV_Fp8080;
-    } else if (CommonRC == &X86::FR32RegClass) {
-      Opc = X86::FsMOVAPSrr;
-    } else if (CommonRC == &X86::FR64RegClass) {
-      Opc = X86::FsMOVAPDrr;
-    } else if (CommonRC == &X86::VR128RegClass) {
-      Opc = X86::MOVAPSrr;
-    } else if (CommonRC == &X86::VR64RegClass) {
-      Opc = X86::MMX_MOVQ64rr;
-    } else {
-      return false;
-    }
-    BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg);
-    return true;
+  } else if (X86::VR128RegClass.contains(DestReg, SrcReg))
+    Opc = X86::MOVAPSrr;
+  else if (X86::VR64RegClass.contains(DestReg, SrcReg))
+    Opc = X86::MMX_MOVQ64rr;
+
+  if (Opc) {
+    BuildMI(MBB, MI, DL, get(Opc), DestReg)
+      .addReg(SrcReg, getKillRegState(KillSrc));
+    return;
   }
 
   // Moving EFLAGS to / from another register requires a push and a pop.
-  if (SrcRC == &X86::CCRRegClass) {
-    if (SrcReg != X86::EFLAGS)
-      return false;
-    if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
+  if (SrcReg == X86::EFLAGS) {
+    if (X86::GR64RegClass.contains(DestReg)) {
       BuildMI(MBB, MI, DL, get(X86::PUSHF64));
       BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
-      return true;
-    } else if (DestRC == &X86::GR32RegClass ||
-               DestRC == &X86::GR32_NOSPRegClass) {
+      return;
+    } else if (X86::GR32RegClass.contains(DestReg)) {
       BuildMI(MBB, MI, DL, get(X86::PUSHF32));
       BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
-      return true;
+      return;
     }
-  } else if (DestRC == &X86::CCRRegClass) {
-    if (DestReg != X86::EFLAGS)
-      return false;
-    if (SrcRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
-      BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
+  }
+  if (DestReg == X86::EFLAGS) {
+    if (X86::GR64RegClass.contains(SrcReg)) {
+      BuildMI(MBB, MI, DL, get(X86::PUSH64r))
+        .addReg(SrcReg, getKillRegState(KillSrc));
       BuildMI(MBB, MI, DL, get(X86::POPF64));
-      return true;
-    } else if (SrcRC == &X86::GR32RegClass ||
-               DestRC == &X86::GR32_NOSPRegClass) {
-      BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
+      return;
+    } else if (X86::GR32RegClass.contains(SrcReg)) {
+      BuildMI(MBB, MI, DL, get(X86::PUSH32r))
+        .addReg(SrcReg, getKillRegState(KillSrc));
       BuildMI(MBB, MI, DL, get(X86::POPF32));
-      return true;
-    }
-  }
-
-  // Moving from ST(0) turns into FpGET_ST0_32 etc.
-  if (SrcRC == &X86::RSTRegClass) {
-    // Copying from ST(0)/ST(1).
-    if (SrcReg != X86::ST0 && SrcReg != X86::ST1)
-      // Can only copy from ST(0)/ST(1) right now
-      return false;
-    bool isST0 = SrcReg == X86::ST0;
-    unsigned Opc;
-    if (DestRC == &X86::RFP32RegClass)
-      Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32;
-    else if (DestRC == &X86::RFP64RegClass)
-      Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64;
-    else {
-      if (DestRC != &X86::RFP80RegClass)
-        return false;
-      Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80;
+      return;
     }
-    BuildMI(MBB, MI, DL, get(Opc), DestReg);
-    return true;
   }
 
-  // Moving to ST(0) turns into FpSET_ST0_32 etc.
-  if (DestRC == &X86::RSTRegClass) {
-    // Copying to ST(0) / ST(1).
-    if (DestReg != X86::ST0 && DestReg != X86::ST1)
-      // Can only copy to TOS right now
-      return false;
-    bool isST0 = DestReg == X86::ST0;
-    unsigned Opc;
-    if (SrcRC == &X86::RFP32RegClass)
-      Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32;
-    else if (SrcRC == &X86::RFP64RegClass)
-      Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64;
-    else {
-      if (SrcRC != &X86::RFP80RegClass)
-        return false;
-      Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80;
-    }
-    BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg);
-    return true;
-  }
-
-  // Not yet supported!
-  return false;
+  DEBUG(dbgs() << "Cannot copy " << RI.getName(SrcReg)
+               << " to " << RI.getName(DestReg) << '\n');
+  llvm_unreachable("Cannot emit physreg copy instruction");
 }
 
-static unsigned getStoreRegOpcode(unsigned SrcReg,
-                                  const TargetRegisterClass *RC,
-                                  bool isStackAligned,
-                                  TargetMachine &TM) {
-  unsigned Opc = 0;
-  if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
-    Opc = X86::MOV64mr;
-  } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
-    Opc = X86::MOV32mr;
-  } else if (RC == &X86::GR16RegClass) {
-    Opc = X86::MOV16mr;
-  } else if (RC == &X86::GR8RegClass) {
+static unsigned getLoadStoreRegOpcode(unsigned Reg,
+                                      const TargetRegisterClass *RC,
+                                      bool isStackAligned,
+                                      const TargetMachine &TM,
+                                      bool load) {
+  switch (RC->getID()) {
+  default:
+    llvm_unreachable("Unknown regclass");
+  case X86::GR64RegClassID:
+  case X86::GR64_NOSPRegClassID:
+    return load ? X86::MOV64rm : X86::MOV64mr;
+  case X86::GR32RegClassID:
+  case X86::GR32_NOSPRegClassID:
+  case X86::GR32_ADRegClassID:
+    return load ? X86::MOV32rm : X86::MOV32mr;
+  case X86::GR16RegClassID:
+    return load ? X86::MOV16rm : X86::MOV16mr;
+  case X86::GR8RegClassID:
     // Copying to or from a physical H register on x86-64 requires a NOREX
     // move. Otherwise use a normal move.
-    if (isHReg(SrcReg) &&
+    if (isHReg(Reg) &&
         TM.getSubtarget<X86Subtarget>().is64Bit())
-      Opc = X86::MOV8mr_NOREX;
+      return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
     else
-      Opc = X86::MOV8mr;
-  } else if (RC == &X86::GR64_ABCDRegClass) {
-    Opc = X86::MOV64mr;
-  } else if (RC == &X86::GR32_ABCDRegClass) {
-    Opc = X86::MOV32mr;
-  } else if (RC == &X86::GR16_ABCDRegClass) {
-    Opc = X86::MOV16mr;
-  } else if (RC == &X86::GR8_ABCD_LRegClass) {
-    Opc = X86::MOV8mr;
-  } else if (RC == &X86::GR8_ABCD_HRegClass) {
+      return load ? X86::MOV8rm : X86::MOV8mr;
+  case X86::GR64_ABCDRegClassID:
+    return load ? X86::MOV64rm : X86::MOV64mr;
+  case X86::GR32_ABCDRegClassID:
+    return load ? X86::MOV32rm : X86::MOV32mr;
+  case X86::GR16_ABCDRegClassID:
+    return load ? X86::MOV16rm : X86::MOV16mr;
+  case X86::GR8_ABCD_LRegClassID:
+    return load ? X86::MOV8rm : X86::MOV8mr;
+  case X86::GR8_ABCD_HRegClassID:
     if (TM.getSubtarget<X86Subtarget>().is64Bit())
-      Opc = X86::MOV8mr_NOREX;
+      return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
     else
-      Opc = X86::MOV8mr;
-  } else if (RC == &X86::GR64_NOREXRegClass ||
-             RC == &X86::GR64_NOREX_NOSPRegClass) {
-    Opc = X86::MOV64mr;
-  } else if (RC == &X86::GR32_NOREXRegClass) {
-    Opc = X86::MOV32mr;
-  } else if (RC == &X86::GR16_NOREXRegClass) {
-    Opc = X86::MOV16mr;
-  } else if (RC == &X86::GR8_NOREXRegClass) {
-    Opc = X86::MOV8mr;
-  } else if (RC == &X86::GR64_TCRegClass) {
-    Opc = X86::MOV64mr_TC;
-  } else if (RC == &X86::GR32_TCRegClass) {
-    Opc = X86::MOV32mr_TC;
-  } else if (RC == &X86::RFP80RegClass) {
-    Opc = X86::ST_FpP80m;   // pops
-  } else if (RC == &X86::RFP64RegClass) {
-    Opc = X86::ST_Fp64m;
-  } else if (RC == &X86::RFP32RegClass) {
-    Opc = X86::ST_Fp32m;
-  } else if (RC == &X86::FR32RegClass) {
-    Opc = X86::MOVSSmr;
-  } else if (RC == &X86::FR64RegClass) {
-    Opc = X86::MOVSDmr;
-  } else if (RC == &X86::VR128RegClass) {
+      return load ? X86::MOV8rm : X86::MOV8mr;
+  case X86::GR64_NOREXRegClassID:
+  case X86::GR64_NOREX_NOSPRegClassID:
+    return load ? X86::MOV64rm : X86::MOV64mr;
+  case X86::GR32_NOREXRegClassID:
+    return load ? X86::MOV32rm : X86::MOV32mr;
+  case X86::GR16_NOREXRegClassID:
+    return load ? X86::MOV16rm : X86::MOV16mr;
+  case X86::GR8_NOREXRegClassID:
+    return load ? X86::MOV8rm : X86::MOV8mr;
+  case X86::GR64_TCRegClassID:
+    return load ? X86::MOV64rm_TC : X86::MOV64mr_TC;
+  case X86::GR32_TCRegClassID:
+    return load ? X86::MOV32rm_TC : X86::MOV32mr_TC;
+  case X86::RFP80RegClassID:
+    return load ? X86::LD_Fp80m : X86::ST_FpP80m;
+  case X86::RFP64RegClassID:
+    return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+  case X86::RFP32RegClassID:
+    return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+  case X86::FR32RegClassID:
+    return load ? X86::MOVSSrm : X86::MOVSSmr;
+  case X86::FR64RegClassID:
+    return load ? X86::MOVSDrm : X86::MOVSDmr;
+  case X86::VR128RegClassID:
     // If stack is realigned we can use aligned stores.
-    Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr;
-  } else if (RC == &X86::VR64RegClass) {
-    Opc = X86::MMX_MOVQ64mr;
-  } else {
-    llvm_unreachable("Unknown regclass");
+    if (isStackAligned)
+      return load ? X86::MOVAPSrm : X86::MOVAPSmr;
+    else
+      return load ? X86::MOVUPSrm : X86::MOVUPSmr;
+  case X86::VR64RegClassID:
+    return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
   }
+}
+
+static unsigned getStoreRegOpcode(unsigned SrcReg,
+                                  const TargetRegisterClass *RC,
+                                  bool isStackAligned,
+                                  TargetMachine &TM) {
+  return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false);
+}
 
-  return Opc;
+static unsigned getLoadRegOpcode(unsigned DestReg,
+                                 const TargetRegisterClass *RC,
+                                 bool isStackAligned,
+                                 const TargetMachine &TM) {
+  return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true);
 }
 
 void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -2150,7 +2060,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
                                   MachineInstr::mmo_iterator MMOBegin,
                                   MachineInstr::mmo_iterator MMOEnd,
                                   SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  bool isAligned = (*MMOBegin)->getAlignment() >= 16;
+  bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
   unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
   DebugLoc DL;
   MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -2161,72 +2071,6 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
   NewMIs.push_back(MIB);
 }
 
-static unsigned getLoadRegOpcode(unsigned DestReg,
-                                 const TargetRegisterClass *RC,
-                                 bool isStackAligned,
-                                 const TargetMachine &TM) {
-  unsigned Opc = 0;
-  if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
-    Opc = X86::MOV64rm;
-  } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
-    Opc = X86::MOV32rm;
-  } else if (RC == &X86::GR16RegClass) {
-    Opc = X86::MOV16rm;
-  } else if (RC == &X86::GR8RegClass) {
-    // Copying to or from a physical H register on x86-64 requires a NOREX
-    // move. Otherwise use a normal move.
-    if (isHReg(DestReg) &&
-        TM.getSubtarget<X86Subtarget>().is64Bit())
-      Opc = X86::MOV8rm_NOREX;
-    else
-      Opc = X86::MOV8rm;
-  } else if (RC == &X86::GR64_ABCDRegClass) {
-    Opc = X86::MOV64rm;
-  } else if (RC == &X86::GR32_ABCDRegClass) {
-    Opc = X86::MOV32rm;
-  } else if (RC == &X86::GR16_ABCDRegClass) {
-    Opc = X86::MOV16rm;
-  } else if (RC == &X86::GR8_ABCD_LRegClass) {
-    Opc = X86::MOV8rm;
-  } else if (RC == &X86::GR8_ABCD_HRegClass) {
-    if (TM.getSubtarget<X86Subtarget>().is64Bit())
-      Opc = X86::MOV8rm_NOREX;
-    else
-      Opc = X86::MOV8rm;
-  } else if (RC == &X86::GR64_NOREXRegClass ||
-             RC == &X86::GR64_NOREX_NOSPRegClass) {
-    Opc = X86::MOV64rm;
-  } else if (RC == &X86::GR32_NOREXRegClass) {
-    Opc = X86::MOV32rm;
-  } else if (RC == &X86::GR16_NOREXRegClass) {
-    Opc = X86::MOV16rm;
-  } else if (RC == &X86::GR8_NOREXRegClass) {
-    Opc = X86::MOV8rm;
-  } else if (RC == &X86::GR64_TCRegClass) {
-    Opc = X86::MOV64rm_TC;
-  } else if (RC == &X86::GR32_TCRegClass) {
-    Opc = X86::MOV32rm_TC;
-  } else if (RC == &X86::RFP80RegClass) {
-    Opc = X86::LD_Fp80m;
-  } else if (RC == &X86::RFP64RegClass) {
-    Opc = X86::LD_Fp64m;
-  } else if (RC == &X86::RFP32RegClass) {
-    Opc = X86::LD_Fp32m;
-  } else if (RC == &X86::FR32RegClass) {
-    Opc = X86::MOVSSrm;
-  } else if (RC == &X86::FR64RegClass) {
-    Opc = X86::MOVSDrm;
-  } else if (RC == &X86::VR128RegClass) {
-    // If stack is realigned we can use aligned loads.
-    Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm;
-  } else if (RC == &X86::VR64RegClass) {
-    Opc = X86::MMX_MOVQ64rm;
-  } else {
-    llvm_unreachable("Unknown regclass");
-  }
-
-  return Opc;
-}
 
 void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MI,
@@ -2246,7 +2090,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
                                    MachineInstr::mmo_iterator MMOBegin,
                                    MachineInstr::mmo_iterator MMOEnd,
                                    SmallVectorImpl<MachineInstr*> &NewMIs) const {
-  bool isAligned = (*MMOBegin)->getAlignment() >= 16;
+  bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= 16;
   unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
   DebugLoc DL;
   MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@@ -2277,18 +2121,17 @@ bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
   unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r;
   for (unsigned i = CSI.size(); i != 0; --i) {
     unsigned Reg = CSI[i-1].getReg();
-    const TargetRegisterClass *RegClass = CSI[i-1].getRegClass();
     // Add the callee-saved register as live-in. It's killed at the spill.
     MBB.addLiveIn(Reg);
     if (Reg == FPReg)
       // X86RegisterInfo::emitPrologue will handle spilling of frame register.
      continue;
-    if (RegClass != &X86::VR128RegClass && !isWin64) {
+    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
      CalleeFrameSize += SlotSize;
      BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill);
     } else {
-      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass,
-                          &RI);
+      storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+                          &X86::VR128RegClass, &RI);
     }
   }
 
@@ -2315,11 +2158,11 @@ bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
     if (Reg == FPReg)
       // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
       continue;
-    const TargetRegisterClass *RegClass = CSI[i].getRegClass();
-    if (RegClass != &X86::VR128RegClass && !isWin64) {
+    if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
      BuildMI(MBB, MI, DL, get(Opc), Reg);
     } else {
-      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass, &RI);
+      loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+                           &X86::VR128RegClass, &RI);
     }
   }
   return true;
@@ -2492,7 +2335,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   }
 
   // No fusion
-  if (PrintFailedFusing)
+  if (PrintFailedFusing && !MI->isCopy())
     dbgs() << "We failed to fuse operand " << i << " in " << *MI;
   return NULL;
 }
@@ -2610,7 +2453,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   } else if (Ops.size() != 1)
     return NULL;
 
-  SmallVector<MachineOperand,X86AddrNumOperands> MOs;
+  SmallVector<MachineOperand,X86::AddrNumOperands> MOs;
   switch (LoadMI->getOpcode()) {
   case X86::V_SET0PS:
   case X86::V_SET0PD:
@@ -2632,7 +2475,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
     if (TM.getSubtarget<X86Subtarget>().is64Bit())
       PICBase = X86::RIP;
     else
-      // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF);
+      // FIXME: PICBase = getGlobalBaseReg(&MF);
       // This doesn't work for several reasons.
       // 1. GlobalBaseReg may have been spilled.
       // 2. It may not be live at MI.
@@ -2664,7 +2507,7 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
   default: {
     // Folding a normal load. Just copy the load's address operands.
     unsigned NumOps = LoadMI->getDesc().getNumOperands();
-    for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i)
+    for (unsigned i = NumOps - X86::AddrNumOperands; i != NumOps; ++i)
       MOs.push_back(LoadMI->getOperand(i));
     break;
   }
@@ -2727,7 +2570,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
     if (I != OpcodeTablePtr->end())
       return true;
   }
-  return false;
+  return TargetInstrInfoImpl::canFoldMemoryOperand(MI, Ops);
 }
 
 bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
@@ -2751,13 +2594,20 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
   const TargetInstrDesc &TID = get(Opc);
   const TargetOperandInfo &TOI = TID.OpInfo[Index];
   const TargetRegisterClass *RC = TOI.getRegClass(&RI);
-  SmallVector<MachineOperand, X86AddrNumOperands> AddrOps;
+  if (!MI->hasOneMemOperand() &&
+      RC == &X86::VR128RegClass &&
+      !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+    // Without memoperands, loadRegFromAddr and storeRegToStackSlot will
+    // conservatively assume the address is unaligned. That's bad for
+    // performance.
+    return false;
+  SmallVector<MachineOperand, X86::AddrNumOperands> AddrOps;
   SmallVector<MachineOperand,2> BeforeOps;
   SmallVector<MachineOperand,2> AfterOps;
   SmallVector<MachineOperand,4> ImpOps;
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     MachineOperand &Op = MI->getOperand(i);
-    if (i >= Index && i < Index + X86AddrNumOperands)
+    if (i >= Index && i < Index + X86::AddrNumOperands)
       AddrOps.push_back(Op);
     else if (Op.isReg() && Op.isImplicit())
       ImpOps.push_back(Op);
@@ -2776,7 +2626,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
     loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs);
   if (UnfoldStore) {
     // Address operands cannot be marked isKill.
-    for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) {
+    for (unsigned i = 1; i != 1 + X86::AddrNumOperands; ++i) {
       MachineOperand &MO = NewMIs[0]->getOperand(i);
       if (MO.isReg())
         MO.setIsKill(false);
@@ -2873,7 +2723,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
   unsigned NumOps = N->getNumOperands();
   for (unsigned i = 0; i != NumOps-1; ++i) {
     SDValue Op = N->getOperand(i);
-    if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands)
+    if (i >= Index-NumDefs && i < Index-NumDefs + X86::AddrNumOperands)
       AddrOps.push_back(Op);
     else if (i < Index-NumDefs)
       BeforeOps.push_back(Op);
@@ -2892,7 +2742,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
              MachineInstr::mmo_iterator> MMOs =
       MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
                             cast<MachineSDNode>(N)->memoperands_end());
-    bool isAligned = (*MMOs.first)->getAlignment() >= 16;
+    if (!(*MMOs.first) &&
+        RC == &X86::VR128RegClass &&
+        !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+      // Do not introduce a slow unaligned load.
+      return false;
+    bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
     Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
                               VT, MVT::Other, &AddrOps[0], AddrOps.size());
     NewNodes.push_back(Load);
@@ -2929,7 +2784,12 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
              MachineInstr::mmo_iterator> MMOs =
       MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
                              cast<MachineSDNode>(N)->memoperands_end());
-    bool isAligned = (*MMOs.first)->getAlignment() >= 16;
+    if (!(*MMOs.first) &&
+        RC == &X86::VR128RegClass &&
+        !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
+      // Do not introduce a slow unaligned store.
+      return false;
+    bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= 16;
     SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
                                                          isAligned, TM),
                                        dl, MVT::Other,
@@ -3065,16 +2925,16 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
 
   EVT VT = Load1->getValueType(0);
   switch (VT.getSimpleVT().SimpleTy) {
-  default: {
+  default:
     // XMM registers. In 64-bit mode we can be a bit more aggressive since we
     // have 16 of them to play with.
     if (TM.getSubtargetImpl()->is64Bit()) {
       if (NumLoads >= 3)
         return false;
-    } else if (NumLoads)
+    } else if (NumLoads) {
       return false;
+    }
     break;
-  }
   case MVT::i8:
   case MVT::i16:
   case MVT::i32:
@@ -3083,6 +2943,7 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
   case MVT::i64:
   case MVT::f32:
   case MVT::f64:
    if (NumLoads)
      return false;
+    break;
   }
 
   return true;
@@ -3123,6 +2984,8 @@ bool X86InstrInfo::isX86_64ExtendedReg(unsigned RegNo) {
   case X86::R12B:  case X86::R13B:  case X86::R14B:  case X86::R15B:
   case X86::XMM8:  case X86::XMM9:  case X86::XMM10: case X86::XMM11:
   case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15:
+  case X86::YMM8:  case X86::YMM9:  case X86::YMM10: case X86::YMM11:
+  case X86::YMM12: case X86::YMM13: case X86::YMM14: case X86::YMM15:
     return true;
   }
   return false;
@@ -3194,7 +3057,7 @@ unsigned X86InstrInfo::determineREX(const MachineInstr &MI) {
     case X86II::MRM4m: case X86II::MRM5m:
     case X86II::MRM6m: case X86II::MRM7m:
     case X86II::MRMDestMem: {
-      unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands);
+      unsigned e = (isTwoAddr ? X86::AddrNumOperands+1 : X86::AddrNumOperands);
      i = isTwoAddr ? 1 : 0;
      if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e)))
        REX |= 1 << 2;
@@ -3546,7 +3409,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
   case X86II::MRMDestMem: {
     ++FinalSize;
     FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
-    CurOp += X86AddrNumOperands + 1;
+    CurOp += X86::AddrNumOperands + 1;
     if (CurOp != NumOps) {
       ++CurOp;
       FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
@@ -3565,16 +3428,9 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
     break;
 
   case X86II::MRMSrcMem: {
-    int AddrOperands;
-    if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
-        Opcode == X86::LEA16r || Opcode == X86::LEA32r)
-      AddrOperands = X86AddrNumOperands - 1; // No segment register
-    else
-      AddrOperands = X86AddrNumOperands;
-
     ++FinalSize;
     FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
-    CurOp += AddrOperands + 1;
+    CurOp += X86::AddrNumOperands + 1;
     if (CurOp != NumOps) {
       ++CurOp;
       FinalSize += sizeConstant(X86II::getSizeOfImm(Desc->TSFlags));
@@ -3628,7 +3484,7 @@ static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
 
     ++FinalSize;
     FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
-    CurOp += X86AddrNumOperands;
+    CurOp += X86::AddrNumOperands;
 
     if (CurOp != NumOps) {
       const MachineOperand &MO = MI.getOperand(CurOp++);
@@ -3694,6 +3550,8 @@ unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
 /// the global base register value. Output instructions required to
 /// initialize the register in the function entry block, if necessary.
 ///
+/// TODO: Eliminate this and move the code to X86MachineFunctionInfo.
+///
 unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
          "X86-64 PIC uses RIP relative addressing");
@@ -3703,30 +3561,10 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   if (GlobalBaseReg != 0)
     return GlobalBaseReg;
 
-  // Insert the set of GlobalBaseReg into the first MBB of the function
-  MachineBasicBlock &FirstMBB = MF->front();
-  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
-  DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+  // Create the register. The code to initialize it is inserted
+  // later, by the CGBR pass (below).
   MachineRegisterInfo &RegInfo = MF->getRegInfo();
-  unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
-
-  const TargetInstrInfo *TII = TM.getInstrInfo();
-  // Operand of MovePCtoStack is completely ignored by asm printer. It's
-  // only used in JIT code emission as displacement to pc.
-  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
-
-  // If we're using vanilla 'GOT' PIC style, we should use relative addressing
-  // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
-  if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
-    GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
-    // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
-    BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
-      .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
-                                    X86II::MO_GOT_ABSOLUTE_ADDRESS);
-  } else {
-    GlobalBaseReg = PC;
-  }
-
+  GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
   X86FI->setGlobalBaseReg(GlobalBaseReg);
   return GlobalBaseReg;
 }
@@ -3784,3 +3622,65 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
   NopInst.setOpcode(X86::NOOP);
 }
 
+namespace {
+  /// CGBR - Create Global Base Reg pass. This initializes the PIC
+  /// global base register for x86-32.
+  struct CGBR : public MachineFunctionPass {
+    static char ID;
+    CGBR() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      const X86TargetMachine *TM =
+        static_cast<const X86TargetMachine *>(&MF.getTarget());
+
+      assert(!TM->getSubtarget<X86Subtarget>().is64Bit() &&
+             "X86-64 PIC uses RIP relative addressing");
+
+      // Only emit a global base reg in PIC mode.
+      if (TM->getRelocationModel() != Reloc::PIC_)
+        return false;
+
+      // Insert the set of GlobalBaseReg into the first MBB of the function
+      MachineBasicBlock &FirstMBB = MF.front();
+      MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+      DebugLoc DL = FirstMBB.findDebugLoc(MBBI);
+      MachineRegisterInfo &RegInfo = MF.getRegInfo();
+      const X86InstrInfo *TII = TM->getInstrInfo();
+
+      unsigned PC;
+      if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT())
+        PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+      else
+        PC = TII->getGlobalBaseReg(&MF);
+
+      // Operand of MovePCtoStack is completely ignored by asm printer. It's
+      // only used in JIT code emission as displacement to pc.
+      BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
+
+      // If we're using vanilla 'GOT' PIC style, we should use relative addressing
+      // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
+      if (TM->getSubtarget<X86Subtarget>().isPICStyleGOT()) {
+        unsigned GlobalBaseReg = TII->getGlobalBaseReg(&MF);
+        // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
+        BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
+          .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
+                                        X86II::MO_GOT_ABSOLUTE_ADDRESS);
+      }
+
+      return true;
+    }
+
+    virtual const char *getPassName() const {
+      return "X86 PIC Global Base Reg Initialization";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+}
+
+char CGBR::ID = 0;
+FunctionPass*
+llvm::createGlobalBaseRegPass() { return new CGBR(); }