diff options
Diffstat (limited to 'lib/Target')
142 files changed, 2686 insertions, 1321 deletions
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 76cc06e..ff1980d 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -103,11 +103,13 @@ FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM, ObjectCodeEmitter &OCE); FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false); +FunctionPass *createARMExpandPseudoPass(); FunctionPass *createARMConstantIslandPass(); FunctionPass *createNEONPreAllocPass(); FunctionPass *createNEONMoveFixPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); +FunctionPass *createARMMaxStackAlignmentCalculatorPass(); extern Target TheARMTarget, TheThumbTarget; diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h index c603708..ddeb1b9 100644 --- a/lib/Target/ARM/ARMAddressingModes.h +++ b/lib/Target/ARM/ARMAddressingModes.h @@ -520,8 +520,8 @@ namespace ARM_AM { return ((AM5Opc >> 8) & 1) ? sub : add; } - /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and - /// FSTM instructions. + /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and + /// VSTM instructions. static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB, unsigned char Offset) { assert((SubMode == ia || SubMode == db) && @@ -541,13 +541,15 @@ namespace ARM_AM { // // This is used for NEON load / store instructions. // - // addrmode6 := reg with optional writeback + // addrmode6 := reg with optional writeback and alignment // - // This is stored in three operands [regaddr, regupdate, opc]. The first is - // the address register. The second register holds the value of a post-access - // increment for writeback or reg0 if no writeback or if the writeback - // increment is the size of the memory access. The third operand encodes - // whether there is writeback to the address register. + // This is stored in four operands [regaddr, regupdate, opc, align]. The + // first is the address register. The second register holds the value of + // a post-access increment for writeback or reg0 if no writeback or if the + // writeback increment is the size of the memory access. The third + // operand encodes whether there is writeback to the address register. The + // fourth operand is the value of the alignment specifier to use or zero if + // no explicit alignment. static inline unsigned getAM6Opc(bool WB = false) { return (int)WB; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 7c5b0f0..b50b609 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1,4 +1,4 @@ -//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===// +//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -14,11 +14,16 @@ #include "ARMBaseInstrInfo.h" #include "ARM.h" #include "ARMAddressingModes.h" +#include "ARMConstantPoolValue.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "llvm/Constants.h" +#include "llvm/Function.h" +#include "llvm/GlobalValue.h" #include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" @@ -504,9 +509,9 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI, switch (MI.getOpcode()) { default: break; - case ARM::FCPYS: - case ARM::FCPYD: + case ARM::VMOVS: case ARM::VMOVD: + case ARM::VMOVDneon: case ARM::VMOVQ: { SrcReg = MI.getOperand(1).getReg(); DstReg = MI.getOperand(0).getReg(); @@ -556,8 +561,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, return MI->getOperand(0).getReg(); } break; - case ARM::FLDD: - case ARM::FLDS: + case ARM::VLDRD: + case ARM::VLDRS: if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { @@ -595,8 +600,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI, return MI->getOperand(0).getReg(); } break; - case ARM::FSTD: - case ARM::FSTS: + case ARM::VSTRD: + case ARM::VSTRS: if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) { @@ -632,17 +637,17 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB, AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg).addReg(SrcReg))); } else if (DestRC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg) .addReg(SrcReg)); } else if (DestRC == ARM::DPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg) .addReg(SrcReg)); } else if (DestRC == ARM::DPR_VFP2RegisterClass || DestRC == ARM::DPR_8RegisterClass || SrcRC == ARM::DPR_VFP2RegisterClass || SrcRC == ARM::DPR_8RegisterClass) { // Always use neon reg-reg move if source or dest is NEON-only regclass. - BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg).addReg(SrcReg); + BuildMI(MBB, I, DL, get(ARM::VMOVDneon), DestReg).addReg(SrcReg); } else if (DestRC == ARM::QPRRegisterClass || DestRC == ARM::QPR_VFP2RegisterClass || DestRC == ARM::QPR_8RegisterClass) { @@ -662,12 +667,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), MachineMemOperand::MOStore, 0, MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + Align); if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR)) @@ -676,19 +682,27 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS)) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS)) .addReg(SrcReg, getKillRegState(isKill)) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else { assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - BuildMI(MBB, I, DL, get(ARM::VSTRQ)).addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + if (Align >= 16 + && (getRegisterInfo().needsStackRealignment(MF))) { + BuildMI(MBB, I, DL, get(ARM::VST1q64)) + .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO) + .addReg(SrcReg, getKillRegState(isKill)); + } else { + BuildMI(MBB, I, DL, get(ARM::VSTRQ)). + addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI).addImm(0).addMemOperand(MMO); + } } } @@ -700,12 +714,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, if (I != MBB.end()) DL = I->getDebugLoc(); MachineFunction &MF = *MBB.getParent(); MachineFrameInfo &MFI = *MF.getFrameInfo(); + unsigned Align = MFI.getObjectAlignment(FI); MachineMemOperand *MMO = MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI), MachineMemOperand::MOLoad, 0, MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); + Align); if (RC == ARM::GPRRegisterClass) { AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg) @@ -713,18 +728,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } else if (RC == ARM::DPRRegisterClass || RC == ARM::DPR_VFP2RegisterClass || RC == ARM::DPR_8RegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else if (RC == ARM::SPRRegisterClass) { - AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg) + AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg) .addFrameIndex(FI).addImm(0).addMemOperand(MMO)); } else { assert((RC == ARM::QPRRegisterClass || RC == ARM::QPR_VFP2RegisterClass || RC == ARM::QPR_8RegisterClass) && "Unknown regclass!"); // FIXME: Neon instructions should support predicates - BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0). - addMemOperand(MMO); + if (Align >= 16 + && (getRegisterInfo().needsStackRealignment(MF))) { + BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg) + .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO); + } else { + BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0). + addMemOperand(MMO); + } } } @@ -805,7 +826,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, DstSubReg) .addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0); } - } else if (Opc == ARM::FCPYS) { + } else if (Opc == ARM::VMOVS) { unsigned Pred = MI->getOperand(2).getImm(); unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store @@ -813,7 +834,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned SrcSubReg = MI->getOperand(1).getSubReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS)) .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef), SrcSubReg) .addFrameIndex(FI) @@ -823,7 +844,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned DstSubReg = MI->getOperand(0).getSubReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS)) .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | @@ -832,7 +853,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, .addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg); } } - else if (Opc == ARM::FCPYD) { + else if (Opc == ARM::VMOVD) { unsigned Pred = MI->getOperand(2).getImm(); unsigned PredReg = MI->getOperand(3).getReg(); if (OpNum == 0) { // move -> store @@ -840,7 +861,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned SrcSubReg = MI->getOperand(1).getSubReg(); bool isKill = MI->getOperand(1).isKill(); bool isUndef = MI->getOperand(1).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD)) .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef), SrcSubReg) @@ -850,7 +871,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned DstSubReg = MI->getOperand(0).getSubReg(); bool isDead = MI->getOperand(0).isDead(); bool isUndef = MI->getOperand(0).isUndef(); - NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD)) + NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD)) .addReg(DstReg, RegState::Define | getDeadRegState(isDead) | @@ -886,15 +907,114 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI, Opc == ARM::tMOVtgpr2gpr || Opc == ARM::tMOVgpr2tgpr) { return true; - } else if (Opc == ARM::FCPYS || Opc == ARM::FCPYD) { + } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) { return true; - } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVQ) { + } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) { return false; // FIXME } return false; } +void ARMBaseInstrInfo:: +reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const { + DebugLoc dl = Orig->getDebugLoc(); + + if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { + DestReg = TRI->getSubReg(DestReg, SubIdx); + SubIdx = 0; + } + + unsigned Opcode = Orig->getOpcode(); + switch (Opcode) { + default: { + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); + MI->getOperand(0).setReg(DestReg); + MBB.insert(I, MI); + break; + } + case ARM::tLDRpci_pic: + case ARM::t2LDRpci_pic: { + MachineFunction &MF = *MBB.getParent(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + MachineConstantPool *MCP = MF.getConstantPool(); + unsigned CPI = Orig->getOperand(1).getIndex(); + const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI]; + assert(MCPE.isMachineConstantPoolEntry() && + "Expecting a machine constantpool entry!"); + ARMConstantPoolValue *ACPV = + static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal); + unsigned PCLabelId = AFI->createConstPoolEntryUId(); + ARMConstantPoolValue *NewCPV = 0; + if (ACPV->isGlobalValue()) + NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId, + ARMCP::CPValue, 4); + else if (ACPV->isExtSymbol()) + NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(), + ACPV->getSymbol(), PCLabelId, 4); + else if (ACPV->isBlockAddress()) + NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId, + ARMCP::CPBlockAddress, 4); + else + llvm_unreachable("Unexpected ARM constantpool value type!!"); + CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment()); + MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode), + DestReg) + .addConstantPoolIndex(CPI).addImm(PCLabelId); + (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end()); + break; + } + } + + MachineInstr *NewMI = prior(I); + NewMI->getOperand(0).setSubReg(SubIdx); +} + +bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0, + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { + int Opcode = MI0->getOpcode(); + if (Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci_pic) { + if (MI1->getOpcode() != Opcode) + return false; + if (MI0->getNumOperands() != MI1->getNumOperands()) + return false; + + const MachineOperand &MO0 = MI0->getOperand(1); + const MachineOperand &MO1 = MI1->getOperand(1); + if (MO0.getOffset() != MO1.getOffset()) + return false; + + const MachineFunction *MF = MI0->getParent()->getParent(); + const MachineConstantPool *MCP = MF->getConstantPool(); + int CPI0 = MO0.getIndex(); + int CPI1 = MO1.getIndex(); + const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0]; + const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1]; + ARMConstantPoolValue *ACPV0 = + static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal); + ARMConstantPoolValue *ACPV1 = + static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal); + return ACPV0->hasSameValue(ACPV1); + } + + return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI); +} + +unsigned ARMBaseInstrInfo::TailDuplicationLimit(const MachineBasicBlock &MBB, + unsigned DefaultLimit) const { + // If the target processor can predict indirect branches, it is highly + // desirable to duplicate them, since it can often make them predictable. + if (!MBB.empty() && isIndirectBranchOpcode(MBB.back().getOpcode()) && + getSubtarget().hasBranchTargetBuffer()) + return DefaultLimit + 2; + return DefaultLimit; +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. @@ -1022,6 +1142,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, break; } case ARMII::AddrMode4: + case ARMII::AddrMode6: // Can't fold any offset even if it's zero. return false; case ARMII::AddrMode5: { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 2ba3774..73e854f 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -1,4 +1,4 @@ -//===- ARMBaseInstrInfo.h - ARM Base Instruction Information -------------*- C++ -*-===// +//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -261,9 +261,20 @@ public: virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, - const SmallVectorImpl<unsigned> &Ops, + const SmallVectorImpl<unsigned> &Ops, MachineInstr* LoadMI) const; + virtual void reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const; + + virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other, + const MachineRegisterInfo *MRI) const; + + virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB, + unsigned DefaultLimit) const; }; static inline diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 70377f9e..19762ee 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -44,10 +44,6 @@ static cl::opt<bool> ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true), cl::desc("Reuse repeated frame index values")); -static cl::opt<bool> -ARMDynamicStackAlign("arm-dynamic-stack-alignment", cl::Hidden, cl::init(false), - cl::desc("Dynamically re-align the stack as needed")); - unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum, bool *isSPVFP) { if (isSPVFP) @@ -476,11 +472,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg, } static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) { - // FIXME: For now, force at least 128-bit alignment. This will push the - // nightly tester harder for making sure things work correctly. When - // we're ready to enable this for real, this goes back to starting at zero. - unsigned MaxAlign = 16; -// unsigned MaxAlign = 0; + unsigned MaxAlign = 0; for (int i = FFI->getObjectIndexBegin(), e = FFI->getObjectIndexEnd(); i != e; ++i) { @@ -508,20 +500,12 @@ bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const { bool ARMBaseRegisterInfo:: needsStackRealignment(const MachineFunction &MF) const { - // Only do this for ARM if explicitly enabled - // FIXME: Once it's passing all the tests, enable by default - if (!ARMDynamicStackAlign) - return false; - - // FIXME: To force more brutal testing, realign whether we need to or not. - // Change this to be more selective when we turn it on for real, of course. const MachineFrameInfo *MFI = MF.getFrameInfo(); const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); -// unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); + unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); return (RealignStack && !AFI->isThumb1OnlyFunction() && - AFI->hasStackFrame() && -// (MFI->getMaxAlignment() > StackAlign) && + (MFI->getMaxAlignment() > StackAlign) && !MFI->hasVarSizedObjects()); } @@ -529,7 +513,8 @@ bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const const MachineFrameInfo *MFI = MF.getFrameInfo(); if (NoFramePointerElim && MFI->hasCalls()) return true; - return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken(); + return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() + || needsStackRealignment(MF); } /// estimateStackSize - Estimate and return the size of the frame. @@ -604,7 +589,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Calculate and set max stack object alignment early, so we can decide // whether we will need stack realignment (and thus FP). - if (ARMDynamicStackAlign) { + if (RealignStack) { unsigned MaxAlign = std::max(MFI->getMaxAlignment(), calculateMaxStackAlignment(MFI)); MFI->setMaxAlignment(MaxAlign); @@ -789,7 +774,8 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve a slot closest to SP or frame pointer. const TargetRegisterClass *RC = ARM::GPRRegisterClass; RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment())); + RC->getAlignment(), + false)); } } } @@ -806,7 +792,8 @@ unsigned ARMBaseRegisterInfo::getRARegister() const { return ARM::LR; } -unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned +ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { if (STI.isTargetDarwin() || hasFP(MF)) return FramePtr; return ARM::SP; @@ -1183,7 +1170,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // as much as possible above, handle the rest, providing a register that is // SP+LargeImm. assert((Offset || - (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) && + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) && "This code isn't needed if offset already handled!"); unsigned ScratchReg = 0; @@ -1192,7 +1180,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); if (Offset == 0) - // Must be addrmode4. + // Must be addrmode4/6. MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); else { ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); @@ -1346,7 +1334,7 @@ emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); - movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 0, 3, STI); + movePastCSLoadStoreOps(MBB, MBBI, ARM::VSTRD, 0, 3, STI); NumBytes = DPRCSOffset; if (NumBytes) { // Adjust SP after all the callee-save spills. @@ -1385,7 +1373,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII, const unsigned *CSRegs) { - return ((MI->getOpcode() == (int)ARM::FLDD || + return ((MI->getOpcode() == (int)ARM::VLDRD || MI->getOpcode() == (int)ARM::LDR || MI->getOpcode() == (int)ARM::t2LDRi12) && MI->getOperand(1).isFI() && @@ -1411,7 +1399,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { if (NumBytes != 0) emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { - // Unwind MBBI to point to first LDR / FLDD. + // Unwind MBBI to point to first LDR / VLDRD. const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do @@ -1459,7 +1447,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Move SP to start of integer callee save spill area 2. - movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 0, 3, STI); + movePastCSLoadStoreOps(MBB, MBBI, ARM::VLDRD, 0, 3, STI); emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize()); // Move SP to start of integer callee save spill area 1. @@ -1475,4 +1463,48 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); } +namespace { + struct MaximalStackAlignmentCalculator : public MachineFunctionPass { + static char ID; + MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + MachineRegisterInfo &RI = MF.getRegInfo(); + + // Calculate max stack alignment of all already allocated stack objects. + unsigned MaxAlign = calculateMaxStackAlignment(FFI); + + // Be over-conservative: scan over all vreg defs and find, whether vector + // registers are used. If yes - there is probability, that vector register + // will be spilled and thus stack needs to be aligned properly. + for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; + RegNum < RI.getLastVirtReg(); ++RegNum) + MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); + + if (FFI->getMaxAlignment() == MaxAlign) + return false; + + FFI->setMaxAlignment(MaxAlign); + return true; + } + + virtual const char *getPassName() const { + return "ARM Stack Required Alignment Auto-Detector"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; + + char MaximalStackAlignmentCalculator::ID = 0; +} + +FunctionPass* +llvm::createARMMaxStackAlignmentCalculatorPass() { + return new MaximalStackAlignmentCalculator(); +} + #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h index 029e468..4b267b0 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -105,7 +105,7 @@ public: // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 13cf676..766acff 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -168,7 +168,8 @@ namespace { /// Routines that handle operands which add machine relocations which are /// fixed up by the relocation stage. void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, - bool NeedStub, bool Indirect, intptr_t ACPV = 0); + bool MayNeedFarStub, bool Indirect, + intptr_t ACPV = 0); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); void emitConstPoolAddress(unsigned CPI, unsigned Reloc); void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc); @@ -277,13 +278,13 @@ unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI, /// template<class CodeEmitter> void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, - bool NeedStub, bool Indirect, + bool MayNeedFarStub, bool Indirect, intptr_t ACPV) { MachineRelocation MR = Indirect ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, NeedStub) + GV, ACPV, MayNeedFarStub) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, ACPV, NeedStub); + GV, ACPV, MayNeedFarStub); MCE.addRelocation(MR); } diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 9819625..d22c43a 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -31,6 +31,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/CommandLine.h" #include <algorithm> using namespace llvm; @@ -42,6 +43,13 @@ STATISTIC(NumTBs, "Number of table branches generated"); STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk"); STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk"); STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed"); +STATISTIC(NumJTMoved, "Number of jump table destination blocks moved"); +STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted"); + + +static cl::opt<bool> +AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true), + cl::desc("Adjust basic block layout to better use TB[BH]")); namespace { /// ARMConstantIslands - Due to limited PC-relative displacements, ARM @@ -174,6 +182,7 @@ namespace { void DoInitialPlacement(MachineFunction &MF, std::vector<MachineInstr*> &CPEMIs); CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI); + void JumpTableFunctionScan(MachineFunction &MF); void InitialFunctionScan(MachineFunction &MF, const std::vector<MachineInstr*> &CPEMIs); MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI); @@ -201,7 +210,10 @@ namespace { bool UndoLRSpillRestore(); bool OptimizeThumb2Instructions(MachineFunction &MF); bool OptimizeThumb2Branches(MachineFunction &MF); + bool ReorderThumb2JumpTables(MachineFunction &MF); bool OptimizeThumb2JumpTables(MachineFunction &MF); + MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB, + MachineBasicBlock *JTBB); unsigned GetOffsetOf(MachineInstr *MI) const; void dumpBBs(); @@ -262,6 +274,18 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // the numbers agree with the position of the block in the function. MF.RenumberBlocks(); + // Try to reorder and otherwise adjust the block layout to make good use + // of the TB[BH] instructions. + bool MadeChange = false; + if (isThumb2 && AdjustJumpTableBlocks) { + JumpTableFunctionScan(MF); + MadeChange |= ReorderThumb2JumpTables(MF); + // Data is out of date, so clear it. It'll be re-computed later. + T2JumpTables.clear(); + // Blocks may have shifted around. Keep the numbering up to date. + MF.RenumberBlocks(); + } + // Thumb1 functions containing constant pools get 4-byte alignment. // This is so we can keep exact track of where the alignment padding goes. @@ -292,7 +316,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) { // Iteratively place constant pool entries and fix up branches until there // is no change. - bool MadeChange = false; unsigned NoCPIters = 0, NoBRIters = 0; while (true) { bool CPChange = false; @@ -409,6 +432,21 @@ ARMConstantIslands::CPEntry return NULL; } +/// JumpTableFunctionScan - Do a scan of the function, building up +/// information about the sizes of each block and the locations of all +/// the jump tables. +void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) { + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock &MBB = *MBBI; + + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) + if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT) + T2JumpTables.push_back(I); + } +} + /// InitialFunctionScan - Do the initial scan of the function, building up /// information about the sizes of each block, the location of all the water, /// and finding all of the constant pool users. @@ -541,8 +579,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF, Scale = 4; // +(offset_8*4) break; - case ARM::FLDD: - case ARM::FLDS: + case ARM::VLDRD: + case ARM::VLDRS: Bits = 8; Scale = 4; // +-(offset_8*4) NegOk = true; @@ -1552,7 +1590,6 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) { return MadeChange; } - /// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller /// jumptables when it's possible. bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { @@ -1560,7 +1597,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { // FIXME: After the tables are shrunk, can we get rid some of the // constantpool tables? - const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { MachineInstr *MI = T2JumpTables[i]; @@ -1660,3 +1697,99 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) { return MadeChange; } + +/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that +/// jump tables always branch forwards, since that's what tbb and tbh need. +bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) { + bool MadeChange = false; + + MachineJumpTableInfo *MJTI = MF.getJumpTableInfo(); + const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables(); + for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) { + MachineInstr *MI = T2JumpTables[i]; + const TargetInstrDesc &TID = MI->getDesc(); + unsigned NumOps = TID.getNumOperands(); + unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2); + MachineOperand JTOP = MI->getOperand(JTOpIdx); + unsigned JTI = JTOP.getIndex(); + assert(JTI < JT.size()); + + // We prefer if target blocks for the jump table come after the jump + // instruction so we can use TB[BH]. Loop through the target blocks + // and try to adjust them such that that's true. + int JTNumber = MI->getParent()->getNumber(); + const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs; + for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) { + MachineBasicBlock *MBB = JTBBs[j]; + int DTNumber = MBB->getNumber(); + + if (DTNumber < JTNumber) { + // The destination precedes the switch. Try to move the block forward + // so we have a positive offset. + MachineBasicBlock *NewBB = + AdjustJTTargetBlockForward(MBB, MI->getParent()); + if (NewBB) + MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB); + MadeChange = true; + } + } + } + + return MadeChange; +} + +MachineBasicBlock *ARMConstantIslands:: +AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) +{ + MachineFunction &MF = *BB->getParent(); + + // If it's the destination block is terminated by an unconditional branch, + // try to move it; otherwise, create a new block following the jump + // table that branches back to the actual target. This is a very simple + // heuristic. FIXME: We can definitely improve it. + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector<MachineOperand, 4> Cond; + SmallVector<MachineOperand, 4> CondPrior; + MachineFunction::iterator BBi = BB; + MachineFunction::iterator OldPrior = prior(BBi); + + // If the block terminator isn't analyzable, don't try to move the block + bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond); + + // If the block ends in an unconditional branch, move it. The prior block + // has to have an analyzable terminator for us to move this one. Be paranoid + // and make sure we're not trying to move the entry block of the function. + if (!B && Cond.empty() && BB != MF.begin() && + !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) { + BB->moveAfter(JTBB); + OldPrior->updateTerminator(); + BB->updateTerminator(); + // Update numbering to account for the block being moved. + MF.RenumberBlocks(); + ++NumJTMoved; + return NULL; + } + + // Create a new MBB for the code after the jump BB. + MachineBasicBlock *NewBB = + MF.CreateMachineBasicBlock(JTBB->getBasicBlock()); + MachineFunction::iterator MBBI = JTBB; ++MBBI; + MF.insert(MBBI, NewBB); + + // Add an unconditional branch from NewBB to BB. + // There doesn't seem to be meaningful DebugInfo available; this doesn't + // correspond directly to anything in the source. + assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?"); + BuildMI(NewBB, DebugLoc::getUnknownLoc(), TII->get(ARM::t2B)).addMBB(BB); + + // Update internal data structures to account for the newly inserted MBB. + MF.RenumberBlocks(NewBB); + + // Update the CFG. + NewBB->addSuccessor(BB); + JTBB->removeSuccessor(BB); + JTBB->addSuccessor(NewBB); + + ++NumJTInserted; + return NewBB; +} diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index efa941a..90dd0c7 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -62,9 +62,10 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP, ARMConstantPoolValue *CPV = (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; if (CPV->CVal == CVal && - CPV->S == S && CPV->LabelId == LabelId && - CPV->PCAdjust == PCAdjust) + CPV->PCAdjust == PCAdjust && + (CPV->S == S || strcmp(CPV->S, S) == 0) && + (CPV->Modifier == Modifier || strcmp(CPV->Modifier, Modifier) == 0)) return i; } } @@ -84,6 +85,23 @@ ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) { ID.AddInteger(PCAdjust); } +bool +ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) { + if (ACPV->Kind == Kind && + ACPV->CVal == CVal && + ACPV->PCAdjust == PCAdjust && + (ACPV->S == S || strcmp(ACPV->S, S) == 0) && + (ACPV->Modifier == Modifier || strcmp(ACPV->Modifier, Modifier) == 0)) { + if (ACPV->LabelId == LabelId) + return true; + // Two PC relative constpool entries containing the same GV address or + // external symbols. FIXME: What about blockaddress? + if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol) + return true; + } + return false; +} + void ARMConstantPoolValue::dump() const { errs() << " " << *this; } diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h index 8fb3f92..741acde 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.h +++ b/lib/Target/ARM/ARMConstantPoolValue.h @@ -81,6 +81,10 @@ public: virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID); + /// hasSameValue - Return true if this ARM constpool value + /// can share the same constantpool entry as another ARM constpool value. + bool hasSameValue(ARMConstantPoolValue *ACPV); + void print(raw_ostream *O) const { if (O) print(*O); } void print(raw_ostream &O) const; void dump() const; diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp new file mode 100644 index 0000000..4d0f899 --- /dev/null +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -0,0 +1,115 @@ +//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass that expand pseudo instructions into target +// instructions to allow proper scheduling, if-conversion, and other late +// optimizations. This pass should be run after register allocation but before +// post- regalloc scheduling pass. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "arm-pseudo" +#include "ARM.h" +#include "ARMBaseInstrInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" + +using namespace llvm; + +namespace { + class ARMExpandPseudo : public MachineFunctionPass { + public: + static char ID; + ARMExpandPseudo() : MachineFunctionPass(&ID) {} + + const TargetInstrInfo *TII; + + virtual bool runOnMachineFunction(MachineFunction &Fn); + + virtual const char *getPassName() const { + return "ARM pseudo instruction expansion pass"; + } + + private: + bool ExpandMBB(MachineBasicBlock &MBB); + }; + char ARMExpandPseudo::ID = 0; +} + +bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { + bool Modified = false; + + MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + while (MBBI != E) { + MachineInstr &MI = *MBBI; + MachineBasicBlock::iterator NMBBI = next(MBBI); + + unsigned Opcode = MI.getOpcode(); + switch (Opcode) { + default: break; + case ARM::tLDRpci_pic: + case ARM::t2LDRpci_pic: { + unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) + ? ARM::tLDRpci : ARM::t2LDRpci; + unsigned DstReg = MI.getOperand(0).getReg(); + if (!MI.getOperand(0).isDead()) { + MachineInstr *NewMI = + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(NewLdOpc), DstReg) + .addOperand(MI.getOperand(1))); + NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) + .addReg(DstReg, getDefRegState(true)) + .addReg(DstReg) + .addOperand(MI.getOperand(2)); + } + MI.eraseFromParent(); + Modified = true; + break; + } + case ARM::t2MOVi32imm: { + unsigned DstReg = MI.getOperand(0).getReg(); + unsigned Imm = MI.getOperand(1).getImm(); + unsigned Lo16 = Imm & 0xffff; + unsigned Hi16 = (Imm >> 16) & 0xffff; + if (!MI.getOperand(0).isDead()) { + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::t2MOVi16), DstReg) + .addImm(Lo16)); + AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get(ARM::t2MOVTi16)) + .addReg(DstReg, getDefRegState(true)) + .addReg(DstReg).addImm(Hi16)); + } + MI.eraseFromParent(); + Modified = true; + } + // FIXME: expand t2MOVi32imm + } + MBBI = NMBBI; + } + + return Modified; +} + +bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { + TII = MF.getTarget().getInstrInfo(); + + bool Modified = false; + for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E; + ++MFI) + Modified |= ExpandMBB(*MFI); + return Modified; +} + +/// createARMExpandPseudoPass - returns an instance of the pseudo instruction +/// expansion pass. +FunctionPass *llvm::createARMExpandPseudoPass() { + return new ARMExpandPseudo(); +} diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 1489cab..9be7454 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -81,7 +81,7 @@ public: bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base, SDValue &Offset); bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, - SDValue &Opc); + SDValue &Opc, SDValue &Align); bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset, SDValue &Label); @@ -187,8 +187,6 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { void ARMDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); - SelectRoot(*CurDAG); CurDAG->RemoveDeadNodes(); } @@ -491,11 +489,13 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N, bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update, - SDValue &Opc) { + SDValue &Opc, SDValue &Align) { Addr = N; // Default to no writeback. Update = CurDAG->getRegister(0, MVT::i32); Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32); + // Default to no alignment. + Align = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1010,8 +1010,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1036,10 +1036,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; std::vector<EVT> ResTys(NumVecs, VT); ResTys.push_back(MVT::Other); - return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); + return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1047,10 +1047,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, // Quad registers are directly supported for VLD2, // loading 2 pairs of D regs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain }; + const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; std::vector<EVT> ResTys(4, VT); ResTys.push_back(MVT::Other); - SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5); Chain = SDValue(VLd, 4); // Combine the even and odd subregs to produce the result. @@ -1071,14 +1071,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain }; - SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4); + const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain }; + SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5); Chain = SDValue(VLdA, NumVecs+1); // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; - const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain }; - SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4); + const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, + Align, Chain }; + SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5); Chain = SDValue(VLdB, NumVecs+1); // Combine the even and odd subregs to produce the result. @@ -1098,8 +1099,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1126,13 +1127,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, Ops.push_back(MemAddr); Ops.push_back(MemUpdate); Ops.push_back(MemOpc); + Ops.push_back(Align); if (is64BitVector) { unsigned Opc = DOpcodes[OpcodeIndex]; for (unsigned Vec = 0; Vec < NumVecs; ++Vec) Ops.push_back(N->getOperand(Vec+3)); Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5); } EVT RegVT = GetNEONSubregVT(VT); @@ -1147,7 +1149,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, N->getOperand(Vec+3))); } Ops.push_back(Chain); - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9); } // Otherwise, quad registers are stored with two separate instructions, @@ -1163,18 +1165,18 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, Ops.push_back(Chain); unsigned Opc = QOpcodes0[OpcodeIndex]; SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+4); + MVT::Other, Ops.data(), NumVecs+5); Chain = SDValue(VStA, 1); // Store the odd subregs. Ops[0] = SDValue(VStA, 0); // MemAddr for (unsigned Vec = 0; Vec < NumVecs; ++Vec) - Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, N->getOperand(Vec+3)); - Ops[NumVecs+3] = Chain; + Ops[NumVecs+4] = Chain; Opc = QOpcodes1[OpcodeIndex]; SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, Ops.data(), NumVecs+4); + MVT::Other, Ops.data(), NumVecs+5); Chain = SDValue(VStB, 1); ReplaceUses(SDValue(N, 0), Chain); return NULL; @@ -1188,8 +1190,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, SDNode *N = Op.getNode(); DebugLoc dl = N->getDebugLoc(); - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + SDValue MemAddr, MemUpdate, MemOpc, Align; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align)) return NULL; SDValue Chain = N->getOperand(0); @@ -1226,6 +1228,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, Ops.push_back(MemAddr); Ops.push_back(MemUpdate); Ops.push_back(MemOpc); + Ops.push_back(Align); unsigned Opc = 0; if (is64BitVector) { @@ -1463,8 +1466,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { } break; } - case ARMISD::FMRRD: - return CurDAG->getMachineNode(ARM::FMRRD, dl, MVT::i32, MVT::i32, + case ARMISD::VMOVRRD: + return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32, Op.getOperand(0), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32)); case ISD::UMUL_LOHI: { @@ -1653,10 +1656,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { : ARM::MOVCCr; break; case MVT::f32: - Opc = ARM::FCPYScc; + Opc = ARM::VMOVScc; break; case MVT::f64: - Opc = ARM::FCPYDcc; + Opc = ARM::VMOVDcc; break; } return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); @@ -1680,10 +1683,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { default: assert(false && "Illegal conditional move type!"); break; case MVT::f32: - Opc = ARM::FNEGScc; + Opc = ARM::VNEGScc; break; case MVT::f64: - Opc = ARM::FNEGDcc; + Opc = ARM::VNEGDcc; break; } return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index b6ce5dd..c3af8e6 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -133,7 +133,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { } ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)), ARMPCLabelIndex(0) { + : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); if (Subtarget->isTargetDarwin()) { @@ -389,7 +389,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) - // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2. + // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR iff target supports vfp2. setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom); // We want to custom lower some of our intrinsics. @@ -434,7 +434,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // We have target-specific dag combine patterns for the following nodes: - // ARMISD::FMRRD - No need to call setTargetDAGCombine + // ARMISD::VMOVRRD - No need to call setTargetDAGCombine setTargetDAGCombine(ISD::ADD); setTargetDAGCombine(ISD::SUB); @@ -493,8 +493,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; case ARMISD::RRX: return "ARMISD::RRX"; - case ARMISD::FMRRD: return "ARMISD::FMRRD"; - case ARMISD::FMDRR: return "ARMISD::FMDRR"; + case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; + case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; @@ -790,7 +790,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); - Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); if (VA.getLocVT() == MVT::v2f64) { SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); @@ -805,7 +805,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); Chain = Hi.getValue(1); InFlag = Hi.getValue(2); - Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, DAG.getConstant(1, MVT::i32)); } @@ -870,7 +870,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, SmallVector<SDValue, 8> &MemOpChains, ISD::ArgFlagsTy Flags) { - SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Arg); RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); @@ -1004,6 +1004,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, bool isDirect = false; bool isARMFunc = false; bool isLocalARMFunc = false; + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { GlobalValue *GV = G->getGlobal(); isDirect = true; @@ -1015,6 +1017,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, isLocalARMFunc = !Subtarget->isThumb() && !isExt; // tBX takes a register source operand. if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); @@ -1023,7 +1026,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, PseudoSourceValue::getConstantPool(), 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); } else @@ -1036,6 +1039,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // tBX takes a register source operand. const char *Sym = S->getSymbol(); if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), Sym, ARMPCLabelIndex, 4); SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); @@ -1043,7 +1047,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, PseudoSourceValue::getConstantPool(), 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); } else @@ -1145,7 +1149,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, // Extract the first half and return it in two registers. SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, DAG.getConstant(0, MVT::i32)); - SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl, + SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), Half); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); @@ -1162,7 +1166,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, } // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is // available. - SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl, + SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); Flag = Chain.getValue(1); @@ -1208,6 +1212,9 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { } SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = 0; DebugLoc DL = Op.getDebugLoc(); EVT PtrVT = getPointerTy(); BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); @@ -1217,6 +1224,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); } else { unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; + ARMPCLabelIndex = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, ARMCP::CPBlockAddress, PCAdj); @@ -1227,7 +1235,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) { PseudoSourceValue::getConstantPool(), 0); if (RelocM == Reloc::Static) return Result; - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); } @@ -1238,6 +1246,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, DebugLoc dl = GA->getDebugLoc(); EVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, ARMCP::CPValue, PCAdj, "tlsgd", true); @@ -1247,7 +1258,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, PseudoSourceValue::getConstantPool(), 0); SDValue Chain = Argument.getValue(1); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); // call __tls_get_addr. @@ -1279,7 +1290,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); if (GV->isDeclaration()) { - // initial exec model + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); + // Initial exec model. unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, @@ -1290,7 +1304,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, PseudoSourceValue::getConstantPool(), 0); Chain = Offset.getValue(1); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, @@ -1355,6 +1369,9 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) { + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = 0; EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); @@ -1363,6 +1380,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (RelocM == Reloc::Static) CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); else { + ARMPCLabelIndex = AFI->createConstPoolEntryUId(); unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); @@ -1375,7 +1393,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } @@ -1390,6 +1408,9 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG){ assert(Subtarget->isTargetELF() && "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; @@ -1400,7 +1421,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, PseudoSourceValue::getConstantPool(), 0); - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } @@ -1416,6 +1437,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { } case Intrinsic::eh_sjlj_lsda: { MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); EVT PtrVT = getPointerTy(); DebugLoc dl = Op.getDebugLoc(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); @@ -1433,7 +1456,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { - SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32); + SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } return Result; @@ -1522,7 +1545,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, if (NextVA.isMemLoc()) { unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8; MachineFrameInfo *MFI = MF.getFrameInfo(); - int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset()); + int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(), + true, false); // Create load node to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1533,7 +1557,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); } - return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2); + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); } SDValue @@ -1636,7 +1660,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset()); + int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), + true, false); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -1664,7 +1689,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, // the result of va_next. AFI->setVarArgsRegSaveSize(VARegSaveSize); VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset + - VARegSaveSize - VARegSize); + VARegSaveSize - VARegSize, + true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy()); SmallVector<SDValue, 4> MemOps; @@ -1688,7 +1714,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, &MemOps[0], MemOps.size()); } else // This will point to the next argument passed via stack. - VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset); + VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false); } return Chain; @@ -1710,46 +1736,41 @@ static bool isFloatingPointZero(SDValue Op) { return false; } -static bool isLegalCmpImmediate(unsigned C, bool isThumb1Only) { - return ( isThumb1Only && (C & ~255U) == 0) || - (!isThumb1Only && ARM_AM::getSOImmVal(C) != -1); -} - /// Returns appropriate ARM CMP (cmp) and corresponding condition code for /// the given operands. -static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMCC, SelectionDAG &DAG, bool isThumb1Only, - DebugLoc dl) { +SDValue +ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) { if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); - if (!isLegalCmpImmediate(C, isThumb1Only)) { + if (!isLegalICmpImmediate(C)) { // Constant does not fit, try adjusting it by one? switch (CC) { default: break; case ISD::SETLT: case ISD::SETGE: - if (isLegalCmpImmediate(C-1, isThumb1Only)) { + if (isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETULT: case ISD::SETUGE: - if (C > 0 && isLegalCmpImmediate(C-1, isThumb1Only)) { + if (C > 0 && isLegalICmpImmediate(C-1)) { CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; RHS = DAG.getConstant(C-1, MVT::i32); } break; case ISD::SETLE: case ISD::SETGT: - if (isLegalCmpImmediate(C+1, isThumb1Only)) { + if (isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; RHS = DAG.getConstant(C+1, MVT::i32); } break; case ISD::SETULE: case ISD::SETUGT: - if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb1Only)) { + if (C < 0xffffffff && isLegalICmpImmediate(C+1)) { CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; RHS = DAG.getConstant(C+1, MVT::i32); } @@ -1785,8 +1806,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); } -static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { +SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); @@ -1798,7 +1818,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, if (LHS.getValueType() == MVT::i32) { SDValue ARMCC; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp); } @@ -1820,8 +1840,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, return Result; } -static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { +SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) { SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -1832,7 +1851,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG, if (LHS.getValueType() == MVT::i32) { SDValue ARMCC; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl); + SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMCC, CCR,Cmp); } @@ -2049,16 +2068,16 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { SDValue Op = N->getOperand(0); DebugLoc dl = N->getDebugLoc(); if (N->getValueType(0) == MVT::f64) { - // Turn i64->f64 into FMDRR. + // Turn i64->f64 into VMOVDRR. SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(1, MVT::i32)); - return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi); + return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); } - // Turn f64->i64 into FMRRD. - SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl, + // Turn f64->i64 into VMOVRRD. + SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); // Merge the pieces into a single i64 value. @@ -2115,8 +2134,7 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { /// LowerShiftRightParts - Lower SRA_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. -static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { +SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -2140,7 +2158,7 @@ static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, - ARMCC, DAG, ST->isThumb1Only(), dl); + ARMCC, DAG, dl); SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR, Cmp); @@ -2151,8 +2169,7 @@ static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two /// i32 values and take a 2 x i32 value to shift plus a shift amount. -static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG, - const ARMSubtarget *ST) { +SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) { assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); @@ -2174,7 +2191,7 @@ static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG, SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, - ARMCC, DAG, ST->isThumb1Only(), dl); + ARMCC, DAG, dl); SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC, CCR, Cmp); @@ -2860,8 +2877,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget); - case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget); + case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); @@ -2878,9 +2895,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); - case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG, Subtarget); + case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); case ISD::SRL_PARTS: - case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, Subtarget); + case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); case ISD::VSETCC: return LowerVSETCC(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); @@ -3155,12 +3172,12 @@ static SDValue PerformSUBCombine(SDNode *N, return SDValue(); } -/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD. -static SDValue PerformFMRRDCombine(SDNode *N, +/// PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD. +static SDValue PerformVMOVRRDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { // fmrrd(fmdrr x, y) -> x,y SDValue InDouble = N->getOperand(0); - if (InDouble.getOpcode() == ARMISD::FMDRR) + if (InDouble.getOpcode() == ARMISD::VMOVDRR) return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); return SDValue(); } @@ -3455,7 +3472,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, default: break; case ISD::ADD: return PerformADDCombine(N, DCI); case ISD::SUB: return PerformSUBCombine(N, DCI); - case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI); + case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: @@ -3683,6 +3700,18 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, return true; } +/// isLegalICmpImmediate - Return true if the specified immediate is legal +/// icmp immediate, that is the target has icmp instructions which can compare +/// a register against the immediate without having to materialize the +/// immediate into a register. +bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { + if (!Subtarget->isThumb()) + return ARM_AM::getSOImmVal(Imm) != -1; + if (Subtarget->isThumb2()) + return ARM_AM::getT2SOImmVal(Imm) != -1; + return Imm >= 0 && Imm <= 255; +} + static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, @@ -3737,7 +3766,7 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, return true; } - // FIXME: Use FLDM / FSTM to emulate indexed FP load / store. + // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. return false; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 9c7a91d..4f31f8a 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -62,8 +62,8 @@ namespace llvm { SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out. RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag. - FMRRD, // double to two gprs. - FMDRR, // Two gprs to double. + VMOVRRD, // double to two gprs. + VMOVDRR, // Two gprs to double. EH_SJLJ_SETJMP, // SjLj exception handling setjmp. EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. @@ -180,6 +180,12 @@ namespace llvm { virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const; bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const; + /// isLegalICmpImmediate - Return true if the specified immediate is legal + /// icmp immediate, that is the target has icmp instructions which can compare + /// a register against the immediate without having to materialize the + /// immediate into a register. + virtual bool isLegalICmpImmediate(int64_t Imm) const; + /// getPreIndexedAddressParts - returns true by value, base pointer and /// offset pointer and addressing mode by reference if the node's address /// can be legally represented as pre-indexed load / store address. @@ -278,8 +284,12 @@ namespace llvm { SelectionDAG &DAG); SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG); SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); + SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG); + SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG); SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG); + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, @@ -315,6 +325,9 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, DebugLoc dl, SelectionDAG &DAG); + + SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl); }; } diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp index 86bbe2a..87bb12b 100644 --- a/lib/Target/ARM/ARMInstrInfo.cpp +++ b/lib/Target/ARM/ARMInstrInfo.cpp @@ -80,22 +80,26 @@ bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { } void ARMInstrInfo:: -reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig) const { +reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, + unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const { DebugLoc dl = Orig->getDebugLoc(); - if (Orig->getOpcode() == ARM::MOVi2pieces) { + unsigned Opcode = Orig->getOpcode(); + switch (Opcode) { + default: + break; + case ARM::MOVi2pieces: { RI.emitLoadConstPool(MBB, I, dl, DestReg, SubIdx, Orig->getOperand(1).getImm(), (ARMCC::CondCodes)Orig->getOperand(2).getImm(), Orig->getOperand(3).getReg()); + MachineInstr *NewMI = prior(I); + NewMI->getOperand(0).setSubReg(SubIdx); return; } + } - MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->getOperand(0).setReg(DestReg); - MBB.insert(I, MI); + return ARMBaseInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, TRI); } diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h index 5d1678d..4319577 100644 --- a/lib/Target/ARM/ARMInstrInfo.h +++ b/lib/Target/ARM/ARMInstrInfo.h @@ -35,15 +35,16 @@ public: // Return true if the block does not fall through. bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const; + void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + unsigned DestReg, unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const; + /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should /// always be able to get register info as well (through this method). /// const ARMRegisterInfo &getRegisterInfo() const { return RI; } - - void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig) const; }; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index cbe80b4..3fe634e 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -340,9 +340,9 @@ def addrmode5 : Operand<i32>, // addrmode6 := reg with optional writeback // def addrmode6 : Operand<i32>, - ComplexPattern<i32, 3, "SelectAddrMode6", []> { + ComplexPattern<i32, 4, "SelectAddrMode6", []> { let PrintMethod = "printAddrMode6Operand"; - let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm); + let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm); } // addrmodepc := pc + reg @@ -377,15 +377,13 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode, def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> { - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{25} = 0; let isCommutable = Commutable; } def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } } @@ -396,24 +394,22 @@ let Defs = [CPSR] in { multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> { def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, opc, "s\t$dst, $a, $b", + IIC_iALUi, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> { let Inst{20} = 1; let Inst{25} = 1; } def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, - IIC_iALUr, opc, "s\t$dst, $a, $b", + IIC_iALUr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> { let isCommutable = Commutable; - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{20} = 1; let Inst{25} = 0; } def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, opc, "s\t$dst, $a, $b", + IIC_iALUsr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{20} = 1; let Inst{25} = 0; } @@ -435,7 +431,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr, opc, "\t$a, $b", [(opnode GPR:$a, GPR:$b)]> { - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{20} = 1; let Inst{25} = 0; let isCommutable = Commutable; @@ -443,8 +439,6 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr, opc, "\t$a, $b", [(opnode GPR:$a, so_reg:$b)]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{20} = 1; let Inst{25} = 0; } @@ -501,20 +495,22 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, Requires<[IsARM, CarryDefIsUnused]> { let isCommutable = Commutable; - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{25} = 0; } def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, Requires<[IsARM, CarryDefIsUnused]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } - // Carry setting variants +} +// Carry setting variants +let Defs = [CPSR] in { +multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode, + bit Commutable = 0> { def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), - DPFrm, IIC_iALUi, !strconcat(opc, "s\t$dst, $a, $b"), + DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; @@ -522,26 +518,25 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, let Inst{25} = 1; } def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), - DPFrm, IIC_iALUr, !strconcat(opc, "s\t$dst, $a, $b"), + DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{20} = 1; let Inst{25} = 0; } def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), - DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "s\t$dst, $a, $b"), + DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"), [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; - let Inst{4} = 1; - let Inst{7} = 0; let Inst{20} = 1; let Inst{25} = 0; } } } +} //===----------------------------------------------------------------------===// // Instructions @@ -652,6 +647,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), let isReturn = 1, isTerminator = 1, isBarrier = 1 in def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, "bx", "\tlr", [(ARMretflag)]> { + let Inst{3-0} = 0b1110; let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; @@ -664,6 +660,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { let Inst{7-4} = 0b0001; let Inst{19-8} = 0b111111111111; let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0b1110; } } @@ -673,7 +670,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1, hasExtraDefRegAllocReq = 1 in def LDM_RET : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_Br, "ldm${p}${addr:submode}\t$addr, $wb", + LdStMulFrm, IIC_Br, "ldm${addr:submode}${p}\t$addr, $wb", []>; // On non-Darwin platforms R9 is callee-saved. @@ -762,6 +759,7 @@ let isBranch = 1, isTerminator = 1 in { def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id), IIC_Br, "mov\tpc, $target \n$jt", [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> { + let Inst{15-12} = 0b1111; let Inst{20} = 0; // S Bit let Inst{24-21} = 0b1101; let Inst{27-25} = 0b000; @@ -771,6 +769,7 @@ let isBranch = 1, isTerminator = 1 in { IIC_Br, "ldr\tpc, $target \n$jt", [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, imm:$id)]> { + let Inst{15-12} = 0b1111; let Inst{20} = 1; // L bit let Inst{21} = 0; // W bit let Inst{22} = 0; // B bit @@ -782,6 +781,7 @@ let isBranch = 1, isTerminator = 1 in { IIC_Br, "add\tpc, $target, $idx \n$jt", [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, imm:$id)]> { + let Inst{15-12} = 0b1111; let Inst{20} = 0; // S bit let Inst{24-21} = 0b0100; let Inst{27-25} = 0b000; @@ -813,26 +813,26 @@ def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr, // Loads with zero extension def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "h\t$dst, $addr", + IIC_iLoadr, "ldrh", "\t$dst, $addr", [(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>; def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, - IIC_iLoadr, "ldr", "b\t$dst, $addr", + IIC_iLoadr, "ldrb", "\t$dst, $addr", [(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>; // Loads with sign extension def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "sh\t$dst, $addr", + IIC_iLoadr, "ldrsh", "\t$dst, $addr", [(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>; def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "sb\t$dst, $addr", + IIC_iLoadr, "ldrsb", "\t$dst, $addr", [(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>; let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { // Load doubleword def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, - IIC_iLoadr, "ldr", "d\t$dst1, $addr", + IIC_iLoadr, "ldrd", "\t$dst1, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed loads @@ -846,35 +846,35 @@ def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb), def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "h\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "h\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode2:$addr), LdFrm, IIC_iLoadru, - "ldr", "b\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru, - "ldr", "b\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "sh\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "sh\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>; def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb), (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru, - "ldr", "sb\t$dst, $addr!", "$addr.base = $base_wb", []>; + "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>; def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb), (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru, - "ldr", "sb\t$dst, [$base], $offset", "$base = $base_wb", []>; + "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>; } // Store @@ -884,18 +884,18 @@ def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, // Stores with truncate def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer, - "str", "h\t$src, $addr", + "strh", "\t$src, $addr", [(truncstorei16 GPR:$src, addrmode3:$addr)]>; def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer, - "str", "b\t$src, $addr", + "strb", "\t$src, $addr", [(truncstorei8 GPR:$src, addrmode2:$addr)]>; // Store doubleword let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr), StMiscFrm, IIC_iStorer, - "str", "d\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; + "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>; // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), @@ -915,28 +915,28 @@ def STR_POST : AI2stwpo<(outs GPR:$base_wb), def STRH_PRE : AI3sthpr<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, - "str", "h\t$src, [$base, $offset]!", "$base = $base_wb", + "strh", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>; def STRH_POST: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am3offset:$offset), StMiscFrm, IIC_iStoreru, - "str", "h\t$src, [$base], $offset", "$base = $base_wb", + "strh", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti16 GPR:$src, GPR:$base, am3offset:$offset))]>; def STRB_PRE : AI2stbpr<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, - "str", "b\t$src, [$base, $offset]!", "$base = $base_wb", + "strb", "\t$src, [$base, $offset]!", "$base = $base_wb", [(set GPR:$base_wb, (pre_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; def STRB_POST: AI2stbpo<(outs GPR:$base_wb), (ins GPR:$src, GPR:$base,am2offset:$offset), StFrm, IIC_iStoreru, - "str", "b\t$src, [$base], $offset", "$base = $base_wb", + "strb", "\t$src, [$base], $offset", "$base = $base_wb", [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, am2offset:$offset))]>; @@ -947,13 +947,13 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb), let mayLoad = 1, hasExtraDefRegAllocReq = 1 in def LDM : AXI4ld<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode}\t$addr, $wb", + LdStMulFrm, IIC_iLoadm, "ldm${addr:submode}${p}\t$addr, $wb", []>; let mayStore = 1, hasExtraSrcRegAllocReq = 1 in def STM : AXI4st<(outs), (ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops), - LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode}\t$addr, $wb", + LdStMulFrm, IIC_iStorem, "stm${addr:submode}${p}\t$addr, $wb", []>; //===----------------------------------------------------------------------===// @@ -963,15 +963,13 @@ def STM : AXI4st<(outs), let neverHasSideEffects = 1 in def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, "mov", "\t$dst, $src", []>, UnaryDP { - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{25} = 0; } def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } @@ -1016,10 +1014,10 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi, let Defs = [CPSR] in { def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - IIC_iMOVsi, "mov", "s\t$dst, $src, lsr #1", + IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1", [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP; def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, - IIC_iMOVsi, "mov", "s\t$dst, $src, asr #1", + IIC_iMOVsi, "movs", "\t$dst, $src, asr #1", [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP; } @@ -1095,15 +1093,19 @@ defm SUB : AsI1_bin_irs<0b0010, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>>; // ADD and SUB with 's' bit set. -defm ADDS : AI1_bin_s_irs<0b0100, "add", - BinOpFrag<(addc node:$LHS, node:$RHS)>>; -defm SUBS : AI1_bin_s_irs<0b0010, "sub", +defm ADDS : AI1_bin_s_irs<0b0100, "adds", + BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>; +defm SUBS : AI1_bin_s_irs<0b0010, "subs", BinOpFrag<(subc node:$LHS, node:$RHS)>>; defm ADC : AI1_adde_sube_irs<0b0101, "adc", BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; defm SBC : AI1_adde_sube_irs<0b0110, "sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>; +defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs", + BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>; +defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs", + BinOpFrag<(sube node:$LHS, node:$RHS)>>; // These don't define reg/reg forms, because they are handled above. def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, @@ -1115,24 +1117,20 @@ def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rsb", "\t$dst, $a, $b", [(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } // RSB with 's' bit set. let Defs = [CPSR] in { def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm, - IIC_iALUi, "rsb", "s\t$dst, $a, $b", + IIC_iALUi, "rsbs", "\t$dst, $a, $b", [(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> { let Inst{20} = 1; let Inst{25} = 1; } def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, - IIC_iALUsr, "rsb", "s\t$dst, $a, $b", + IIC_iALUsr, "rsbs", "\t$dst, $a, $b", [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{20} = 1; let Inst{25} = 0; } @@ -1149,8 +1147,6 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b", [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, Requires<[IsARM, CarryDefIsUnused]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } } @@ -1168,8 +1164,6 @@ def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b", [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>, Requires<[IsARM, CarryDefIsUnused]> { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{20} = 1; let Inst{25} = 0; } @@ -1216,14 +1210,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, "mvn", "\t$dst, $src", [(set GPR:$dst, (not GPR:$src))]>, UnaryDP { - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; } def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mvn", "\t$dst, $src", - [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP { - let Inst{4} = 1; - let Inst{7} = 0; -} + [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP; let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, IIC_iMOVi, "mvn", "\t$dst, $imm", @@ -1536,7 +1527,7 @@ def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, IIC_iCMOVr, "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">, UnaryDP { - let Inst{4} = 0; + let Inst{11-4} = 0b00000000; let Inst{25} = 0; } @@ -1545,8 +1536,6 @@ def MOVCCs : AI1<0b1101, (outs GPR:$dst), "mov", "\t$dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">, UnaryDP { - let Inst{4} = 1; - let Inst{7} = 0; let Inst{25} = 0; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 25c4acd..e1353b7 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -102,6 +102,19 @@ def addrmode_neonldstm : Operand<i32>, } */ +def h8imm : Operand<i8> { + let PrintMethod = "printHex8ImmOperand"; +} +def h16imm : Operand<i16> { + let PrintMethod = "printHex16ImmOperand"; +} +def h32imm : Operand<i32> { + let PrintMethod = "printHex32ImmOperand"; +} +def h64imm : Operand<i64> { + let PrintMethod = "printHex64ImmOperand"; +} + //===----------------------------------------------------------------------===// // NEON load / store instructions //===----------------------------------------------------------------------===// @@ -133,7 +146,7 @@ def VLDMS : NI<(outs), // Use vldmia to load a Q register as a D register pair. def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm, - "vldmia $addr, ${dst:dregpair}", + "vldmia\t$addr, ${dst:dregpair}", [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit @@ -145,7 +158,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), // Use vstmia to store a Q register as a D register pair. def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem, - "vstmia $addr, ${src:dregpair}", + "vstmia\t$addr, ${src:dregpair}", [(store (v2f64 QPR:$src), addrmode4:$addr)]> { let Inst{27-25} = 0b110; let Inst{24} = 0; // P bit @@ -2282,7 +2295,7 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, // VMOV : Vector Move (Register) -def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), +def VMOVDneon: N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src), IIC_VMOVD, "vmov\t$dst, $src", "", []>; def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src), IIC_VMOVD, "vmov\t$dst, $src", "", []>; @@ -2325,38 +2338,38 @@ def vmovImm64 : PatLeaf<(build_vector), [{ // be encoded based on the immed values. def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst), - (ins i8imm:$SIMM), IIC_VMOVImm, + (ins h8imm:$SIMM), IIC_VMOVImm, "vmov.i8\t$dst, $SIMM", "", [(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>; def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst), - (ins i8imm:$SIMM), IIC_VMOVImm, + (ins h8imm:$SIMM), IIC_VMOVImm, "vmov.i8\t$dst, $SIMM", "", [(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>; def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst), - (ins i16imm:$SIMM), IIC_VMOVImm, + (ins h16imm:$SIMM), IIC_VMOVImm, "vmov.i16\t$dst, $SIMM", "", [(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>; def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst), - (ins i16imm:$SIMM), IIC_VMOVImm, + (ins h16imm:$SIMM), IIC_VMOVImm, "vmov.i16\t$dst, $SIMM", "", [(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>; def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst), - (ins i32imm:$SIMM), IIC_VMOVImm, + (ins h32imm:$SIMM), IIC_VMOVImm, "vmov.i32\t$dst, $SIMM", "", [(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>; def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst), - (ins i32imm:$SIMM), IIC_VMOVImm, + (ins h32imm:$SIMM), IIC_VMOVImm, "vmov.i32\t$dst, $SIMM", "", [(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>; def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst), - (ins i64imm:$SIMM), IIC_VMOVImm, + (ins h64imm:$SIMM), IIC_VMOVImm, "vmov.i64\t$dst, $SIMM", "", [(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>; def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst), - (ins i64imm:$SIMM), IIC_VMOVImm, + (ins h64imm:$SIMM), IIC_VMOVImm, "vmov.i64\t$dst, $SIMM", "", [(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>; diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 5d02925..2796364 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -740,3 +740,13 @@ def : T1Pat<(i32 thumb_immshifted:$src), def : T1Pat<(i32 imm0_255_comp:$src), (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>; + +// Pseudo instruction that combines ldr from constpool and add pc. This should +// be expanded into two instructions late to allow if-conversion and +// scheduling. +let isReMaterializable = 1 in +def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), + NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), + imm:$cp))]>, + Requires<[IsThumb1Only]>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 5bfda37..1bb9bfd 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1179,3 +1179,13 @@ let isReMaterializable = 1 in def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi, "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}", [(set GPR:$dst, (i32 imm:$src))]>; + +// Pseudo instruction that combines ldr from constpool and add pc. This should +// be expanded into two instructions late to allow if-conversion and +// scheduling. +let isReMaterializable = 1 in +def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp), + NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc", + [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)), + imm:$cp))]>, + Requires<[IsThumb2]>; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 455c33b..ba341f4 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -17,7 +17,7 @@ def SDT_ITOF : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>; def SDT_CMPFP0 : SDTypeProfile<0, 1, [SDTCisFP<0>]>; -def SDT_FMDRR : +def SDT_VMOVDRR : SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; @@ -28,7 +28,7 @@ def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>; def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>; def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>; def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>; -def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>; +def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>; //===----------------------------------------------------------------------===// // Operand Definitions. @@ -55,21 +55,21 @@ def vfp_f64imm : Operand<f64>, // let canFoldAsLoad = 1 in { -def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad64, "fldd", "\t$dst, $addr", +def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr), + IIC_fpLoad64, "vldr", ".64\t$dst, $addr", [(set DPR:$dst, (load addrmode5:$addr))]>; -def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), - IIC_fpLoad32, "flds", "\t$dst, $addr", +def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr), + IIC_fpLoad32, "vldr", ".32\t$dst, $addr", [(set SPR:$dst, (load addrmode5:$addr))]>; } // canFoldAsLoad -def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), - IIC_fpStore64, "fstd", "\t$src, $addr", +def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr), + IIC_fpStore64, "vstr", ".64\t$src, $addr", [(store DPR:$src, addrmode5:$addr)]>; -def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), - IIC_fpStore32, "fsts", "\t$src, $addr", +def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), + IIC_fpStore32, "vstr", ".32\t$src, $addr", [(store SPR:$src, addrmode5:$addr)]>; //===----------------------------------------------------------------------===// @@ -77,32 +77,32 @@ def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr), // let mayLoad = 1, hasExtraDefRegAllocReq = 1 in { -def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpLoadm, - "fldm${addr:submode}d${p}\t${addr:base}, $wb", + "vldm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 1; } -def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpLoadm, - "fldm${addr:submode}s${p}\t${addr:base}, $wb", + "vldm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 1; } } // mayLoad, hasExtraDefRegAllocReq let mayStore = 1, hasExtraSrcRegAllocReq = 1 in { -def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpStorem, - "fstm${addr:submode}d${p}\t${addr:base}, $wb", + "vstm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 0; } -def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, +def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, variable_ops), IIC_fpStorem, - "fstm${addr:submode}s${p}\t${addr:base}, $wb", + "vstm${addr:submode}${p}\t${addr:base}, $wb", []> { let Inst{20} = 0; } @@ -114,68 +114,68 @@ def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb, // FP Binary Operations. // -def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU64, "faddd", "\t$dst, $a, $b", +def VADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>; -def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU32, "fadds", "\t$dst, $a, $b", +def VADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>; // These are encoded as unary instructions. let Defs = [FPSCR] in { -def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), - IIC_fpCMP64, "fcmped", "\t$a, $b", +def VCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b), + IIC_fpCMP64, "vcmpe", ".f64\t$a, $b", [(arm_cmpfp DPR:$a, DPR:$b)]>; -def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), - IIC_fpCMP32, "fcmpes", "\t$a, $b", +def VCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b), + IIC_fpCMP32, "vcmpe", ".f32\t$a, $b", [(arm_cmpfp SPR:$a, SPR:$b)]>; } -def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpDIV64, "fdivd", "\t$dst, $a, $b", +def VDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>; -def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpDIV32, "fdivs", "\t$dst, $a, $b", +def VDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>; -def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpMUL64, "fmuld", "\t$dst, $a, $b", +def VMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>; -def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpMUL32, "fmuls", "\t$dst, $a, $b", +def VMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>; - -def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpMUL64, "fnmuld", "\t$dst, $a, $b", + +def VNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> { let Inst{6} = 1; } -def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpMUL32, "fnmuls", "\t$dst, $a, $b", +def VNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> { let Inst{6} = 1; } // Match reassociated forms only if not sign dependent rounding. def : Pat<(fmul (fneg DPR:$a), DPR:$b), - (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>; def : Pat<(fmul (fneg SPR:$a), SPR:$b), - (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; + (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>; -def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), - IIC_fpALU64, "fsubd", "\t$dst, $a, $b", +def VSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b), + IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> { let Inst{6} = 1; } -def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), - IIC_fpALU32, "fsubs", "\t$dst, $a, $b", +def VSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), + IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> { let Inst{6} = 1; } @@ -184,31 +184,31 @@ def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b), // FP Unary Operations. // -def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fabsd", "\t$dst, $a", +def VABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpUNA64, "vabs", ".f64\t$dst, $a", [(set DPR:$dst, (fabs DPR:$a))]>; -def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fabss", "\t$dst, $a", +def VABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpUNA32, "vabs", ".f32\t$dst, $a", [(set SPR:$dst, (fabs SPR:$a))]>; let Defs = [FPSCR] in { -def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), - IIC_fpCMP64, "fcmpezd", "\t$a", +def VCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a), + IIC_fpCMP64, "vcmpe", ".f64\t$a, #0", [(arm_cmpfp0 DPR:$a)]>; -def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), - IIC_fpCMP32, "fcmpezs", "\t$a", +def VCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a), + IIC_fpCMP32, "vcmpe", ".f32\t$a, #0", [(arm_cmpfp0 SPR:$a)]>; } -def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTDS, "fcvtds", "\t$dst, $a", +def VCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a), + IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a", [(set DPR:$dst, (fextend SPR:$a))]>; // Special case encoding: bits 11-8 is 0b1011. -def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, - IIC_fpCVTSD, "fcvtsd", "\t$dst, $a", +def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, + IIC_fpCVTSD, "vcvt", ".f32.f64\t$dst, $a", [(set SPR:$dst, (fround DPR:$a))]> { let Inst{27-23} = 0b11101; let Inst{21-16} = 0b110111; @@ -217,52 +217,52 @@ def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm, } let neverHasSideEffects = 1 in { -def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fcpyd", "\t$dst, $a", []>; +def VMOVD: ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>; -def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fcpys", "\t$dst, $a", []>; +def VMOVS: ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>; } // neverHasSideEffects -def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpUNA64, "fnegd", "\t$dst, $a", +def VNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpUNA64, "vneg", ".f64\t$dst, $a", [(set DPR:$dst, (fneg DPR:$a))]>; -def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpUNA32, "fnegs", "\t$dst, $a", +def VNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpUNA32, "vneg", ".f32\t$dst, $a", [(set SPR:$dst, (fneg SPR:$a))]>; -def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), - IIC_fpSQRT64, "fsqrtd", "\t$dst, $a", +def VSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a), + IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a", [(set DPR:$dst, (fsqrt DPR:$a))]>; -def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), - IIC_fpSQRT32, "fsqrts", "\t$dst, $a", +def VSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a), + IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a", [(set SPR:$dst, (fsqrt SPR:$a))]>; //===----------------------------------------------------------------------===// // FP <-> GPR Copies. Int <-> FP Conversions. // -def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), - IIC_VMOVSI, "fmrs", "\t$dst, $src", +def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src), + IIC_VMOVSI, "vmov", "\t$dst, $src", [(set GPR:$dst, (bitconvert SPR:$src))]>; -def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), - IIC_VMOVIS, "fmsr", "\t$dst, $src", +def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src), + IIC_VMOVIS, "vmov", "\t$dst, $src", [(set SPR:$dst, (bitconvert GPR:$src))]>; -def FMRRD : AVConv3I<0b11000101, 0b1011, +def VMOVRRD : AVConv3I<0b11000101, 0b1011, (outs GPR:$wb, GPR:$dst2), (ins DPR:$src), - IIC_VMOVDI, "fmrrd", "\t$wb, $dst2, $src", + IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src", [/* FIXME: Can't write pattern for multiple result instr*/]>; // FMDHR: GPR -> SPR // FMDLR: GPR -> SPR -def FMDRR : AVConv5I<0b11000100, 0b1011, +def VMOVDRR : AVConv5I<0b11000100, 0b1011, (outs DPR:$dst), (ins GPR:$src1, GPR:$src2), - IIC_VMOVID, "fmdrr", "\t$dst, $src1, $src2", + IIC_VMOVID, "vmov", "\t$dst, $src1, $src2", [(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>; // FMRDH: SPR -> GPR @@ -277,53 +277,53 @@ def FMDRR : AVConv5I<0b11000100, 0b1011, // Int to FP: -def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTID, "fsitod", "\t$dst, $a", +def VSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), + IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a", [(set DPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } -def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), - IIC_fpCVTIS, "fsitos", "\t$dst, $a", +def VSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), + IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a", [(set SPR:$dst, (arm_sitof SPR:$a))]> { let Inst{7} = 1; } -def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), - IIC_fpCVTID, "fuitod", "\t$dst, $a", +def VUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a), + IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a", [(set DPR:$dst, (arm_uitof SPR:$a))]>; -def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), - IIC_fpCVTIS, "fuitos", "\t$dst, $a", +def VUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a), + IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a", [(set SPR:$dst, (arm_uitof SPR:$a))]>; // FP to Int: // Always set Z bit in the instruction, i.e. "round towards zero" variants. -def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, +def VTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011, (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "ftosizd", "\t$dst, $a", + IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a", [(set SPR:$dst, (arm_ftosi DPR:$a))]> { let Inst{7} = 1; // Z bit } -def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, +def VTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010, (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "ftosizs", "\t$dst, $a", + IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a", [(set SPR:$dst, (arm_ftosi SPR:$a))]> { let Inst{7} = 1; // Z bit } -def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, +def VTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011, (outs SPR:$dst), (ins DPR:$a), - IIC_fpCVTDI, "ftouizd", "\t$dst, $a", + IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a", [(set SPR:$dst, (arm_ftoui DPR:$a))]> { let Inst{7} = 1; // Z bit } -def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, +def VTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, (outs SPR:$dst), (ins SPR:$a), - IIC_fpCVTSI, "ftouizs", "\t$dst, $a", + IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a", [(set SPR:$dst, (arm_ftoui SPR:$a))]> { let Inst{7} = 1; // Z bit } @@ -332,54 +332,54 @@ def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010, // FP FMA Operations. // -def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fmacd", "\t$dst, $a, $b", +def VMLAD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fmacs", "\t$dst, $a, $b", +def VMLAS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fmscd", "\t$dst, $a, $b", +def VNMLSD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fmscs", "\t$dst, $a, $b", +def VNMLSS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>, RegConstraint<"$dstin = $dst">; -def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fnmacd", "\t$dst, $a, $b", +def VMLSD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } -def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fnmacs", "\t$dst, $a, $b", +def VMLSS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, DPR:$b)), - (FNMACD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>; + (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>; def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)), - (FNMACS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; + (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>; -def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), - IIC_fpMAC64, "fnmscd", "\t$dst, $a, $b", +def VNMLAD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b), + IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b", [(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; } -def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), - IIC_fpMAC32, "fnmscs", "\t$dst, $a, $b", +def VNMLAS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), + IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b", [(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>, RegConstraint<"$dstin = $dst"> { let Inst{6} = 1; @@ -389,27 +389,27 @@ def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b), // FP Conditional moves. // -def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100, +def VMOVDcc : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpUNA64, "fcpyd", "\t$dst, $true", + IIC_fpUNA64, "vmov", ".f64\t$dst, $true", [/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100, +def VMOVScc : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpUNA32, "fcpys", "\t$dst, $true", + IIC_fpUNA32, "vmov", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100, +def VNEGDcc : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$false, DPR:$true), - IIC_fpUNA64, "fnegd", "\t$dst, $true", + IIC_fpUNA64, "vneg", ".f64\t$dst, $true", [/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; -def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, +def VNEGScc : ASuI<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$false, SPR:$true), - IIC_fpUNA32, "fnegs", "\t$dst, $true", + IIC_fpUNA32, "vneg", ".f32\t$dst, $true", [/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>, RegConstraint<"$false = $dst">; @@ -418,8 +418,11 @@ def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100, // Misc. // +// APSR is the application level alias of CPSR. This FPSCR N, Z, C, V flags +// to APSR. let Defs = [CPSR], Uses = [FPSCR] in -def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", +def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs", + "\tapsr_nzcv, fpscr", [(arm_fmstat)]> { let Inst{27-20} = 0b11101111; let Inst{19-16} = 0b0001; @@ -431,26 +434,26 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "", // Materialize FP immediates. VFP3 only. -let isReMaterializable = 1 in -def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), +let isReMaterializable = 1 in { +def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), VFPMiscFrm, IIC_VMOVImm, - "fconsts", "\t$dst, $imm", - [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { + "fconstd", "\t$dst, $imm", + [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; let Inst{21-20} = 0b11; let Inst{11-9} = 0b101; - let Inst{8} = 0; + let Inst{8} = 1; let Inst{7-4} = 0b0000; } -let isReMaterializable = 1 in -def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm), +def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm), VFPMiscFrm, IIC_VMOVImm, - "fconstd", "\t$dst, $imm", - [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> { + "fconsts", "\t$dst, $imm", + [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> { let Inst{27-23} = 0b11101; let Inst{21-20} = 0b11; let Inst{11-9} = 0b101; - let Inst{8} = 1; + let Inst{8} = 0; let Inst{7-4} = 0b0000; } +} diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 7e1783b..304d0ef 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -41,8 +41,8 @@ using namespace llvm; STATISTIC(NumLDMGened , "Number of ldm instructions generated"); STATISTIC(NumSTMGened , "Number of stm instructions generated"); -STATISTIC(NumFLDMGened, "Number of fldm instructions generated"); -STATISTIC(NumFSTMGened, "Number of fstm instructions generated"); +STATISTIC(NumVLDMGened, "Number of vldm instructions generated"); +STATISTIC(NumVSTMGened, "Number of vstm instructions generated"); STATISTIC(NumLdStMoved, "Number of load / store instructions moved"); STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation"); STATISTIC(NumSTRDFormed,"Number of strd created before allocation"); @@ -127,18 +127,18 @@ static int getLoadStoreMultipleOpcode(int Opcode) { case ARM::t2STRi12: NumSTMGened++; return ARM::t2STM; - case ARM::FLDS: - NumFLDMGened++; - return ARM::FLDMS; - case ARM::FSTS: - NumFSTMGened++; - return ARM::FSTMS; - case ARM::FLDD: - NumFLDMGened++; - return ARM::FLDMD; - case ARM::FSTD: - NumFSTMGened++; - return ARM::FSTMD; + case ARM::VLDRS: + NumVLDMGened++; + return ARM::VLDMS; + case ARM::VSTRS: + NumVSTMGened++; + return ARM::VSTMS; + case ARM::VLDRD: + NumVLDMGened++; + return ARM::VLDMD; + case ARM::VSTRD: + NumVSTMGened++; + return ARM::VSTMD; default: llvm_unreachable("Unhandled opcode!"); } return 0; @@ -229,8 +229,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, BaseKill = true; // New base is always killed right its use. } - bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD; - bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD; + bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; Opcode = getLoadStoreMultipleOpcode(Opcode); MachineInstrBuilder MIB = (isAM4) ? BuildMI(MBB, MBBI, dl, TII->get(Opcode)) @@ -373,27 +373,27 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) { case ARM::t2LDRi12: case ARM::t2STRi8: case ARM::t2STRi12: - case ARM::FLDS: - case ARM::FSTS: + case ARM::VLDRS: + case ARM::VSTRS: return 4; - case ARM::FLDD: - case ARM::FSTD: + case ARM::VLDRD: + case ARM::VSTRD: return 8; case ARM::LDM: case ARM::STM: case ARM::t2LDM: case ARM::t2STM: return (MI->getNumOperands() - 5) * 4; - case ARM::FLDMS: - case ARM::FSTMS: - case ARM::FLDMD: - case ARM::FSTMD: + case ARM::VLDMS: + case ARM::VSTMS: + case ARM::VLDMD: + case ARM::VSTMD: return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4; } } /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base -/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible: +/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible: /// /// stmia rn, <ra, rb, rc> /// rn := rn + 4 * 3; @@ -475,7 +475,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB, } } } else { - // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops. + // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops. if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm())) return false; @@ -517,10 +517,10 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) { switch (Opc) { case ARM::LDR: return ARM::LDR_PRE; case ARM::STR: return ARM::STR_PRE; - case ARM::FLDS: return ARM::FLDMS; - case ARM::FLDD: return ARM::FLDMD; - case ARM::FSTS: return ARM::FSTMS; - case ARM::FSTD: return ARM::FSTMD; + case ARM::VLDRS: return ARM::VLDMS; + case ARM::VLDRD: return ARM::VLDMD; + case ARM::VSTRS: return ARM::VSTMS; + case ARM::VSTRD: return ARM::VSTMD; case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_PRE; @@ -536,10 +536,10 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) { switch (Opc) { case ARM::LDR: return ARM::LDR_POST; case ARM::STR: return ARM::STR_POST; - case ARM::FLDS: return ARM::FLDMS; - case ARM::FLDD: return ARM::FLDMD; - case ARM::FSTS: return ARM::FSTMS; - case ARM::FSTD: return ARM::FSTMD; + case ARM::VLDRS: return ARM::VLDMS; + case ARM::VLDRD: return ARM::VLDMD; + case ARM::VSTRS: return ARM::VSTMS; + case ARM::VSTRD: return ARM::VSTMD; case ARM::t2LDRi8: case ARM::t2LDRi12: return ARM::t2LDR_POST; @@ -564,8 +564,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, unsigned Bytes = getLSMultipleTransferSize(MI); int Opcode = MI->getOpcode(); DebugLoc dl = MI->getDebugLoc(); - bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS || - Opcode == ARM::FSTD || Opcode == ARM::FSTS; + bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS || + Opcode == ARM::VSTRD || Opcode == ARM::VSTRS; bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0) return false; @@ -575,7 +575,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (MI->getOperand(2).getImm() != 0) return false; - bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD; + bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD; // Can't do the merge if the destination register is the same as the would-be // writeback register. if (isLd && MI->getOperand(0).getReg() == Base) @@ -626,7 +626,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, if (!DoMerge) return false; - bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD; + bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD; unsigned Offset = 0; if (isAM5) Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub) @@ -638,7 +638,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes; if (isLd) { if (isAM5) - // FLDMS, FLDMD + // VLDMS, VLDMD BuildMI(MBB, MBBI, dl, TII->get(NewOpc)) .addReg(Base, getKillRegState(BaseKill)) .addImm(Offset).addImm(Pred).addReg(PredReg) @@ -657,7 +657,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB, } else { MachineOperand &MO = MI->getOperand(0); if (isAM5) - // FSTMS, FSTMD + // VSTMS, VSTMD BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset) .addImm(Pred).addReg(PredReg) .addReg(Base, getDefRegState(true)) // WB base register @@ -687,11 +687,11 @@ static bool isMemoryOp(const MachineInstr *MI) { case ARM::LDR: case ARM::STR: return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0; - case ARM::FLDS: - case ARM::FSTS: + case ARM::VLDRS: + case ARM::VSTRS: return MI->getOperand(1).isReg(); - case ARM::FLDD: - case ARM::FSTD: + case ARM::VLDRD: + case ARM::VSTRD: return MI->getOperand(1).isReg(); case ARM::t2LDRi8: case ARM::t2LDRi12: @@ -866,6 +866,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef, Pred, PredReg, TII, isT2); } else { + if (OddReg == EvenReg && EvenDeadKill) { + // If the two source operands are the same, the kill marker is probably + // on the first one. e.g. + // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0 + EvenDeadKill = false; + OddDeadKill = true; + } InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc, EvenReg, EvenDeadKill, EvenUndef, BaseReg, false, BaseUndef, OffReg, false, OffUndef, @@ -1214,7 +1221,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, if (!STI->hasV5TEOps()) return false; - // FIXME: FLDS / FSTS -> FLDD / FSTD + // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD unsigned Scale = 1; unsigned Opcode = Op0->getOpcode(); if (Opcode == ARM::LDR) @@ -1456,7 +1463,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) { continue; int Opc = MI->getOpcode(); - bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD; + bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD; unsigned Base = MI->getOperand(1).getReg(); int Offset = getMemoryOpOffset(MI); diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 5af95c3..432ed78 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -16,6 +16,7 @@ #include "llvm/GlobalValue.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/CommandLine.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm; static cl::opt<bool> @@ -108,6 +109,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, if (UseNEONFP.getPosition() == 0) UseNEONForSinglePrecisionFP = true; } + HasBranchTargetBuffer = (CPUString == "cortex-a8" || + CPUString == "cortex-a9"); } /// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol. @@ -159,3 +162,13 @@ ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const { return false; } + +bool ARMSubtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = TargetSubtarget::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + CriticalPathRCs.push_back(&ARM::GPRRegClass); + return PostRAScheduler && OptLevel >= CodeGenOpt::Default; +} diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index e721a7f..3d0e01e 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -17,6 +17,7 @@ #include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtarget.h" +#include "ARMBaseRegisterInfo.h" #include <string> namespace llvm { @@ -49,6 +50,9 @@ protected: /// determine if NEON should actually be used. bool UseNEONForSinglePrecisionFP; + /// HasBranchTargetBuffer - True if processor can predict indirect branches. + bool HasBranchTargetBuffer; + /// IsThumb - True if we are in thumb mode, false if in ARM mode. bool IsThumb; @@ -122,17 +126,16 @@ protected: bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); } bool hasThumb2() const { return ThumbMode >= Thumb2; } + bool hasBranchTargetBuffer() const { return HasBranchTargetBuffer; } + bool isR9Reserved() const { return IsR9Reserved; } const std::string & getCPUString() const { return CPUString; } - /// enablePostRAScheduler - True at 'More' optimization except - /// for Thumb1. + /// enablePostRAScheduler - True at 'More' optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& mode) const { - mode = TargetSubtarget::ANTIDEP_CRITICAL; - return PostRAScheduler && OptLevel >= CodeGenOpt::Default; - } + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; /// getInstrItins - Return the instruction itineraies based on subtarget /// selection. diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index b4ce1d7..2564ed9 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -21,8 +21,7 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, - const StringRef &TT) { +static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: @@ -61,8 +60,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget), DataLayout(Subtarget.isAPCS_ABI() ? - std::string("e-p:32:32-f64:32:32-i64:32:32") : - std::string("e-p:32:32-f64:64:64-i64:64:64")), + std::string("e-p:32:32-f64:32:32-i64:32:32-n32") : + std::string("e-p:32:32-f64:64:64-i64:64:64-n32")), TLInfo(*this) { } @@ -74,9 +73,9 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT, : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), DataLayout(Subtarget.isAPCS_ABI() ? std::string("e-p:32:32-f64:32:32-i64:32:32-" - "i16:16:32-i8:8:32-i1:8:32-a:0:32") : + "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") : std::string("e-p:32:32-f64:64:64-i64:64:64-" - "i16:16:32-i8:8:32-i1:8:32-a:0:32")), + "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")), TLInfo(*this) { } @@ -94,6 +93,10 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, if (Subtarget.hasNEON()) PM.add(createNEONPreAllocPass()); + // Calculate and set max stack object alignment early, so we can decide + // whether we will need stack realignment (and thus FP). + PM.add(createARMMaxStackAlignmentCalculatorPass()); + // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); @@ -106,6 +109,10 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass()); + // Expand some pseudo instructions into multiple instructions to allow + // proper scheduling. + PM.add(createARMExpandPseudoPass()); + return true; } diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp index 6cb3e9e4..0352503 100644 --- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp @@ -43,6 +43,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" @@ -138,6 +139,19 @@ namespace { void printVFPf32ImmOperand(const MachineInstr *MI, int OpNum); void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum); + void printHex8ImmOperand(const MachineInstr *MI, int OpNum) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff); + } + void printHex16ImmOperand(const MachineInstr *MI, int OpNum) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff); + } + void printHex32ImmOperand(const MachineInstr *MI, int OpNum) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff); + } + void printHex64ImmOperand(const MachineInstr *MI, int OpNum) { + O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm()); + } + virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode); virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum, @@ -199,7 +213,7 @@ namespace { if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")"; if (ACPV->getPCAdjustment() != 0) { O << "-(" << MAI->getPrivateGlobalPrefix() << "PC" - << ACPV->getLabelId() + << getFunctionNumber() << "_" << ACPV->getLabelId() << "+" << (unsigned)ACPV->getPCAdjustment(); if (ACPV->mustAddCurrentAddress()) O << "-."; @@ -333,6 +347,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, &ARM::DPR_VFP2RegClass); O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']'; } else { + assert(!MO.getSubReg() && "Subregs should be eliminated!"); O << getRegisterName(Reg); } break; @@ -594,12 +609,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op, if (Modifier && strcmp(Modifier, "submode") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - if (MO1.getReg() == ARM::SP) { - bool isFLDM = (MI->getOpcode() == ARM::FLDMD || - MI->getOpcode() == ARM::FLDMS); - O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); return; } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. @@ -623,9 +633,14 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) { const MachineOperand &MO1 = MI->getOperand(Op); const MachineOperand &MO2 = MI->getOperand(Op+1); const MachineOperand &MO3 = MI->getOperand(Op+2); + const MachineOperand &MO4 = MI->getOperand(Op+3); - // FIXME: No support yet for specifying alignment. - O << "[" << getRegisterName(MO1.getReg()) << "]"; + O << "[" << getRegisterName(MO1.getReg()); + if (MO4.getImm()) { + // FIXME: Both darwin as and GNU as violate ARM docs here. + O << ", :" << MO4.getImm(); + } + O << "]"; if (ARM_AM::getAM6WBFlag(MO3.getImm())) { if (MO2.getReg() == 0) @@ -697,11 +712,8 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op, O << "[" << getRegisterName(MO1.getReg()); if (MO3.getReg()) O << ", " << getRegisterName(MO3.getReg()); - else if (unsigned ImmOffs = MO2.getImm()) { - O << ", #" << ImmOffs; - if (Scale > 1) - O << " * " << Scale; - } + else if (unsigned ImmOffs = MO2.getImm()) + O << ", #" << ImmOffs * Scale; O << "]"; } @@ -844,7 +856,8 @@ void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){ void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) { int Id = (int)MI->getOperand(OpNum).getImm(); - O << MAI->getPrivateGlobalPrefix() << "PC" << Id; + O << MAI->getPrivateGlobalPrefix() + << "PC" << getFunctionNumber() << "_" << Id; } void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) { @@ -1070,7 +1083,7 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) { printInstruction(MI); } - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; processDebugLoc(MI, false); @@ -1107,9 +1120,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) { } } - // Use unified assembler syntax mode for Thumb. - if (Subtarget->isThumb()) - O << "\t.syntax unified\n"; + // Use unified assembler syntax. + O << "\t.syntax unified\n"; // Emit ARM Build Attributes if (Subtarget->isTargetELF()) { @@ -1349,7 +1361,6 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { printKill(MI); return; case TargetInstrInfo::INLINEASM: - O << '\t'; printInlineAsm(MI); return; case TargetInstrInfo::IMPLICIT_DEF: @@ -1365,7 +1376,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { // FIXME: MOVE TO SHARED PLACE. unsigned Id = (unsigned)MI->getOperand(2).getImm(); const char *Prefix = MAI->getPrivateGlobalPrefix(); - MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)+"PC"+Twine(Id)); + MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix) + + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id)); OutStreamer.EmitLabel(Label); diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp index f422798..0047925 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp @@ -259,12 +259,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum, if (Modifier && strcmp(Modifier, "submode") == 0) { ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm()); - if (MO1.getReg() == ARM::SP) { - bool isFLDM = (MI->getOpcode() == ARM::FLDMD || - MI->getOpcode() == ARM::FLDMS); - O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM); - } else - O << ARM_AM::getAMSubModeStr(Mode); + O << ARM_AM::getAMSubModeStr(Mode); return; } else if (Modifier && strcmp(Modifier, "base") == 0) { // Used for FSTM{D|S} and LSTM{D|S} operations. diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h index 5bf966b..9e7f8d5 100644 --- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h @@ -80,6 +80,10 @@ public: void printNoHashImmediate(const MCInst *MI, unsigned OpNum); void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {} void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {} + void printHex8ImmOperand(const MCInst *MI, int OpNum) {} + void printHex16ImmOperand(const MCInst *MI, int OpNum) {} + void printHex32ImmOperand(const MCInst *MI, int OpNum) {} + void printHex64ImmOperand(const MCInst *MI, int OpNum) {} void printPCLabel(const MCInst *MI, unsigned OpNum); // FIXME: Implement. diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp index 8686961..c49fee3 100644 --- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp +++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp @@ -137,6 +137,7 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { case MachineOperand::MO_Register: // Ignore all implicit register operands. if (MO.isImplicit()) continue; + assert(!MO.getSubReg() && "Subregs should be eliminated!"); MCOp = MCOperand::CreateReg(MO.getReg()); break; case MachineOperand::MO_Immediate: diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index e071b61..964551f 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_target(ARMCodeGen ARMCodeEmitter.cpp ARMConstantIslandPass.cpp ARMConstantPoolValue.cpp + ARMExpandPseudoInsts.cpp ARMISelDAGToDAG.cpp ARMISelLowering.cpp ARMInstrInfo.cpp diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp index f307e3b..7d767ec 100644 --- a/lib/Target/ARM/NEONMoveFix.cpp +++ b/lib/Target/ARM/NEONMoveFix.cpp @@ -54,10 +54,10 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { NextMII = next(MII); MachineInstr *MI = &*MII; - if (MI->getOpcode() == ARM::FCPYD && + if (MI->getOpcode() == ARM::VMOVD && !TII->isPredicated(MI)) { unsigned SrcReg = MI->getOperand(1).getReg(); - // If we do not found an instruction defining the reg, this means the + // If we do not find an instruction defining the reg, this means the // register should be live-in for this BB. It's always to better to use // NEON reg-reg moves. unsigned Domain = ARMII::DomainNEON; @@ -71,7 +71,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { } if (Domain & ARMII::DomainNEON) { - // Convert FCPYD to VMOVD. + // Convert VMOVD to VMOVDneon unsigned DestReg = MI->getOperand(0).getReg(); DEBUG({errs() << "vmov convert: "; MI->dump();}); @@ -82,7 +82,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) { // - The imp-defs / imp-uses are superregs only, we don't care about // them. BuildMI(MBB, *MI, MI->getDebugLoc(), - TII->get(ARM::VMOVD), DestReg).addReg(SrcReg); + TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg); MBB.erase(MI); MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp index 8b2bcd0..206677b 100644 --- a/lib/Target/ARM/NEONPreAllocPass.cpp +++ b/lib/Target/ARM/NEONPreAllocPass.cpp @@ -177,20 +177,20 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNd8: case ARM::VST2LNd16: case ARM::VST2LNd32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 2; return true; case ARM::VST2q8: case ARM::VST2q16: case ARM::VST2q32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 4; return true; case ARM::VST2LNq16a: case ARM::VST2LNq32a: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 2; Offset = 0; Stride = 2; @@ -198,7 +198,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST2LNq16b: case ARM::VST2LNq32b: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 2; Offset = 1; Stride = 2; @@ -211,14 +211,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNd8: case ARM::VST3LNd16: case ARM::VST3LNd32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 3; return true; case ARM::VST3q8a: case ARM::VST3q16a: case ARM::VST3q32a: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 3; Offset = 0; Stride = 2; @@ -227,7 +227,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3q8b: case ARM::VST3q16b: case ARM::VST3q32b: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 3; Offset = 1; Stride = 2; @@ -235,7 +235,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNq16a: case ARM::VST3LNq32a: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 3; Offset = 0; Stride = 2; @@ -243,7 +243,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST3LNq16b: case ARM::VST3LNq32b: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 3; Offset = 1; Stride = 2; @@ -256,14 +256,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNd8: case ARM::VST4LNd16: case ARM::VST4LNd32: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 4; return true; case ARM::VST4q8a: case ARM::VST4q16a: case ARM::VST4q32a: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 4; Offset = 0; Stride = 2; @@ -272,7 +272,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4q8b: case ARM::VST4q16b: case ARM::VST4q32b: - FirstOpnd = 4; + FirstOpnd = 5; NumRegs = 4; Offset = 1; Stride = 2; @@ -280,7 +280,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNq16a: case ARM::VST4LNq32a: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 4; Offset = 0; Stride = 2; @@ -288,7 +288,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs, case ARM::VST4LNq16b: case ARM::VST4LNq32b: - FirstOpnd = 3; + FirstOpnd = 4; NumRegs = 4; Offset = 1; Stride = 2; diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt index e7770b2..6b605bb 100644 --- a/lib/Target/ARM/README-Thumb.txt +++ b/lib/Target/ARM/README-Thumb.txt @@ -37,7 +37,7 @@ LPCRELL0: mov r1, #PCRELV0 add r1, pc ldr r0, [r0, r1] - cpy pc, r0 + mov pc, r0 .align 2 LJTI1_0_0: .long LBB1_3 @@ -51,7 +51,7 @@ We should be able to generate: LPCRELL0: add r1, LJTI1_0_0 ldr r0, [r0, r1] - cpy pc, r0 + mov pc, r0 .align 2 LJTI1_0_0: .long LBB1_3 @@ -206,8 +206,8 @@ LPC0: add r5, pc ldr r6, LCPI1_1 ldr r2, LCPI1_2 - cpy r3, r6 - cpy lr, pc + mov r3, r6 + mov lr, pc bx r5 //===---------------------------------------------------------------------===// diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt index fb64d9f..11c48ad 100644 --- a/lib/Target/ARM/README.txt +++ b/lib/Target/ARM/README.txt @@ -321,7 +321,7 @@ time. 4) Once we added support for multiple result patterns, write indexed loads patterns instead of C++ instruction selection code. -5) Use FLDM / FSTM to emulate indexed FP load / store. +5) Use VLDM / VSTM to emulate indexed FP load / store. //===---------------------------------------------------------------------===// @@ -591,3 +591,8 @@ http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-June/022763.html //===---------------------------------------------------------------------===// Make use of the "rbit" instruction. + +//===---------------------------------------------------------------------===// + +Take a look at test/CodeGen/Thumb2/machine-licm.ll. ARM should be taught how +to licm and cse the unnecessary load from cp#1. diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp index b6dd56c..7602b6d 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.cpp +++ b/lib/Target/ARM/Thumb1InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information --------*- C++ -*-===// +//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "ARMInstrInfo.h" +#include "Thumb1InstrInfo.h" #include "ARM.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h index 13cc578..b28229d 100644 --- a/lib/Target/ARM/Thumb1InstrInfo.h +++ b/lib/Target/ARM/Thumb1InstrInfo.h @@ -1,4 +1,4 @@ -//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ----------*- C++ -*-===// +//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index 5aaaf9c..37adf37 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------*- C++ -*-===// +//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains the Thumb-1 implementation of the TargetRegisterInfo class. +// This file contains the Thumb-1 implementation of the TargetRegisterInfo +// class. // //===----------------------------------------------------------------------===// @@ -794,7 +795,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } else { - // Unwind MBBI to point to first LDR / FLDD. + // Unwind MBBI to point to first LDR / VLDRD. const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h index 241f1cc..37ad388 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.h +++ b/lib/Target/ARM/Thumb1RegisterInfo.h @@ -1,4 +1,4 @@ -//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl ----*- C++ -*-===// +//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains the Thumb-1 implementation of the TargetRegisterInfo class. +// This file contains the Thumb-1 implementation of the TargetRegisterInfo +// class. // //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp index 462844b..f5ba155 100644 --- a/lib/Target/ARM/Thumb2ITBlockPass.cpp +++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp @@ -1,4 +1,4 @@ -//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks -----------*- C++ -*-===// +//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -34,10 +34,6 @@ namespace { } private: - MachineBasicBlock::iterator - SplitT2MOV32imm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, - MachineInstr *MI, DebugLoc dl, - unsigned PredReg, ARMCC::CondCodes CC); bool InsertITBlocks(MachineBasicBlock &MBB); }; char Thumb2ITBlockPass::ID = 0; @@ -50,34 +46,6 @@ static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){ return llvm::getInstrPredicate(MI, PredReg); } -MachineBasicBlock::iterator -Thumb2ITBlockPass::SplitT2MOV32imm(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - MachineInstr *MI, - DebugLoc dl, unsigned PredReg, - ARMCC::CondCodes CC) { - // Splitting t2MOVi32imm into a pair of t2MOVi16 + t2MOVTi16 here. - // The only reason it was a single instruction was so it could be - // re-materialized. We want to split it before this and the thumb2 - // size reduction pass to make sure the IT mask is correct and expose - // width reduction opportunities. It doesn't make sense to do this in a - // separate pass so here it is. - unsigned DstReg = MI->getOperand(0).getReg(); - bool DstDead = MI->getOperand(0).isDead(); // Is this possible? - unsigned Imm = MI->getOperand(1).getImm(); - unsigned Lo16 = Imm & 0xffff; - unsigned Hi16 = (Imm >> 16) & 0xffff; - BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVi16), DstReg) - .addImm(Lo16).addImm(CC).addReg(PredReg); - BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVTi16)) - .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead)) - .addReg(DstReg).addImm(Hi16).addImm(CC).addReg(PredReg); - --MBBI; - --MBBI; - MI->eraseFromParent(); - return MBBI; -} - bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { bool Modified = false; @@ -88,11 +56,6 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { unsigned PredReg = 0; ARMCC::CondCodes CC = getPredicate(MI, PredReg); - if (MI->getOpcode() == ARM::t2MOVi32imm) { - MBBI = SplitT2MOV32imm(MBB, MBBI, MI, dl, PredReg, CC); - continue; - } - if (CC == ARMCC::AL) { ++MBBI; continue; @@ -115,11 +78,6 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) { DebugLoc ndl = NMI->getDebugLoc(); unsigned NPredReg = 0; ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg); - if (NMI->getOpcode() == ARM::t2MOVi32imm) { - MBBI = SplitT2MOV32imm(MBB, MBBI, NMI, ndl, NPredReg, NCC); - continue; - } - if (NCC == OCC) { Mask |= (1 << Pos); } else if (NCC != CC) diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 21fff51..16c1e6f 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -1,4 +1,4 @@ -//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------*- C++ -*-===// +//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "ARMInstrInfo.h" +#include "Thumb2InstrInfo.h" #include "ARM.h" +#include "ARMConstantPoolValue.h" #include "ARMAddressingModes.h" #include "ARMGenInstrInfo.inc" #include "ARMMachineFunctionInfo.h" @@ -132,7 +133,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC); } - void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, DebugLoc dl, unsigned DestReg, unsigned BaseReg, int NumBytes, diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index f3688c0..663a60b 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -1,4 +1,4 @@ -//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ----------*- C++ -*-===// +//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h index a295630..b3cf2e5 100644 --- a/lib/Target/ARM/Thumb2RegisterInfo.h +++ b/lib/Target/ARM/Thumb2RegisterInfo.h @@ -1,4 +1,4 @@ -//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl ----*- C++ -*-===// +//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file contains the Thumb-2 implementation of the TargetRegisterInfo class. +// This file contains the Thumb-2 implementation of the TargetRegisterInfo +// class. // //===----------------------------------------------------------------------===// diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp index e3587fb..5b0a89d 100644 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp @@ -225,8 +225,6 @@ SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() { /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void AlphaDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); - // Select target instructions for the DAG. SelectRoot(*CurDAG); CurDAG->RemoveDeadNodes(); diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp index cb03a6f..9217522 100644 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ b/lib/Target/Alpha/AlphaISelLowering.cpp @@ -426,7 +426,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, } } else { //more args // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6)); + int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true, false); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -444,7 +444,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, if (TargetRegisterInfo::isPhysicalRegister(args_int[i])) args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass); SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64); - int FI = MFI->CreateFixedObject(8, -8 * (6 - i)); + int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false); if (i == 0) VarArgsBase = FI; SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); @@ -452,7 +452,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain, if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass); argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); - FI = MFI->CreateFixedObject(8, - 8 * (12 - i)); + FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false); SDFI = DAG.getFrameIndex(FI, MVT::i64); LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0)); } diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td index 81e1fb7..8917e86 100644 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ b/lib/Target/Alpha/AlphaInstrInfo.td @@ -391,7 +391,7 @@ def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>; def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>; -let isReturn = 1, isTerminator = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in { +let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in { def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine } diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp index 98e9730..64bdd62 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp @@ -314,7 +314,7 @@ unsigned AlphaRegisterInfo::getRARegister() const { return 0; } -unsigned AlphaRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return hasFP(MF) ? Alpha::R15 : Alpha::R30; } diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h index 66f0898..a971e21 100644 --- a/lib/Target/Alpha/AlphaRegisterInfo.h +++ b/lib/Target/Alpha/AlphaRegisterInfo.h @@ -52,7 +52,7 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo { // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp index b8bc13b..d0d5a43 100644 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ b/lib/Target/Alpha/AlphaTargetMachine.cpp @@ -28,7 +28,7 @@ extern "C" void LLVMInitializeAlphaTarget() { AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : LLVMTargetMachine(T, TT), - DataLayout("e-f128:128:128"), + DataLayout("e-f128:128:128-n64"), FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0), JITInfo(*this), Subtarget(TT, FS), diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp index 209a5bf..338057b 100644 --- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp +++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp @@ -178,7 +178,7 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) { processDebugLoc(II, true); printInstruction(II); - if (VerboseAsm && !II->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*II); O << '\n'; processDebugLoc(II, false); diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp index 1900d00..917f7f5 100644 --- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp +++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp @@ -31,8 +31,8 @@ #include "llvm/Target/TargetRegistry.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" - using namespace llvm; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -143,7 +143,7 @@ bool BlackfinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { processDebugLoc(II, true); printInstruction(II); - if (VerboseAsm && !II->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*II); O << '\n'; diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp index 4b321ec..c5c96f8 100644 --- a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp @@ -25,7 +25,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/ADT/VectorExtras.h" #include "llvm/Support/Debug.h" - +#include "llvm/Support/ErrorHandling.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -207,7 +207,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, } else { assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc"); unsigned ObjSize = VA.getLocVT().getStoreSize(); - int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset()); + int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), + true, false); SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0)); } diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td index c952af1..88ff85f 100644 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ b/lib/Target/Blackfin/BlackfinInstrInfo.td @@ -174,6 +174,7 @@ def CALLp: F1<(outs), (ins P:$func, variable_ops), let isReturn = 1, isTerminator = 1, + isBarrier = 1, Uses = [RETS] in def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>; diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp index c8c5925..ea9480d 100644 --- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp +++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp @@ -62,7 +62,6 @@ BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const { bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const { // Overload Table const bool OTable[] = { - false, // illegal intrinsic #define GET_INTRINSIC_OVERLOAD_TABLE #include "BlackfinGenIntrinsics.inc" #undef GET_INTRINSIC_OVERLOAD_TABLE diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp index 8c0a58a..224165b 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp @@ -368,7 +368,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (requiresRegisterScavenging(MF)) { // Reserve a slot close to SP or frame pointer. RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment())); + RC->getAlignment(), + false)); } } @@ -449,7 +450,8 @@ unsigned BlackfinRegisterInfo::getRARegister() const { return BF::RETS; } -unsigned BlackfinRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned +BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return hasFP(MF) ? BF::FP : BF::SP; } diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h index 501f504..68ef08a 100644 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h @@ -76,7 +76,7 @@ namespace llvm { void emitPrologue(MachineFunction &MF) const; void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getRARegister() const; // Exception handling queries. diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp index 47ba2fe..45d7c35 100644 --- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp +++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp @@ -28,7 +28,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : LLVMTargetMachine(T, TT), - DataLayout("e-p:32:32-i64:32-f64:32"), + DataLayout("e-p:32:32-i64:32-f64:32-n32"), Subtarget(TT, FS), TLInfo(*this), InstrInfo(Subtarget), diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp index 007fe8f..dc9f81c4 100644 --- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp +++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp @@ -406,7 +406,7 @@ void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) { ++EmittedInsts; processDebugLoc(MI, true); printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); processDebugLoc(MI, false); O << '\n'; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index 1f9e5fc..c69a751 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -417,8 +417,6 @@ namespace { void SPUDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); - // Select target instructions for the DAG. SelectRoot(*CurDAG); CurDAG->RemoveDeadNodes(); diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index aaf0783..4dd82a6 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -1090,7 +1090,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type // or we're forced to do vararg - int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); + int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); ArgOffset += StackSlotSize; @@ -1110,7 +1110,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, // Create the frame slot for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) { - VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset); + VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset, + true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8); SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0); diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td index 09849da..d3b575a 100644 --- a/lib/Target/CellSPU/SPUInstrInfo.td +++ b/lib/Target/CellSPU/SPUInstrInfo.td @@ -3601,21 +3601,23 @@ def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)), (BRASL texternalsym:$func)>; // Unconditional branches: -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { - def BR : - UncondBranch<0b001001100, (outs), (ins brtarget:$dest), - "br\t$dest", - [(br bb:$dest)]>; - - // Unconditional, absolute address branch - def BRA: - UncondBranch<0b001100000, (outs), (ins brtarget:$dest), - "bra\t$dest", - [/* no pattern */]>; +let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { + let isBarrier = 1 in { + def BR : + UncondBranch<0b001001100, (outs), (ins brtarget:$dest), + "br\t$dest", + [(br bb:$dest)]>; + + // Unconditional, absolute address branch + def BRA: + UncondBranch<0b001100000, (outs), (ins brtarget:$dest), + "bra\t$dest", + [/* no pattern */]>; - // Indirect branch - def BI: - BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; + // Indirect branch + def BI: + BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>; + } // Conditional branches: class BRNZInst<dag IOL, list<dag> pattern>: diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp index 8412006..af94e67 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.cpp +++ b/lib/Target/CellSPU/SPURegisterInfo.cpp @@ -596,7 +596,7 @@ SPURegisterInfo::getRARegister() const } unsigned -SPURegisterInfo::getFrameRegister(MachineFunction &MF) const +SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SPU::R1; } diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h index 1d9d07e..9691cb6 100644 --- a/lib/Target/CellSPU/SPURegisterInfo.h +++ b/lib/Target/CellSPU/SPURegisterInfo.h @@ -78,7 +78,7 @@ namespace llvm { //! Get return address register (LR, aka R0) unsigned getRARegister() const; //! Get the stack frame register (SP, aka R1) - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; //! Perform target-specific stack frame setup. void getInitialFrameState(std::vector<MachineMove> &Moves) const; diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h index 94ac73c..88201c6 100644 --- a/lib/Target/CellSPU/SPUSubtarget.h +++ b/lib/Target/CellSPU/SPUSubtarget.h @@ -82,7 +82,7 @@ namespace llvm { const char *getTargetDataString() const { return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128" "-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128" - "-s:128:128"; + "-s:128:128-n32:64"; } }; } // End llvm namespace diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp index 11ac931..145359f 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp @@ -22,6 +22,7 @@ #include "llvm/Constants.h" #include "llvm/DerivedTypes.h" #include "llvm/Module.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/DwarfWriter.h" #include "llvm/CodeGen/MachineModuleInfo.h" @@ -70,9 +71,6 @@ namespace { void printSrcMemOperand(const MachineInstr *MI, int OpNum, const char* Modifier = 0); void printCCOperand(const MachineInstr *MI, int OpNum); - void printInstruction(const MachineInstr *MI); // autogenerated. - static const char *getRegisterName(unsigned RegNo); - void printMachineInstruction(const MachineInstr * MI); bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, @@ -82,13 +80,10 @@ namespace { const char *ExtraCode); void printInstructionThroughMCStreamer(const MachineInstr *MI); + void PrintGlobalVariable(const GlobalVariable* GVar); void emitFunctionHeader(const MachineFunction &MF); bool runOnMachineFunction(MachineFunction &F); - virtual void PrintGlobalVariable(const GlobalVariable *GV) { - // FIXME: No support for global variables? - } - void getAnalysisUsage(AnalysisUsage &AU) const { AsmPrinter::getAnalysisUsage(AU); AU.setPreservesAll(); @@ -96,8 +91,89 @@ namespace { }; } // end of anonymous namespace -#include "MSP430GenAsmWriter.inc" +void MSP430AsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) { + if (!GVar->hasInitializer()) + return; // External global require no code + + // Check to see if this is a special global used by LLVM, if so, emit it. + if (EmitSpecialLLVMGlobal(GVar)) + return; + + const TargetData *TD = TM.getTargetData(); + + std::string name = Mang->getMangledName(GVar); + Constant *C = GVar->getInitializer(); + unsigned Size = TD->getTypeAllocSize(C->getType()); + unsigned Align = TD->getPreferredAlignmentLog(GVar); + + printVisibility(name, GVar->getVisibility()); + + O << "\t.type\t" << name << ",@object\n"; + + OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang, + TM)); + + if (C->isNullValue() && !GVar->hasSection() && + !GVar->isThreadLocal() && + (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) { + + if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it. + + if (GVar->hasLocalLinkage()) + O << "\t.local\t" << name << '\n'; + + O << MAI->getCOMMDirective() << name << ',' << Size; + if (MAI->getCOMMDirectiveTakesAlignment()) + O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align); + + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + return; + } + + switch (GVar->getLinkage()) { + case GlobalValue::CommonLinkage: + case GlobalValue::LinkOnceAnyLinkage: + case GlobalValue::LinkOnceODRLinkage: + case GlobalValue::WeakAnyLinkage: + case GlobalValue::WeakODRLinkage: + O << "\t.weak\t" << name << '\n'; + break; + case GlobalValue::DLLExportLinkage: + case GlobalValue::AppendingLinkage: + // FIXME: appending linkage variables should go into a section of + // their name or something. For now, just emit them as external. + case GlobalValue::ExternalLinkage: + // If external or appending, declare as a global symbol + O << "\t.globl " << name << '\n'; + // FALL THROUGH + case GlobalValue::PrivateLinkage: + case GlobalValue::LinkerPrivateLinkage: + case GlobalValue::InternalLinkage: + break; + default: + assert(0 && "Unknown linkage type!"); + } + + // Use 16-bit alignment by default to simplify bunch of stuff + EmitAlignment(Align, GVar); + O << name << ":"; + if (VerboseAsm) { + O.PadToColumn(MAI->getCommentColumn()); + O << MAI->getCommentString() << ' '; + WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent()); + } + O << '\n'; + EmitGlobalConstant(C); + + if (MAI->hasDotTypeDotSizeDirective()) + O << "\t.size\t" << name << ", " << Size << '\n'; +} void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) { const Function *F = MF.getFunction(); @@ -161,14 +237,9 @@ void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) { processDebugLoc(MI, true); - // Call the autogenerated instruction printer routines. - if (EnableMCInst) { - printInstructionThroughMCStreamer(MI); - } else { - printInstruction(MI); - } + printInstructionThroughMCStreamer(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; @@ -180,7 +251,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum, const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { case MachineOperand::MO_Register: - O << getRegisterName(MO.getReg()); + O << MSP430InstPrinter::getRegisterName(MO.getReg()); return; case MachineOperand::MO_Immediate: if (!Modifier || strcmp(Modifier, "nohash")) @@ -224,22 +295,23 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum, const MachineOperand &Base = MI->getOperand(OpNum); const MachineOperand &Disp = MI->getOperand(OpNum+1); - if (Base.isGlobal()) - printOperand(MI, OpNum, "mem"); - else if (Disp.isImm() && !Base.getReg()) + // Print displacement first + if (!Disp.isImm()) { + printOperand(MI, OpNum+1, "mem"); + } else { + if (!Base.getReg()) + O << '&'; + + printOperand(MI, OpNum+1, "nohash"); + } + + + // Print register base field + if (Base.getReg()) { + O << '('; printOperand(MI, OpNum); - else if (Base.getReg()) { - if (Disp.getImm()) { - printOperand(MI, OpNum + 1, "nohash"); - O << '('; - printOperand(MI, OpNum); - O << ')'; - } else { - O << '@'; - printOperand(MI, OpNum); - } - } else - llvm_unreachable("Unsupported memory operand"); + O << ')'; + } } void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) { @@ -294,8 +366,7 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, } //===----------------------------------------------------------------------===// -void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) -{ +void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI){ MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this); @@ -309,7 +380,6 @@ void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) printKill(MI); return; case TargetInstrInfo::INLINEASM: - O << '\t'; printInlineAsm(MI); return; case TargetInstrInfo::IMPLICIT_DEF: @@ -324,7 +394,18 @@ void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) printMCInst(&TmpInst); } +static MCInstPrinter *createMSP430MCInstPrinter(const Target &T, + unsigned SyntaxVariant, + const MCAsmInfo &MAI, + raw_ostream &O) { + if (SyntaxVariant == 0) + return new MSP430InstPrinter(O, MAI); + return 0; +} + // Force static initialization. extern "C" void LLVMInitializeMSP430AsmPrinter() { RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target); + TargetRegistry::RegisterMCInstPrinter(TheMSP430Target, + createMSP430MCInstPrinter); } diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp index a3ecc67..0a403c4 100644 --- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp +++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp @@ -25,11 +25,9 @@ using namespace llvm; // Include the auto-generated portion of the assembly writer. #define MachineInstr MCInst -#define MSP430AsmPrinter MSP430InstPrinter // FIXME: REMOVE. #define NO_ASM_WRITER_BOILERPLATE #include "MSP430GenAsmWriter.inc" #undef MachineInstr -#undef MSP430AsmPrinter void MSP430InstPrinter::printInst(const MCInst *MI) { printInstruction(MI); @@ -65,25 +63,22 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo, const MCOperand &Base = MI->getOperand(OpNo); const MCOperand &Disp = MI->getOperand(OpNo+1); - // FIXME: move global to displacement field! - if (Base.isExpr()) { + // Print displacement first + if (Disp.isExpr()) { O << '&'; - Base.getExpr()->print(O, &MAI); - } else if (Disp.isImm() && !Base.isReg()) - printOperand(MI, OpNo); - else if (Base.isReg()) { - if (Disp.getImm()) { - O << Disp.getImm() << '('; - printOperand(MI, OpNo); - O << ')'; - } else { - O << '@'; - printOperand(MI, OpNo); - } + Disp.getExpr()->print(O, &MAI); } else { - Base.dump(); - Disp.dump(); - llvm_unreachable("Unsupported memory operand"); + assert(Disp.isImm() && "Expected immediate in displacement field"); + if (!Base.getReg()) + O << '&'; + + O << Disp.getImm(); + } + + + // Print register base field + if (Base.getReg()) { + O << '(' << getRegisterName(Base.getReg()) << ')'; } } diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td index 89313ab..870a3df 100644 --- a/lib/Target/MSP430/MSP430.td +++ b/lib/Target/MSP430/MSP430.td @@ -50,11 +50,17 @@ include "MSP430InstrInfo.td" def MSP430InstrInfo : InstrInfo {} + +def MSP430InstPrinter : AsmWriter { + string AsmWriterClassName = "InstPrinter"; +} + //===----------------------------------------------------------------------===// // Target Declaration //===----------------------------------------------------------------------===// def MSP430 : Target { let InstructionSet = MSP430InstrInfo; + let AssemblyWriters = [MSP430InstPrinter]; } diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp index b7d9282..c0084be 100644 --- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp +++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp @@ -45,6 +45,70 @@ static const bool ViewRMWDAGs = false; STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); + +namespace { + struct MSP430ISelAddressMode { + enum { + RegBase, + FrameIndexBase + } BaseType; + + struct { // This is really a union, discriminated by BaseType! + SDValue Reg; + int FrameIndex; + } Base; + + int16_t Disp; + GlobalValue *GV; + Constant *CP; + BlockAddress *BlockAddr; + const char *ES; + int JT; + unsigned Align; // CP alignment. + + MSP430ISelAddressMode() + : BaseType(RegBase), Disp(0), GV(0), CP(0), BlockAddr(0), + ES(0), JT(-1), Align(0) { + } + + bool hasSymbolicDisplacement() const { + return GV != 0 || CP != 0 || ES != 0 || JT != -1; + } + + bool hasBaseReg() const { + return Base.Reg.getNode() != 0; + } + + void setBaseReg(SDValue Reg) { + BaseType = RegBase; + Base.Reg = Reg; + } + + void dump() { + errs() << "MSP430ISelAddressMode " << this << '\n'; + if (Base.Reg.getNode() != 0) { + errs() << "Base.Reg "; + Base.Reg.getNode()->dump(); + } else { + errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'; + } + errs() << " Disp " << Disp << '\n'; + if (GV) { + errs() << "GV "; + GV->dump(); + } else if (CP) { + errs() << " CP "; + CP->dump(); + errs() << " Align" << Align << '\n'; + } else if (ES) { + errs() << "ES "; + errs() << ES << '\n'; + } else if (JT != -1) + errs() << " JT" << JT << " Align" << Align << '\n'; + } + }; +} + /// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine /// instructions for SelectionDAG operations. /// @@ -65,6 +129,10 @@ namespace { return "MSP430 DAG->DAG Pattern Instruction Selection"; } + bool MatchAddress(SDValue N, MSP430ISelAddressMode &AM); + bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM); + bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM); + bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U, SDNode *Root) const; @@ -79,6 +147,10 @@ namespace { DenseMap<SDNode*, SDNode*> RMWStores; void PreprocessForRMW(); SDNode *Select(SDValue Op); + SDNode *SelectIndexedLoad(SDValue Op); + SDNode *SelectIndexedBinOp(SDValue Op, SDValue N1, SDValue N2, + unsigned Opc8, unsigned Opc16); + bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp); #ifndef NDEBUG @@ -95,50 +167,155 @@ FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM, return new MSP430DAGToDAGISel(TM, OptLevel); } -// FIXME: This is pretty dummy routine and needs to be rewritten in the future. -bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue Addr, - SDValue &Base, SDValue &Disp) { - // Try to match frame address first. - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16); - Disp = CurDAG->getTargetConstant(0, MVT::i16); + +/// MatchWrapper - Try to match MSP430ISD::Wrapper node into an addressing mode. +/// These wrap things that will resolve down into a symbol reference. If no +/// match is possible, this returns true, otherwise it returns false. +bool MSP430DAGToDAGISel::MatchWrapper(SDValue N, MSP430ISelAddressMode &AM) { + // If the addressing mode already has a symbol as the displacement, we can + // never match another symbol. + if (AM.hasSymbolicDisplacement()) return true; + + SDValue N0 = N.getOperand(0); + + if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { + AM.GV = G->getGlobal(); + AM.Disp += G->getOffset(); + //AM.SymbolFlags = G->getTargetFlags(); + } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { + AM.CP = CP->getConstVal(); + AM.Align = CP->getAlignment(); + AM.Disp += CP->getOffset(); + //AM.SymbolFlags = CP->getTargetFlags(); + } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { + AM.ES = S->getSymbol(); + //AM.SymbolFlags = S->getTargetFlags(); + } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { + AM.JT = J->getIndex(); + //AM.SymbolFlags = J->getTargetFlags(); + } else { + AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress(); + //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags(); } + return false; +} - switch (Addr.getOpcode()) { - case ISD::ADD: - // Operand is a result from ADD with constant operand which fits into i16. - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) { - uint64_t CVal = CN->getZExtValue(); - // Offset should fit into 16 bits. - if (((CVal << 48) >> 48) == CVal) { - SDValue N0 = Addr.getOperand(0); - if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N0)) - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16); - else - Base = N0; - - Disp = CurDAG->getTargetConstant(CVal, MVT::i16); - return true; - } +/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the +/// specified addressing mode without any further recursion. +bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM) { + // Is the base register already occupied? + if (AM.BaseType != MSP430ISelAddressMode::RegBase || AM.Base.Reg.getNode()) { + // If so, we cannot select it. + return true; + } + + // Default, generate it as a register. + AM.BaseType = MSP430ISelAddressMode::RegBase; + AM.Base.Reg = N; + return false; +} + +bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) { + DebugLoc dl = N.getDebugLoc(); + DEBUG({ + errs() << "MatchAddress: "; + AM.dump(); + }); + + switch (N.getOpcode()) { + default: break; + case ISD::Constant: { + uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); + AM.Disp += Val; + return false; + } + + case MSP430ISD::Wrapper: + if (!MatchWrapper(N, AM)) + return false; + break; + + case ISD::FrameIndex: + if (AM.BaseType == MSP430ISelAddressMode::RegBase + && AM.Base.Reg.getNode() == 0) { + AM.BaseType = MSP430ISelAddressMode::FrameIndexBase; + AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); + return false; } break; - case MSP430ISD::Wrapper: - SDValue N0 = Addr.getOperand(0); - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { - Base = CurDAG->getTargetGlobalAddress(G->getGlobal(), - MVT::i16, G->getOffset()); - Disp = CurDAG->getTargetConstant(0, MVT::i16); - return true; - } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(N0)) { - Base = CurDAG->getTargetExternalSymbol(E->getSymbol(), MVT::i16); - Disp = CurDAG->getTargetConstant(0, MVT::i16); + + case ISD::ADD: { + MSP430ISelAddressMode Backup = AM; + if (!MatchAddress(N.getNode()->getOperand(0), AM) && + !MatchAddress(N.getNode()->getOperand(1), AM)) + return false; + AM = Backup; + if (!MatchAddress(N.getNode()->getOperand(1), AM) && + !MatchAddress(N.getNode()->getOperand(0), AM)) + return false; + AM = Backup; + + break; + } + + case ISD::OR: + // Handle "X | C" as "X + C" iff X is known to have C bits clear. + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { + MSP430ISelAddressMode Backup = AM; + uint64_t Offset = CN->getSExtValue(); + // Start with the LHS as an addr mode. + if (!MatchAddress(N.getOperand(0), AM) && + // Address could not have picked a GV address for the displacement. + AM.GV == NULL && + // Check to see if the LHS & C is zero. + CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { + AM.Disp += Offset; + return false; + } + AM = Backup; } break; - }; + } + + return MatchAddressBase(N, AM); +} + +/// SelectAddr - returns true if it is able pattern match an addressing mode. +/// It returns the operands which make up the maximal addressing mode it can +/// match by reference. +bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, + SDValue &Base, SDValue &Disp) { + MSP430ISelAddressMode AM; + + if (MatchAddress(N, AM)) + return false; + + EVT VT = N.getValueType(); + if (AM.BaseType == MSP430ISelAddressMode::RegBase) { + if (!AM.Base.Reg.getNode()) + AM.Base.Reg = CurDAG->getRegister(0, VT); + } - Base = Addr; - Disp = CurDAG->getTargetConstant(0, MVT::i16); + Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ? + CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) : + AM.Base.Reg; + + if (AM.GV) + Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i16, AM.Disp, + 0/*AM.SymbolFlags*/); + else if (AM.CP) + Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16, + AM.Align, AM.Disp, 0/*AM.SymbolFlags*/); + else if (AM.ES) + Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i16, 0/*AM.SymbolFlags*/); + else if (AM.JT != -1) + Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/); + else if (AM.BlockAddr) + Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/, + true /*AM.SymbolFlags*/); + else + Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16); return true; } @@ -187,7 +364,7 @@ bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U, /// TokenFactor by PreprocessForRMW. Query the map Store => Load1 (created /// during preprocessing) to determine whether it's legal to introduce such /// "cycle" for a moment. - DenseMap<SDNode*, SDNode*>::iterator I = RMWStores.find(Root); + DenseMap<SDNode*, SDNode*>::const_iterator I = RMWStores.find(Root); if (I != RMWStores.end() && I->second == N) return true; @@ -423,6 +600,89 @@ void MSP430DAGToDAGISel::PreprocessForRMW() { } } + +static bool isValidIndexedLoad(const LoadSDNode *LD) { + ISD::MemIndexedMode AM = LD->getAddressingMode(); + if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD) + return false; + + EVT VT = LD->getMemoryVT(); + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::i8: + // Sanity check + if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 1) + return false; + + break; + case MVT::i16: + // Sanity check + if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 2) + return false; + + break; + default: + return false; + } + + return true; +} + +SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDValue Op) { + LoadSDNode *LD = cast<LoadSDNode>(Op); + if (!isValidIndexedLoad(LD)) + return NULL; + + MVT VT = LD->getMemoryVT().getSimpleVT(); + + unsigned Opcode = 0; + switch (VT.SimpleTy) { + case MVT::i8: + Opcode = MSP430::MOV8rm_POST; + break; + case MVT::i16: + Opcode = MSP430::MOV16rm_POST; + break; + default: + return NULL; + } + + return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(), + VT, MVT::i16, MVT::Other, + LD->getBasePtr(), LD->getChain()); +} + +SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDValue Op, + SDValue N1, SDValue N2, + unsigned Opc8, unsigned Opc16) { + if (N1.getOpcode() == ISD::LOAD && + N1.hasOneUse() && + IsLegalAndProfitableToFold(N1.getNode(), Op.getNode(), Op.getNode())) { + LoadSDNode *LD = cast<LoadSDNode>(N1); + if (!isValidIndexedLoad(LD)) + return NULL; + + MVT VT = LD->getMemoryVT().getSimpleVT(); + unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8); + MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); + MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand(); + SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() }; + SDNode *ResNode = + CurDAG->SelectNodeTo(Op.getNode(), Opc, + VT, MVT::i16, MVT::Other, + Ops0, 3); + cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1); + // Transfer chain. + ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2)); + // Transfer writeback. + ReplaceUses(SDValue(N1.getNode(), 1), SDValue(ResNode, 1)); + return ResNode; + } + + return NULL; +} + + /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void MSP430DAGToDAGISel::InstructionSelect() { @@ -438,8 +698,6 @@ void MSP430DAGToDAGISel::InstructionSelect() { DEBUG(errs() << "Selection DAG after RMW preprocessing:\n"); DEBUG(CurDAG->dump()); - DEBUG(BB->dump()); - // Codegen the basic block. DEBUG(errs() << "===== Instruction selection begins:\n"); DEBUG(Indent = 0); @@ -482,6 +740,72 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) { return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16, TFI, CurDAG->getTargetConstant(0, MVT::i16)); } + case ISD::LOAD: + if (SDNode *ResNode = SelectIndexedLoad(Op)) + return ResNode; + // Other cases are autogenerated. + break; + case ISD::ADD: + if (SDNode *ResNode = + SelectIndexedBinOp(Op, + Op.getOperand(0), Op.getOperand(1), + MSP430::ADD8rm_POST, MSP430::ADD16rm_POST)) + return ResNode; + else if (SDNode *ResNode = + SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + MSP430::ADD8rm_POST, MSP430::ADD16rm_POST)) + return ResNode; + + // Other cases are autogenerated. + break; + case ISD::SUB: + if (SDNode *ResNode = + SelectIndexedBinOp(Op, + Op.getOperand(0), Op.getOperand(1), + MSP430::SUB8rm_POST, MSP430::SUB16rm_POST)) + return ResNode; + + // Other cases are autogenerated. + break; + case ISD::AND: + if (SDNode *ResNode = + SelectIndexedBinOp(Op, + Op.getOperand(0), Op.getOperand(1), + MSP430::AND8rm_POST, MSP430::AND16rm_POST)) + return ResNode; + else if (SDNode *ResNode = + SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + MSP430::AND8rm_POST, MSP430::AND16rm_POST)) + return ResNode; + + // Other cases are autogenerated. + break; + case ISD::OR: + if (SDNode *ResNode = + SelectIndexedBinOp(Op, + Op.getOperand(0), Op.getOperand(1), + MSP430::OR8rm_POST, MSP430::OR16rm_POST)) + return ResNode; + else if (SDNode *ResNode = + SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + MSP430::OR8rm_POST, MSP430::OR16rm_POST)) + return ResNode; + + // Other cases are autogenerated. + break; + case ISD::XOR: + if (SDNode *ResNode = + SelectIndexedBinOp(Op, + Op.getOperand(0), Op.getOperand(1), + MSP430::XOR8rm_POST, MSP430::XOR16rm_POST)) + return ResNode; + else if (SDNode *ResNode = + SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0), + MSP430::XOR8rm_POST, MSP430::XOR16rm_POST)) + return ResNode; + + // Other cases are autogenerated. + break; } // Select the default instruction diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index 34e6d2c..5a925f5 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -62,10 +62,14 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setBooleanContents(ZeroOrOneBooleanContent); setSchedulingPreference(SchedulingForLatency); - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); + // We have post-incremented loads / stores. + setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); + setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal); + + setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); + setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); // We don't have any truncstores @@ -115,12 +119,23 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); // FIXME: Implement efficiently multiplication by a constant + setOperationAction(ISD::MUL, MVT::i8, Expand); + setOperationAction(ISD::MULHS, MVT::i8, Expand); + setOperationAction(ISD::MULHU, MVT::i8, Expand); + setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand); setOperationAction(ISD::MUL, MVT::i16, Expand); setOperationAction(ISD::MULHS, MVT::i16, Expand); setOperationAction(ISD::MULHU, MVT::i16, Expand); setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand); setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand); + setOperationAction(ISD::UDIV, MVT::i8, Expand); + setOperationAction(ISD::UDIVREM, MVT::i8, Expand); + setOperationAction(ISD::UREM, MVT::i8, Expand); + setOperationAction(ISD::SDIV, MVT::i8, Expand); + setOperationAction(ISD::SDIVREM, MVT::i8, Expand); + setOperationAction(ISD::SREM, MVT::i8, Expand); setOperationAction(ISD::UDIV, MVT::i16, Expand); setOperationAction(ISD::UDIVREM, MVT::i16, Expand); setOperationAction(ISD::UREM, MVT::i16, Expand); @@ -303,7 +318,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, << "\n"; } // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset()); + int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -659,6 +674,42 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op, DAG.getValueType(Val.getValueType())); } +/// getPostIndexedAddressParts - returns true by value, base pointer and +/// offset pointer and addressing mode by reference if this node can be +/// combined with a load / store to form a post-indexed load / store. +bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const { + + LoadSDNode *LD = cast<LoadSDNode>(N); + if (LD->getExtensionType() != ISD::NON_EXTLOAD) + return false; + + EVT VT = LD->getMemoryVT(); + if (VT != MVT::i8 && VT != MVT::i16) + return false; + + if (Op->getOpcode() != ISD::ADD) + return false; + + if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) { + uint64_t RHSC = RHS->getZExtValue(); + if ((VT == MVT::i16 && RHSC != 2) || + (VT == MVT::i8 && RHSC != 1)) + return false; + + Base = Op->getOperand(0); + Offset = DAG.getConstant(RHSC, VT); + AM = ISD::POST_INC; + return true; + } + + return false; +} + + const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { default: return NULL; diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index fdbc384..d413ccb 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -136,6 +136,12 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, DebugLoc dl, SelectionDAG &DAG); + virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, + SDValue &Base, + SDValue &Offset, + ISD::MemIndexedMode &AM, + SelectionDAG &DAG) const; + const MSP430Subtarget &Subtarget; const MSP430TargetMachine &TM; }; diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index a6d9638..b2f09c7 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -35,15 +35,23 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (MI != MBB.end()) DL = MI->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), + MachineMemOperand::MOStore, 0, + MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); if (RC == &MSP430::GR16RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV16mr)) .addFrameIndex(FrameIdx).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else if (RC == &MSP430::GR8RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV8mr)) .addFrameIndex(FrameIdx).addImm(0) - .addReg(SrcReg, getKillRegState(isKill)); + .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO); else llvm_unreachable("Cannot store this register to stack slot!"); } @@ -54,13 +62,21 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterClass *RC) const{ DebugLoc DL = DebugLoc::getUnknownLoc(); if (MI != MBB.end()) DL = MI->getDebugLoc(); + MachineFunction &MF = *MBB.getParent(); + MachineFrameInfo &MFI = *MF.getFrameInfo(); + + MachineMemOperand *MMO = + MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx), + MachineMemOperand::MOLoad, 0, + MFI.getObjectSize(FrameIdx), + MFI.getObjectAlignment(FrameIdx)); if (RC == &MSP430::GR16RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV16rm)) - .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0); + .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO); else if (RC == &MSP430::GR8RegClass) BuildMI(MBB, MI, DL, get(MSP430::MOV8rm)) - .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0); + .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO); else llvm_unreachable("Cannot store this register to stack slot!"); } diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td index 2b50669..c3bbfe8 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.td +++ b/lib/Target/MSP430/MSP430InstrInfo.td @@ -127,7 +127,7 @@ def NOP : Pseudo<(outs), (ins), "nop", []>; // // FIXME: Provide proper encoding! -let isReturn = 1, isTerminator = 1 in { +let isReturn = 1, isTerminator = 1, isBarrier = 1 in { def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>; } @@ -142,7 +142,7 @@ let isBarrier = 1 in // Conditional branches let Uses = [SRW] in def JCC : Pseudo<(outs), (ins brtarget:$dst, cc:$cc), - "j$cc $dst", + "j$cc\t$dst", [(MSP430brcc bb:$dst, imm:$cc)]>; } // isBranch, isTerminator @@ -215,6 +215,13 @@ def MOVZX16rm8 : Pseudo<(outs GR16:$dst), (ins memsrc:$src), "mov.b\t{$src, $dst}", [(set GR16:$dst, (zextloadi16i8 addr:$src))]>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$base = $base_wb" in { +def MOV8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR16:$base), + "mov.b\t{@$base+, $dst}", []>; +def MOV16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$base), + "mov.w\t{@$base+, $dst}", []>; +} + // Any instruction that defines a 8-bit result leaves the high half of the // register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may // be copying from a truncate, but any other 8-bit operation will zero-extend @@ -280,6 +287,15 @@ def ADD16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), [(set GR16:$dst, (add GR16:$src1, (load addr:$src2))), (implicit SRW)]>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1, +Constraints = "$base = $base_wb, $src1 = $dst" in { +def ADD8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base), + "add.b\t{@$base+, $dst}", []>; +def ADD16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base), + "add.w\t{@$base+, $dst}", []>; +} + + def ADD8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), "add.b\t{$src2, $dst}", [(set GR8:$dst, (add GR8:$src1, imm:$src2)), @@ -409,6 +425,14 @@ def AND16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), [(set GR16:$dst, (and GR16:$src1, (load addr:$src2))), (implicit SRW)]>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1, +Constraints = "$base = $base_wb, $src1 = $dst" in { +def AND8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base), + "and.b\t{@$base+, $dst}", []>; +def AND16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base), + "and.w\t{@$base+, $dst}", []>; +} + let isTwoAddress = 0 in { def AND8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), "and.b\t{$src, $dst}", @@ -438,6 +462,92 @@ def AND16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), (implicit SRW)]>; } +let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y +def OR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "bis.b\t{$src2, $dst}", + [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>; +def OR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "bis.w\t{$src2, $dst}", + [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>; +} + +def OR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), + "bis.b\t{$src2, $dst}", + [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>; +def OR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), + "bis.w\t{$src2, $dst}", + [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>; + +def OR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + "bis.b\t{$src2, $dst}", + [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>; +def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + "bis.w\t{$src2, $dst}", + [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>; + +let mayLoad = 1, hasExtraDefRegAllocReq = 1, +Constraints = "$base = $base_wb, $src1 = $dst" in { +def OR8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base), + "bis.b\t{@$base+, $dst}", []>; +def OR16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base), + "bis.w\t{@$base+, $dst}", []>; +} + +let isTwoAddress = 0 in { +def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "bis.b\t{$src, $dst}", + [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>; +def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "bis.w\t{$src, $dst}", + [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>; + +def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), + "bis.b\t{$src, $dst}", + [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>; +def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), + "bis.w\t{$src, $dst}", + [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>; + +def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "bis.b\t{$src, $dst}", + [(store (or (i8 (load addr:$dst)), + (i8 (load addr:$src))), addr:$dst)]>; +def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "bis.w\t{$src, $dst}", + [(store (or (i16 (load addr:$dst)), + (i16 (load addr:$src))), addr:$dst)]>; +} + +// bic does not modify condition codes +def BIC8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), + "bic.b\t{$src2, $dst}", + [(set GR8:$dst, (and GR8:$src1, (not GR8:$src2)))]>; +def BIC16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), + "bic.w\t{$src2, $dst}", + [(set GR16:$dst, (and GR16:$src1, (not GR16:$src2)))]>; + +def BIC8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), + "bic.b\t{$src2, $dst}", + [(set GR8:$dst, (and GR8:$src1, (not (i8 (load addr:$src2)))))]>; +def BIC16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), + "bic.w\t{$src2, $dst}", + [(set GR16:$dst, (and GR16:$src1, (not (i16 (load addr:$src2)))))]>; + +let isTwoAddress = 0 in { +def BIC8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), + "bic.b\t{$src, $dst}", + [(store (and (load addr:$dst), (not GR8:$src)), addr:$dst)]>; +def BIC16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), + "bic.w\t{$src, $dst}", + [(store (and (load addr:$dst), (not GR16:$src)), addr:$dst)]>; + +def BIC8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "bic.b\t{$src, $dst}", + [(store (and (load addr:$dst), (not (i8 (load addr:$src)))), addr:$dst)]>; +def BIC16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), + "bic.w\t{$src, $dst}", + [(store (and (load addr:$dst), (not (i16 (load addr:$src)))), addr:$dst)]>; +} let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y def XOR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), @@ -468,6 +578,14 @@ def XOR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), [(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))), (implicit SRW)]>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1, +Constraints = "$base = $base_wb, $src1 = $dst" in { +def XOR8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base), + "xor.b\t{@$base+, $dst}", []>; +def XOR16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base), + "xor.w\t{@$base+, $dst}", []>; +} + let isTwoAddress = 0 in { def XOR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), "xor.b\t{$src, $dst}", @@ -525,6 +643,14 @@ def SUB16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), [(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))), (implicit SRW)]>; +let mayLoad = 1, hasExtraDefRegAllocReq = 1, +Constraints = "$base = $base_wb, $src1 = $dst" in { +def SUB8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base), + "sub.b\t{@$base+, $dst}", []>; +def SUB16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base), + "sub.w\t{@$base+, $dst}", []>; +} + let isTwoAddress = 0 in { def SUB8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), "sub.b\t{$src, $dst}", @@ -650,58 +776,14 @@ def SEXT16r : Pseudo<(outs GR16:$dst), (ins GR16:$src), } // Defs = [SRW] +def ZEXT16r : Pseudo<(outs GR16:$dst), (ins GR16:$src), + "mov.b\t{$src, $dst}", + [(set GR16:$dst, (zext (trunc GR16:$src)))]>; + def SWPB16r : Pseudo<(outs GR16:$dst), (ins GR16:$src), "swpb\t$dst", [(set GR16:$dst, (bswap GR16:$src))]>; -let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y -def OR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2), - "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>; -def OR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2), - "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>; -} - -def OR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2), - "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>; -def OR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2), - "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>; - -def OR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2), - "bis.b\t{$src2, $dst}", - [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>; -def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2), - "bis.w\t{$src2, $dst}", - [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>; - -let isTwoAddress = 0 in { -def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src), - "bis.b\t{$src, $dst}", - [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>; -def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src), - "bis.w\t{$src, $dst}", - [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>; - -def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src), - "bis.b\t{$src, $dst}", - [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>; -def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src), - "bis.w\t{$src, $dst}", - [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>; - -def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), - "bis.b\t{$src, $dst}", - [(store (or (i8 (load addr:$dst)), - (i8 (load addr:$src))), addr:$dst)]>; -def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src), - "bis.w\t{$src, $dst}", - [(store (or (i16 (load addr:$dst)), - (i16 (load addr:$src))), addr:$dst)]>; -} - } // isTwoAddress = 1 // Integer comparisons @@ -851,3 +933,6 @@ def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst), (SUB8mr addr:$dst, GR8:$src)>; def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst), (SUB8mm addr:$dst, addr:$src)>; + +// peephole patterns +def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>; diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp index 069313e..4e3a8d0 100644 --- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp +++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp @@ -15,6 +15,12 @@ using namespace llvm; MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) { + PrivateGlobalPrefix = ".L"; + WeakRefDirective ="\t.weak\t"; + SetDirective = "\t.set\t"; + PCSymbol="."; + AlignmentIsInBytes = false; AllowNameToStartWithDigit = true; + UsesELFSectionDirectiveForBSS = true; } diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 1a5893e..92baad9 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -212,7 +212,7 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) const { // Create a frame entry for the FPW register that must be saved. if (hasFP(MF)) { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true, false); assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() && "Slot for FPW register must be last in order to be found!"); FrameIdx = 0; @@ -355,7 +355,7 @@ unsigned MSP430RegisterInfo::getRARegister() const { return MSP430::PCW; } -unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const { return hasFP(MF) ? MSP430::FPW : MSP430::SPW; } diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index 5f3a216..aa08787 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -60,7 +60,7 @@ public: // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; //! Get DWARF debugging register number int getDwarfRegNum(unsigned RegNum, bool isEH) const; diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index da54507..14db406 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -32,7 +32,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T, LLVMTargetMachine(T, TT), Subtarget(TT, FS), // FIXME: Check TargetData string. - DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32"), + DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"), InstrInfo(*this), TLInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { } diff --git a/lib/Target/MSP430/README.txt b/lib/Target/MSP430/README.txt index b14e93d..5b9634b 100644 --- a/lib/Target/MSP430/README.txt +++ b/lib/Target/MSP430/README.txt @@ -11,8 +11,6 @@ available pretty soon. Some things are incomplete / not implemented yet (this list surely is not complete as well): -0. Implement asmprinting for variables :) - 1. Verify, how stuff is handling implicit zext with 8 bit operands (this might be modelled currently in improper way - should we need to mark the superreg as def for every 8 bit instruction?). diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp index 66ade89..4898fae 100644 --- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp @@ -282,7 +282,7 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print the assembly for the instruction. printInstruction(II); - if (VerboseAsm && !II->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*II); O << '\n'; diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 810dce1..cbcedb8 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -108,7 +108,6 @@ private: /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void MipsDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); // Codegen the basic block. DEBUG(errs() << "===== Instruction selection begins:\n"); DEBUG(Indent = 0); @@ -171,6 +170,27 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base) return true; } } + + // When loading from constant pools, load the lower address part in + // the instruction itself. Instead of: + // lui $2, %hi($CPI1_0) + // addiu $2, $2, %lo($CPI1_0) + // lwc1 $f0, 0($2) + // Generate: + // lui $2, %hi($CPI1_0) + // lwc1 $f0, %lo($CPI1_0)($2) + if (Addr.getOperand(0).getOpcode() == MipsISD::Hi && + Addr.getOperand(1).getOpcode() == MipsISD::Lo) { + SDValue LoVal = Addr.getOperand(1); + if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>( + LoVal.getOperand(0))) { + if (!CP->getOffset()) { + Base = Addr.getOperand(0); + Offset = LoVal.getOperand(0); + return true; + } + } + } } Base = Addr; @@ -315,6 +335,16 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) { case ISD::GLOBAL_OFFSET_TABLE: return getGlobalBaseReg(); + case ISD::ConstantFP: { + ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N); + if (N.getValueType() == MVT::f64 && CN->isExactlyValue(+0.0)) { + SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32); + ReplaceUses(N, Zero); + return Zero.getNode(); + } + break; + } + /// Handle direct and indirect calls when using PIC. On PIC, when /// GOT is smaller than about 64k (small code) the GA target is /// loaded with only one instruction. Otherwise GA's target must diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 61da8f8..c9a43b4 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -568,7 +568,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); Constant *C = N->getConstVal(); SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), - MipsII::MO_ABS_HILO); + N->getOffset(), MipsII::MO_ABS_HILO); // FIXME there isn't actually debug info here DebugLoc dl = Op.getDebugLoc(); @@ -704,7 +704,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // the stack (even if less than 4 are used as arguments) if (Subtarget->isABI_O32()) { int VTsize = EVT(MVT::i32).getSizeInBits()/8; - MFI->CreateFixedObject(VTsize, (VTsize*3)); + MFI->CreateFixedObject(VTsize, (VTsize*3), true, false); CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); } else CCInfo.AnalyzeCallOperands(Outs, CC_Mips); @@ -773,7 +773,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // if O32 ABI is used. For EABI the first address is zero. LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset()); int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, - LastArgStackLoc); + LastArgStackLoc, true, false); SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy()); @@ -849,7 +849,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create the frame index only once. SPOffset here can be anything // (this will be fixed on processFunctionBeforeFrameFinalized) if (MipsFI->getGPStackOffset() == -1) { - FI = MFI->CreateFixedObject(4, 0); + FI = MFI->CreateFixedObject(4, 0, true, false); MipsFI->setGPFI(FI); } MipsFI->setGPStackOffset(LastArgStackLoc); @@ -1002,7 +1002,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // be used on emitPrologue) to avoid mis-calc of the first stack // offset on PEI::calculateFrameObjectOffsets. // Arguments are always 32-bit. - int FI = MFI->CreateFixedObject(4, 0); + int FI = MFI->CreateFixedObject(4, 0, true, false); MipsFI->recordStoreVarArgsFI(FI, -(4+(i*4))); SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy()); @@ -1025,7 +1025,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // offset on PEI::calculateFrameObjectOffsets. // Arguments are always 32-bit. unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; - int FI = MFI->CreateFixedObject(ArgSize, 0); + int FI = MFI->CreateFixedObject(ArgSize, 0, true, false); MipsFI->recordLoadArgsFI(FI, -(ArgSize+ (FirstStackArgLoc + VA.getLocMemOffset()))); diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index bd61738..ce89cfd 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -48,6 +48,7 @@ let PrintMethod = "printFCCOperand" in def In32BitMode : Predicate<"!Subtarget.isFP64bit()">; def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">; def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">; +def IsNotMipsI : Predicate<"!Subtarget.isMips1()">; //===----------------------------------------------------------------------===// // Instruction Class Templates @@ -173,7 +174,7 @@ let fd = 0 in { } /// Floating Point Memory Instructions -let Predicates = [IsNotSingleFloat] in { +let Predicates = [IsNotSingleFloat, IsNotMipsI] in { def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr), "ldc1 $ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>; @@ -284,7 +285,12 @@ def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>; +def fpimm0neg : PatLeaf<(fpimm), [{ + return N->isExactlyValue(-0.0); +}]>; + def : Pat<(f32 fpimm0), (MTC1 ZERO)>; +def : Pat<(f32 fpimm0neg), (FNEG_S32 (MTC1 ZERO))>; def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>; def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>; diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 9159904..af64c9f 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -134,6 +134,9 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC) const { DebugLoc DL = DebugLoc::getUnknownLoc(); + const MachineFunction *MF = MBB.getParent(); + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + if (I != MBB.end()) DL = I->getDebugLoc(); if (DestRC != SrcRC) { @@ -153,6 +156,13 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, else if ((DestRC == Mips::FGR32RegisterClass) && (SrcRC == Mips::CPURegsRegisterClass)) BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg); + else if ((DestRC == Mips::AFGR64RegisterClass) && + (SrcRC == Mips::CPURegsRegisterClass) && + (SrcReg == Mips::ZERO)) { + const unsigned *AliasSet = TRI->getAliasSet(DestReg); + BuildMI(MBB, I, DL, get(Mips::MTC1), AliasSet[0]).addReg(SrcReg); + BuildMI(MBB, I, DL, get(Mips::MTC1), AliasSet[1]).addReg(SrcReg); + } // Move from/to Hi/Lo registers else if ((DestRC == Mips::HILORegisterClass) && @@ -163,9 +173,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, (DestRC == Mips::CPURegsRegisterClass)) { unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO; BuildMI(MBB, I, DL, get(Opc), DestReg); - - // Can't copy this register - } else + } else + // Can't copy this register return false; return true; diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index 949c78a..a300f49 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -103,6 +103,7 @@ public: int getGPFI() const { return GPHolder.FI; } void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; } void setGPFI(int FI) { GPHolder.FI = FI; } + bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; } bool hasLoadArgs() const { return HasLoadArgs; } bool hasStoreVarArgs() const { return HasStoreVarArgs; } diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index d2289e9..ad326db 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -287,7 +287,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const } if (hasFP(MF)) { - MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize), + MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); MipsFI->setFPStackOffset(StackOffset); TopCPUSavedRegOff = StackOffset; @@ -295,7 +295,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const } if (MFI->hasCalls()) { - MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize), + MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true), StackOffset); MipsFI->setRAStackOffset(StackOffset); TopCPUSavedRegOff = StackOffset; @@ -438,11 +438,10 @@ emitPrologue(MachineFunction &MF) const .addReg(Mips::SP).addReg(Mips::ZERO); } - // PIC speficic function prologue - if ((isPIC) && (MFI->hasCalls())) { + // Restore GP from the saved stack location + if (MipsFI->needGPSaveRestore()) BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE)) .addImm(MipsFI->getGPStackOffset()); - } } void MipsRegisterInfo:: @@ -489,13 +488,11 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const void MipsRegisterInfo:: processFunctionBeforeFrameFinalized(MachineFunction &MF) const { - // Set the SPOffset on the FI where GP must be saved/loaded. + // Set the stack offset where GP must be saved/loaded from. MachineFrameInfo *MFI = MF.getFrameInfo(); - bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_); - if (MFI->hasCalls() && isPIC) { - MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>(); + if (MipsFI->needGPSaveRestore()) MFI->setObjectOffset(MipsFI->getGPFI(), MipsFI->getGPStackOffset()); - } } unsigned MipsRegisterInfo:: @@ -504,7 +501,7 @@ getRARegister() const { } unsigned MipsRegisterInfo:: -getFrameRegister(MachineFunction &MF) const { +getFrameRegister(const MachineFunction &MF) const { return hasFP(MF) ? Mips::FP : Mips::SP; } diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h index 122f786..5b45921 100644 --- a/lib/Target/Mips/MipsRegisterInfo.h +++ b/lib/Target/Mips/MipsRegisterInfo.h @@ -65,7 +65,7 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo { /// Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; /// Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 4fa5450..b3c2313 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -38,8 +38,8 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool isLittle=false): LLVMTargetMachine(T, TT), Subtarget(TT, FS, isLittle), - DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32") : - std::string("E-p:32:32:32-i8:8:32-i16:16:32")), + DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32-n32") : + std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")), InstrInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0), TLInfo(*this) { diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp index b2a4c11..e1f2587 100644 --- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp +++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp @@ -46,7 +46,7 @@ PIC16AsmPrinter::PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) { processDebugLoc(MI, true); printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; processDebugLoc(MI, false); diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp index cc57d12..e13e6cd 100644 --- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp +++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp @@ -30,7 +30,6 @@ FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) { /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void PIC16DAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); SelectRoot(*CurDAG); CurDAG->RemoveDeadNodes(); } diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp index 635befe..71c3d37 100644 --- a/lib/Target/PIC16/PIC16ISelLowering.cpp +++ b/lib/Target/PIC16/PIC16ISelLowering.cpp @@ -1070,7 +1070,7 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op, // Put the value on stack. // Get a stack slot index and convert to es. - int FI = MF.getFrameInfo()->CreateStackObject(1, 1); + int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false); const char *tmpName = createESName(PAN::getTempdataLabel(FuncName)); SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8); diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp index 47087ab..8ba9a1d 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.cpp +++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp @@ -72,7 +72,7 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const { return -1; } -unsigned PIC16RegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned PIC16RegisterInfo::getFrameRegister(const MachineFunction &MF) const { llvm_unreachable("PIC16 Does not have any frame register"); return 0; } diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h index 8aa5a10..1d5dbbf 100644 --- a/lib/Target/PIC16/PIC16RegisterInfo.h +++ b/lib/Target/PIC16/PIC16RegisterInfo.h @@ -59,7 +59,7 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo { virtual void emitPrologue(MachineFunction &MF) const; virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const; - virtual unsigned getFrameRegister(MachineFunction &MF) const; + virtual unsigned getFrameRegister(const MachineFunction &MF) const; virtual unsigned getRARegister() const; }; diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp index 08307e7..e2acb85 100644 --- a/lib/Target/PIC16/PIC16TargetMachine.cpp +++ b/lib/Target/PIC16/PIC16TargetMachine.cpp @@ -34,7 +34,7 @@ PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT, const std::string &FS, bool Trad) : LLVMTargetMachine(T, TT), Subtarget(TT, FS, Trad), - DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"), + DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"), InstrInfo(*this), TLInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { } diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index 2dac18f..aae4607 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -594,7 +594,7 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) { printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; @@ -672,14 +672,14 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << '\n'; - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); // Emit post-function debug information. DW->EndFunction(&MF); + // Print out jump tables referenced by the function. + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + // We didn't modify anything. return false; } @@ -853,12 +853,12 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) { } } - // Print out jump tables referenced by the function. - EmitJumpTableInfo(MF.getJumpTableInfo(), MF); - // Emit post-function debug information. DW->EndFunction(&MF); + // Print out jump tables referenced by the function. + EmitJumpTableInfo(MF.getJumpTableInfo(), MF); + // We didn't modify anything. return false; } diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index b866240..fb9a240 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -187,8 +187,6 @@ private: /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void PPCDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); - // Select target instructions for the DAG. SelectRoot(*CurDAG); CurDAG->RemoveDeadNodes(); diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 7f48ef0..099fcb5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -637,7 +637,7 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) { unsigned BitSize; bool HasAnyUndefs; - if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32)) + if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) return CFP->getValueAPF().isNegZero(); @@ -1625,7 +1625,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), - isImmutable); + isImmutable, false); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, PtrVT); @@ -1690,9 +1690,10 @@ PPCTargetLowering::LowerFormalArguments_SVR4( NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - CCInfo.getNextStackOffset()); + CCInfo.getNextStackOffset(), + true, false); - VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8); + VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); // The fixed integer arguments of a variadic function are @@ -1895,7 +1896,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( CurArgOffset = CurArgOffset + (4 - ObjSize); } // The value of the object is its address. - int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset); + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(FIN); if (ObjSize==1 || ObjSize==2) { @@ -1918,7 +1919,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( // the object. if (GPR_idx != Num_GPR_Regs) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); - int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset); + int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0); @@ -2043,7 +2044,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( if (needsLoad) { int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset + (ArgSize - ObjSize), - isImmutable); + isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0); } @@ -2076,7 +2077,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( int Depth = ArgOffset; VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, - Depth); + Depth, true, false); SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); // If this function is vararg, store any remaining integer argument regs @@ -2289,7 +2290,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64, isDarwinABI); int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, - NewRetAddrLoc); + NewRetAddrLoc, + true, false); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, @@ -2300,7 +2302,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, if (isDarwinABI) { int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); - int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc); + int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, + true, false); SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, PseudoSourceValue::getFixedStack(NewFPIdx), 0); @@ -2317,7 +2320,7 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) { int Offset = ArgOffset + SPDiff; uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; - int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); + int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true,false); EVT VT = isPPC64 ? MVT::i64 : MVT::i32; SDValue FIN = DAG.getFrameIndex(FI, VT); TailCallArgumentInfo Info; @@ -3224,7 +3227,8 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { // Find out what the fix offset of the frame pointer save area. int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset); + RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset, + true, false); // Save the result. FI->setReturnAddrSaveIndex(RASI); } @@ -3250,7 +3254,8 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { isDarwinABI); // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); + FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset, + true, false); // Save the result. FI->setFramePointerSaveIndex(FPSI); } @@ -3411,7 +3416,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { // then lfd it and fcfid it. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *FrameInfo = MF.getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(8, 8); + int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -3469,7 +3474,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); // Save FP register to stack slot - int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); + int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot, NULL, 0); @@ -3667,7 +3672,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { unsigned SplatBitSize; bool HasAnyUndefs; if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, - HasAnyUndefs) || SplatBitSize > 32) + HasAnyUndefs, 0, true) || SplatBitSize > 32) return SDValue(); unsigned SplatBits = APSplatBits.getZExtValue(); @@ -4137,7 +4142,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, DebugLoc dl = Op.getDebugLoc(); // Create a stack slot that is 16-byte aligned. MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); - int FrameIdx = FrameInfo->CreateStackObject(16, 16); + int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index cf5c7c0..e65e644 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -1043,7 +1043,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isDarwinABI); // Allocate the frame index for frame pointer save area. - FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); + FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset, + true, false); // Save the result. FI->setFramePointerSaveIndex(FPSI); } @@ -1051,7 +1052,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Reserve stack space to move the linkage area to in case of a tail call. int TCSPDelta = 0; if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) { - MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta); + MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, + true, false); } // Reserve a slot closest to SP or frame pointer if we have a dynalloc or @@ -1067,7 +1069,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, const TargetRegisterClass *G8RC = &PPC::G8RCRegClass; const TargetRegisterClass *RC = IsPPC64 ? G8RC : GPRC; RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment())); + RC->getAlignment(), + false)); } } @@ -1356,12 +1359,6 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment(); unsigned MaxAlign = MFI->getMaxAlignment(); - if (needsFrameMoves) { - // Mark effective beginning of when frame pointer becomes valid. - FrameLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId); - } - // Adjust stack pointer: r1 += NegFrameSize. // If there is a preferred stack alignment, align R1 now if (!IsPPC64) { @@ -1431,12 +1428,18 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X0); } } + + std::vector<MachineMove> &Moves = MMI->getFrameMoves(); + // Add the "machine moves" for the instructions we generated above, but in + // reverse order. if (needsFrameMoves) { - std::vector<MachineMove> &Moves = MMI->getFrameMoves(); - + // Mark effective beginning of when frame pointer becomes valid. + FrameLabelId = MMI->NextLabelID(); + BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId); + + // Show update of SP. if (NegFrameSize) { - // Show update of SP. MachineLocation SPDst(MachineLocation::VirtualFP); MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize); Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc)); @@ -1451,31 +1454,15 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc)); } - // Add callee saved registers to move list. - const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); - for (unsigned I = 0, E = CSI.size(); I != E; ++I) { - int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); - unsigned Reg = CSI[I].getReg(); - if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; - MachineLocation CSDst(MachineLocation::VirtualFP, Offset); - MachineLocation CSSrc(Reg); - Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc)); + if (MustSaveLR) { + MachineLocation LRDst(MachineLocation::VirtualFP, LROffset); + MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR); + Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc)); } - - MachineLocation LRDst(MachineLocation::VirtualFP, LROffset); - MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR); - Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc)); - - // Mark effective beginning of when frame pointer is ready. - unsigned ReadyLabelId = MMI->NextLabelID(); - BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId); - - MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) : - (IsPPC64 ? PPC::X1 : PPC::R1)); - MachineLocation FPSrc(MachineLocation::VirtualFP); - Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); } + unsigned ReadyLabelId = 0; + // If there is a frame pointer, copy R1 into R31 if (HasFP) { if (!IsPPC64) { @@ -1487,6 +1474,33 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const { .addReg(PPC::X1) .addReg(PPC::X1); } + + if (needsFrameMoves) { + ReadyLabelId = MMI->NextLabelID(); + + // Mark effective beginning of when frame pointer is ready. + BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId); + + MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) : + (IsPPC64 ? PPC::X1 : PPC::R1)); + MachineLocation FPSrc(MachineLocation::VirtualFP); + Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc)); + } + } + + if (needsFrameMoves) { + unsigned LabelId = HasFP ? ReadyLabelId : FrameLabelId; + + // Add callee saved registers to move list. + const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); + for (unsigned I = 0, E = CSI.size(); I != E; ++I) { + int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx()); + unsigned Reg = CSI[I].getReg(); + if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; + MachineLocation CSDst(MachineLocation::VirtualFP, Offset); + MachineLocation CSSrc(Reg); + Moves.push_back(MachineMove(LabelId, CSDst, CSSrc)); + } } } @@ -1700,7 +1714,7 @@ unsigned PPCRegisterInfo::getRARegister() const { return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8; } -unsigned PPCRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const { if (!Subtarget.isPPC64()) return hasFP(MF) ? PPC::R31 : PPC::R1; else diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h index 1689bc2..3aeed80 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.h +++ b/lib/Target/PowerPC/PPCRegisterInfo.h @@ -83,7 +83,7 @@ public: // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; void getInitialFrameState(std::vector<MachineMove> &Moves) const; // Exception handling queries. diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 02c8ad7..75fcf62 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -101,8 +101,8 @@ public: const char *getTargetDataString() const { // Note, the alignment values for f64 and i64 on ppc64 in Darwin // documentation are wrong; these are correct (i.e. "what gcc does"). - return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128" - : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128"; + return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64" + : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128-n32"; } /// isPPC64 - Return true if we are generating code for 64-bit pointer mode. diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 3371954..8079c6e 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -20,8 +20,7 @@ #include "llvm/Support/FormattedStream.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, - const StringRef &TT) { +static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); bool isPPC64 = TheTriple.getArch() == Triple::ppc64; if (TheTriple.getOS() == Triple::Darwin) diff --git a/lib/Target/README.txt b/lib/Target/README.txt index a345d3d..aad621f 100644 --- a/lib/Target/README.txt +++ b/lib/Target/README.txt @@ -339,6 +339,8 @@ we don't have whole-function selection dags. On x86, this means we use one extra register for the function when effective_addr2 is declared as U64 than when it is declared U32. +PHI Slicing could be extended to do this. + //===---------------------------------------------------------------------===// LSR should know what GPR types a target has. This code: @@ -406,22 +408,6 @@ return: ; preds = %then.1, %else.0, %then.0 //===---------------------------------------------------------------------===// -Tail recursion elimination is not transforming this function, because it is -returning n, which fails the isDynamicConstant check in the accumulator -recursion checks. - -long long fib(const long long n) { - switch(n) { - case 0: - case 1: - return n; - default: - return fib(n-1) + fib(n-2); - } -} - -//===---------------------------------------------------------------------===// - Tail recursion elimination should handle: int pow2m1(int n) { @@ -1229,6 +1215,40 @@ GCC PR33344 is a similar case. //===---------------------------------------------------------------------===// +[PHI TRANSLATE INDEXED GEPs] PR5313 + +Load redundancy elimination for simple loop. This loop: + +void append_text(const char* text,unsigned char * const io) { + while(*text) + *io=*text++; +} + +Compiles to have a fully redundant load in the loop (%2): + +define void @append_text(i8* nocapture %text, i8* nocapture %io) nounwind { +entry: + %0 = load i8* %text, align 1 ; <i8> [#uses=1] + %1 = icmp eq i8 %0, 0 ; <i1> [#uses=1] + br i1 %1, label %return, label %bb + +bb: ; preds = %bb, %entry + %indvar = phi i32 [ 0, %entry ], [ %tmp, %bb ] ; <i32> [#uses=2] + %text_addr.04 = getelementptr i8* %text, i32 %indvar ; <i8*> [#uses=1] + %2 = load i8* %text_addr.04, align 1 ; <i8> [#uses=1] + store i8 %2, i8* %io, align 1 + %tmp = add i32 %indvar, 1 ; <i32> [#uses=2] + %scevgep = getelementptr i8* %text, i32 %tmp ; <i8*> [#uses=1] + %3 = load i8* %scevgep, align 1 ; <i8> [#uses=1] + %4 = icmp eq i8 %3, 0 ; <i1> [#uses=1] + br i1 %4, label %return, label %bb + +return: ; preds = %bb, %entry + ret void +} + +//===---------------------------------------------------------------------===// + There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the GCC testsuite. There are many pre testcases as ssa-pre-*.c @@ -1594,12 +1614,6 @@ int int_char(char m) {if(m>7) return 0; return m;} //===---------------------------------------------------------------------===// -IPSCCP is propagating elements of first class aggregates, but is not propagating -the entire aggregate itself. This leads it to miss opportunities, for example -in test/Transforms/SCCP/ipsccp-basic.ll:test5b. - -//===---------------------------------------------------------------------===// - int func(int a, int b) { if (a & 0x80) b |= 0x80; else b &= ~0x80; return b; } Generates this: @@ -1668,3 +1682,55 @@ entry: } //===---------------------------------------------------------------------===// + +IPSCCP does not currently propagate argument dependent constants through +functions where it does not not all of the callers. This includes functions +with normal external linkage as well as templates, C99 inline functions etc. +Specifically, it does nothing to: + +define i32 @test(i32 %x, i32 %y, i32 %z) nounwind { +entry: + %0 = add nsw i32 %y, %z + %1 = mul i32 %0, %x + %2 = mul i32 %y, %z + %3 = add nsw i32 %1, %2 + ret i32 %3 +} + +define i32 @test2() nounwind { +entry: + %0 = call i32 @test(i32 1, i32 2, i32 4) nounwind + ret i32 %0 +} + +It would be interesting extend IPSCCP to be able to handle simple cases like +this, where all of the arguments to a call are constant. Because IPSCCP runs +before inlining, trivial templates and inline functions are not yet inlined. +The results for a function + set of constant arguments should be memoized in a +map. + +//===---------------------------------------------------------------------===// + +The libcall constant folding stuff should be moved out of SimplifyLibcalls into +libanalysis' constantfolding logic. This would allow IPSCCP to be able to +handle simple things like this: + +static int foo(const char *X) { return strlen(X); } +int bar() { return foo("abcd"); } + +//===---------------------------------------------------------------------===// + +InstCombine should use SimplifyDemandedBits to remove the or instruction: + +define i1 @test(i8 %x, i8 %y) { + %A = or i8 %x, 1 + %B = icmp ugt i8 %A, 3 + ret i1 %B +} + +Currently instcombine calls SimplifyDemandedBits with either all bits or just +the sign bit, if the comparison is obviously a sign test. In this case, we only +need all but the bottom two bits from %A, and if we gave that mask to SDB it +would delete the or instruction for us. + +//===---------------------------------------------------------------------===// diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index 452b46f..cd85dd4 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -126,7 +126,7 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) { processDebugLoc(II, true); printInstruction(II); - if (VerboseAsm && !II->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*II); O << '\n'; processDebugLoc(II, false); diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp index a1a4a8e..b41917e 100644 --- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp +++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp @@ -75,7 +75,6 @@ private: /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void SparcDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); CurBB = BB; // Select target instructions for the DAG. SelectRoot(*CurDAG); diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index 164770d..133f828 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -129,7 +129,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, } InVals.push_back(Arg); } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, + true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load; if (ObjectVT == MVT::i32) { @@ -163,7 +164,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg); InVals.push_back(Arg); } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, + true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0); InVals.push_back(Load); @@ -184,7 +186,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi); HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32); } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, + true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); } @@ -195,7 +198,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo); LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32); } else { - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4, + true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0); } @@ -227,7 +231,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, MF.getRegInfo().addLiveIn(*CurArgReg, VReg); SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32); - int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset); + int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset, + true, false); SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0)); diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td index f2f1b96..d88d508 100644 --- a/lib/Target/Sparc/SparcInstrInfo.td +++ b/lib/Target/Sparc/SparcInstrInfo.td @@ -277,7 +277,7 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection. // Section A.3 - Synthetic Instructions, p. 85 // special cases of JMPL: -let isReturn = 1, isTerminator = 1, hasDelaySlot = 1 in { +let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in { let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp index 7883260..6f6183e 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.cpp +++ b/lib/Target/Sparc/SparcRegisterInfo.cpp @@ -175,7 +175,7 @@ unsigned SparcRegisterInfo::getRARegister() const { return SP::I7; } -unsigned SparcRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return SP::I6; } diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h index 753b1c0..8889ea6 100644 --- a/lib/Target/Sparc/SparcRegisterInfo.h +++ b/lib/Target/Sparc/SparcRegisterInfo.h @@ -54,7 +54,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo { // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 3a38115..1eec112 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -29,7 +29,7 @@ extern "C" void LLVMInitializeSparcTarget() { SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT, const std::string &FS) : LLVMTargetMachine(T, TT), - DataLayout("E-p:32:32-f128:128:128"), + DataLayout("E-p:32:32-f128:128:128-n32"), Subtarget(TT, FS), TLInfo(*this), InstrInfo(Subtarget), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) { } diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp index a4a8d6a..e97e7ca 100644 --- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp +++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp @@ -33,6 +33,7 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegistry.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" @@ -154,7 +155,7 @@ void SystemZAsmPrinter::printMachineInstruction(const MachineInstr *MI) { // Call the autogenerated instruction printer routines. printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp index 028ee89..d64611d 100644 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -603,8 +603,6 @@ bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N, /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. void SystemZDAGToDAGISel::InstructionSelect() { - DEBUG(BB->dump()); - // Codegen the basic block. DEBUG(errs() << "===== Instruction selection begins:\n"); DEBUG(Indent = 0); diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 5c8cae0..d6b476e 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -34,6 +34,7 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/VectorExtras.h" using namespace llvm; @@ -328,7 +329,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain, // Create the nodes corresponding to a load from this parameter slot. // Create the frame index object for this incoming parameter... int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8, - VA.getLocMemOffset()); + VA.getLocMemOffset(), true, false); // Create the SelectionDAG nodes corresponding to a load // from this parameter diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 236711c..d82d928 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -22,7 +22,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" - +#include "llvm/Support/ErrorHandling.h" using namespace llvm; SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index 38460a6..4d1c01f 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -320,7 +320,8 @@ unsigned SystemZRegisterInfo::getRARegister() const { return 0; } -unsigned SystemZRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned +SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { assert(0 && "What is the frame register"); return 0; } diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index b22b05d..93f6aee 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -68,7 +68,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo { // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; // Exception handling queries. unsigned getEHExceptionRegister() const; diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 990e003..dfa26a1 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -28,7 +28,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T, : LLVMTargetMachine(T, TT), Subtarget(TT, FS), DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" - "-f64:64:64-f128:128:128-a0:16:16"), + "-f64:64:64-f128:128:128-a0:16:16-n32:64"), InstrInfo(*this), TLInfo(*this), FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) { diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index 5bcd658..fc71bc3 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -17,16 +17,16 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetData.h" -#include "llvm/Module.h" -#include "llvm/DerivedTypes.h" #include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Module.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/System/Mutex.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringExtras.h" #include <algorithm> #include <cstdlib> using namespace llvm; @@ -132,50 +132,18 @@ const TargetAlignElem TargetData::InvalidAlignmentElem = // TargetData Class Implementation //===----------------------------------------------------------------------===// -/*! - A TargetDescription string consists of a sequence of hyphen-delimited - specifiers for target endianness, pointer size and alignments, and various - primitive type sizes and alignments. A typical string looks something like: - <br><br> - "E-p:32:32:32-i1:8:8-i8:8:8-i32:32:32-i64:32:64-f32:32:32-f64:32:64" - <br><br> - (note: this string is not fully specified and is only an example.) - \p - Alignments come in two flavors: ABI and preferred. ABI alignment (abi_align, - below) dictates how a type will be aligned within an aggregate and when used - as an argument. Preferred alignment (pref_align, below) determines a type's - alignment when emitted as a global. - \p - Specifier string details: - <br><br> - <i>[E|e]</i>: Endianness. "E" specifies a big-endian target data model, "e" - specifies a little-endian target data model. - <br><br> - <i>p:@verbatim<size>:<abi_align>:<pref_align>@endverbatim</i>: Pointer size, - ABI and preferred alignment. - <br><br> - <i>@verbatim<type><size>:<abi_align>:<pref_align>@endverbatim</i>: Numeric type - alignment. Type is - one of <i>i|f|v|a</i>, corresponding to integer, floating point, vector, or - aggregate. Size indicates the size, e.g., 32 or 64 bits. - \p - The default string, fully specified, is: - <br><br> - "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64" - "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64" - "-v64:64:64-v128:128:128" - <br><br> - Note that in the case of aggregates, 0 is the default ABI and preferred - alignment. This is a special case, where the aggregate's computed worst-case - alignment will be used. - */ -void TargetData::init(const std::string &TargetDescription) { - std::string temp = TargetDescription; - +/// getInt - Get an integer ignoring errors. +static unsigned getInt(StringRef R) { + unsigned Result = 0; + R.getAsInteger(10, Result); + return Result; +} + +void TargetData::init(StringRef Desc) { LayoutMap = 0; LittleEndian = false; PointerMemSize = 8; - PointerABIAlign = 8; + PointerABIAlign = 8; PointerPrefAlign = PointerABIAlign; // Default alignments @@ -190,11 +158,21 @@ void TargetData::init(const std::string &TargetDescription) { setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ... setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct - while (!temp.empty()) { - std::string token = getToken(temp, "-"); - std::string arg0 = getToken(token, ":"); - const char *p = arg0.c_str(); - switch(*p) { + while (!Desc.empty()) { + std::pair<StringRef, StringRef> Split = Desc.split('-'); + StringRef Token = Split.first; + Desc = Split.second; + + if (Token.empty()) + continue; + + Split = Token.split(':'); + StringRef Specifier = Split.first; + Token = Split.second; + + assert(!Specifier.empty() && "Can't be empty here"); + + switch (Specifier[0]) { case 'E': LittleEndian = false; break; @@ -202,9 +180,12 @@ void TargetData::init(const std::string &TargetDescription) { LittleEndian = true; break; case 'p': - PointerMemSize = atoi(getToken(token,":").c_str()) / 8; - PointerABIAlign = atoi(getToken(token,":").c_str()) / 8; - PointerPrefAlign = atoi(getToken(token,":").c_str()) / 8; + Split = Token.split(':'); + PointerMemSize = getInt(Split.first) / 8; + Split = Split.second.split(':'); + PointerABIAlign = getInt(Split.first) / 8; + Split = Split.second.split(':'); + PointerPrefAlign = getInt(Split.first) / 8; if (PointerPrefAlign == 0) PointerPrefAlign = PointerABIAlign; break; @@ -213,28 +194,52 @@ void TargetData::init(const std::string &TargetDescription) { case 'f': case 'a': case 's': { - AlignTypeEnum align_type = STACK_ALIGN; // Dummy init, silence warning - switch(*p) { - case 'i': align_type = INTEGER_ALIGN; break; - case 'v': align_type = VECTOR_ALIGN; break; - case 'f': align_type = FLOAT_ALIGN; break; - case 'a': align_type = AGGREGATE_ALIGN; break; - case 's': align_type = STACK_ALIGN; break; + AlignTypeEnum AlignType; + switch (Specifier[0]) { + default: + case 'i': AlignType = INTEGER_ALIGN; break; + case 'v': AlignType = VECTOR_ALIGN; break; + case 'f': AlignType = FLOAT_ALIGN; break; + case 'a': AlignType = AGGREGATE_ALIGN; break; + case 's': AlignType = STACK_ALIGN; break; } - uint32_t size = (uint32_t) atoi(++p); - unsigned char abi_align = atoi(getToken(token, ":").c_str()) / 8; - unsigned char pref_align = atoi(getToken(token, ":").c_str()) / 8; - if (pref_align == 0) - pref_align = abi_align; - setAlignment(align_type, abi_align, pref_align, size); + unsigned Size = getInt(Specifier.substr(1)); + Split = Token.split(':'); + unsigned char ABIAlign = getInt(Split.first) / 8; + + Split = Split.second.split(':'); + unsigned char PrefAlign = getInt(Split.first) / 8; + if (PrefAlign == 0) + PrefAlign = ABIAlign; + setAlignment(AlignType, ABIAlign, PrefAlign, Size); break; } + case 'n': // Native integer types. + Specifier = Specifier.substr(1); + do { + if (unsigned Width = getInt(Specifier)) + LegalIntWidths.push_back(Width); + Split = Token.split(':'); + Specifier = Split.first; + Token = Split.second; + } while (!Specifier.empty() || !Token.empty()); + break; + default: break; } } } +/// Default ctor. +/// +/// @note This has to exist, because this is a pass, but it should never be +/// used. +TargetData::TargetData() : ImmutablePass(&ID) { + llvm_report_error("Bad TargetData ctor used. " + "Tool did not specify a TargetData to use?"); +} + TargetData::TargetData(const Module *M) : ImmutablePass(&ID) { init(M->getDataLayout()); @@ -318,37 +323,130 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType, : Alignments[BestMatchIdx].PrefAlign; } -typedef DenseMap<const StructType*, StructLayout*>LayoutInfoTy; +typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy; -TargetData::~TargetData() { - if (!LayoutMap) - return; - - // Remove any layouts for this TD. - LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap); - for (LayoutInfoTy::iterator I = TheMap.begin(), E = TheMap.end(); I != E; ) { - I->second->~StructLayout(); - free(I->second); - TheMap.erase(I++); +namespace llvm { + +class StructLayoutMap : public AbstractTypeUser { + LayoutInfoTy LayoutInfo; + + /// refineAbstractType - The callback method invoked when an abstract type is + /// resolved to another type. An object must override this method to update + /// its internal state to reference NewType instead of OldType. + /// + virtual void refineAbstractType(const DerivedType *OldTy, + const Type *) { + const StructType *STy = dyn_cast<const StructType>(OldTy); + if (!STy) { + OldTy->removeAbstractTypeUser(this); + return; + } + + StructLayout *SL = LayoutInfo[STy]; + if (SL) { + SL->~StructLayout(); + free(SL); + LayoutInfo[STy] = NULL; + } + + OldTy->removeAbstractTypeUser(this); } - - delete static_cast<LayoutInfoTy*>(LayoutMap); + + /// typeBecameConcrete - The other case which AbstractTypeUsers must be aware + /// of is when a type makes the transition from being abstract (where it has + /// clients on its AbstractTypeUsers list) to concrete (where it does not). + /// This method notifies ATU's when this occurs for a type. + /// + virtual void typeBecameConcrete(const DerivedType *AbsTy) { + const StructType *STy = dyn_cast<const StructType>(AbsTy); + if (!STy) { + AbsTy->removeAbstractTypeUser(this); + return; + } + + StructLayout *SL = LayoutInfo[STy]; + if (SL) { + SL->~StructLayout(); + free(SL); + LayoutInfo[STy] = NULL; + } + + AbsTy->removeAbstractTypeUser(this); + } + + bool insert(const Type *Ty) { + if (Ty->isAbstract()) + Ty->addAbstractTypeUser(this); + return true; + } + +public: + virtual ~StructLayoutMap() { + // Remove any layouts. + for (LayoutInfoTy::iterator + I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) + if (StructLayout *SL = I->second) { + SL->~StructLayout(); + free(SL); + } + } + + inline LayoutInfoTy::iterator begin() { + return LayoutInfo.begin(); + } + inline LayoutInfoTy::iterator end() { + return LayoutInfo.end(); + } + inline LayoutInfoTy::const_iterator begin() const { + return LayoutInfo.begin(); + } + inline LayoutInfoTy::const_iterator end() const { + return LayoutInfo.end(); + } + + LayoutInfoTy::iterator find(const StructType *&Val) { + return LayoutInfo.find(Val); + } + LayoutInfoTy::const_iterator find(const StructType *&Val) const { + return LayoutInfo.find(Val); + } + + bool erase(const StructType *&Val) { + return LayoutInfo.erase(Val); + } + bool erase(LayoutInfoTy::iterator I) { + return LayoutInfo.erase(I); + } + + StructLayout *&operator[](const Type *Key) { + const StructType *STy = dyn_cast<const StructType>(Key); + assert(STy && "Trying to access the struct layout map with a non-struct!"); + insert(STy); + return LayoutInfo[STy]; + } + + // for debugging... + virtual void dump() const {} +}; + +} // end namespace llvm + +TargetData::~TargetData() { + delete LayoutMap; } const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { if (!LayoutMap) - LayoutMap = static_cast<void*>(new LayoutInfoTy()); - - LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap); + LayoutMap = new StructLayoutMap(); - StructLayout *&SL = TheMap[Ty]; + StructLayout *&SL = (*LayoutMap)[Ty]; if (SL) return SL; // Otherwise, create the struct layout. Because it is variable length, we // malloc it, then use placement new. int NumElts = Ty->getNumElements(); StructLayout *L = - (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1)*sizeof(uint64_t)); + (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t)); // Set SL before calling StructLayout's ctor. The ctor could cause other // entries to be added to TheMap, invalidating our reference. @@ -365,31 +463,35 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const { void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const { if (!LayoutMap) return; // No cache. - LayoutInfoTy* LayoutInfo = static_cast<LayoutInfoTy*>(LayoutMap); - LayoutInfoTy::iterator I = LayoutInfo->find(Ty); - if (I == LayoutInfo->end()) return; + DenseMap<const StructType*, StructLayout*>::iterator I = LayoutMap->find(Ty); + if (I == LayoutMap->end()) return; I->second->~StructLayout(); free(I->second); - LayoutInfo->erase(I); + LayoutMap->erase(I); } std::string TargetData::getStringRepresentation() const { - std::string repr; - repr.append(LittleEndian ? "e" : "E"); - repr.append("-p:").append(itostr((int64_t) (PointerMemSize * 8))). - append(":").append(itostr((int64_t) (PointerABIAlign * 8))). - append(":").append(itostr((int64_t) (PointerPrefAlign * 8))); - for (align_const_iterator I = Alignments.begin(); - I != Alignments.end(); - ++I) { - repr.append("-").append(1, (char) I->AlignType). - append(utostr((int64_t) I->TypeBitWidth)). - append(":").append(utostr((uint64_t) (I->ABIAlign * 8))). - append(":").append(utostr((uint64_t) (I->PrefAlign * 8))); + std::string Result; + raw_string_ostream OS(Result); + + OS << (LittleEndian ? "e" : "E") + << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8 + << ':' << PointerPrefAlign*8; + for (unsigned i = 0, e = Alignments.size(); i != e; ++i) { + const TargetAlignElem &AI = Alignments[i]; + OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':' + << AI.ABIAlign*8 << ':' << AI.PrefAlign*8; + } + + if (!LegalIntWidths.empty()) { + OS << "-n" << (unsigned)LegalIntWidths[0]; + + for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i) + OS << ':' << (unsigned)LegalIntWidths[i]; } - return repr; + return OS.str(); } diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp index c1aab99..f887523 100644 --- a/lib/Target/TargetLoweringObjectFile.cpp +++ b/lib/Target/TargetLoweringObjectFile.cpp @@ -24,6 +24,7 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" @@ -151,7 +152,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, // relocation, then we may have to drop this into a wriable data section // even though it is marked const. switch (C->getRelocationInfo()) { - default: llvm_unreachable("unknown relocation info kind"); + default: assert(0 && "unknown relocation info kind"); case Constant::NoRelocation: // If initializer is a null-terminated string, put it in a "cstring" // section of the right width. @@ -219,7 +220,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV, return SectionKind::getDataNoRel(); switch (C->getRelocationInfo()) { - default: llvm_unreachable("unknown relocation info kind"); + default: assert(0 && "unknown relocation info kind"); case Constant::NoRelocation: return SectionKind::getDataNoRel(); case Constant::LocalRelocation: @@ -671,7 +672,7 @@ TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() { const MCSectionMachO *TargetLoweringObjectFileMachO:: -getMachOSection(const StringRef &Segment, const StringRef &Section, +getMachOSection(StringRef Segment, StringRef Section, unsigned TypeAndAttributes, unsigned Reserved2, SectionKind Kind) const { // We unique sections by their segment/section pair. The returned section diff --git a/lib/Target/TargetSubtarget.cpp b/lib/Target/TargetSubtarget.cpp index 95c92ca..edb76f9 100644 --- a/lib/Target/TargetSubtarget.cpp +++ b/lib/Target/TargetSubtarget.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetSubtarget.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm; //--------------------------------------------------------------------------- @@ -20,3 +21,13 @@ using namespace llvm; TargetSubtarget::TargetSubtarget() {} TargetSubtarget::~TargetSubtarget() {} + +bool TargetSubtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = ANTIDEP_NONE; + CriticalPathRCs.clear(); + return false; +} + diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index ae8e6d3..b88063f 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -651,7 +651,7 @@ void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) { printInstructionThroughMCStreamer(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp index 821cca4..be9f4b2 100644 --- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp +++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp @@ -21,6 +21,7 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/Mangler.h" #include "llvm/ADT/SmallString.h" @@ -405,7 +406,6 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) { printLabel(MI); return; case TargetInstrInfo::INLINEASM: - O << '\t'; printInlineAsm(MI); return; case TargetInstrInfo::IMPLICIT_DEF: diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index a0bded3..4497931 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -82,7 +82,7 @@ namespace { void emitPCRelativeBlockAddress(MachineBasicBlock *MBB); void emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0, - bool NeedStub = false, bool Indirect = false); + bool Indirect = false); void emitExternalSymbolAddress(const char *ES, unsigned Reloc); void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0, intptr_t PCAdj = 0); @@ -176,7 +176,6 @@ template<class CodeEmitter> void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, intptr_t Disp /* = 0 */, intptr_t PCAdj /* = 0 */, - bool NeedStub /* = false */, bool Indirect /* = false */) { intptr_t RelocCST = Disp; if (Reloc == X86::reloc_picrel_word) @@ -185,9 +184,9 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc, RelocCST = PCAdj; MachineRelocation MR = Indirect ? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, NeedStub) + GV, RelocCST, false) : MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - GV, RelocCST, NeedStub); + GV, RelocCST, false); MCE.addRelocation(MR); // The relocated value will be added to the displacement if (Reloc == X86::reloc_absolute_dword) @@ -333,10 +332,9 @@ void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp, // do it, otherwise fallback to absolute (this is determined by IsPCRel). // 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative // 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute - bool NeedStub = isa<Function>(RelocOp->getGlobal()); bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM); emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(), - Adj, NeedStub, Indirect); + Adj, Indirect); } else if (RelocOp->isSymbol()) { emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType); } else if (RelocOp->isCPI()) { @@ -633,14 +631,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, } if (MO.isGlobal()) { - // Assume undefined functions may be outside the Small codespace. - bool NeedStub = - (Is64BitMode && - (TM.getCodeModel() == CodeModel::Large || - TM.getSubtarget<X86Subtarget>().isTargetDarwin())) || - Opcode == X86::TAILJMPd; emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word, - MO.getOffset(), 0, NeedStub); + MO.getOffset(), 0); break; } @@ -681,10 +673,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64ri) rt = X86::reloc_absolute_dword; // FIXME: add X86II flag? if (MO1.isGlobal()) { - bool NeedStub = isa<Function>(MO1.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO1, TM); emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - NeedStub, Indirect); + Indirect); } else if (MO1.isSymbol()) emitExternalSymbolAddress(MO1.getSymbolName(), rt); else if (MO1.isCPI()) @@ -790,10 +781,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64ri32) rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? if (MO1.isGlobal()) { - bool NeedStub = isa<Function>(MO1.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO1, TM); emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0, - NeedStub, Indirect); + Indirect); } else if (MO1.isSymbol()) emitExternalSymbolAddress(MO1.getSymbolName(), rt); else if (MO1.isCPI()) @@ -831,10 +821,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI, if (Opcode == X86::MOV64mi32) rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag? if (MO.isGlobal()) { - bool NeedStub = isa<Function>(MO.getGlobal()); bool Indirect = gvNeedsNonLazyPtr(MO, TM); emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0, - NeedStub, Indirect); + Indirect); } else if (MO.isSymbol()) emitExternalSymbolAddress(MO.getSymbolName(), rt); else if (MO.isCPI()) diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 3401df0..431c120 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1493,7 +1493,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) { EVT ResVT = RVLocs[0].getValVT(); unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64; unsigned MemSize = ResVT.getSizeInBits()/8; - int FI = MFI.CreateStackObject(MemSize, MemSize); + int FI = MFI.CreateStackObject(MemSize, MemSize, false); addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg); DstRC = ResVT == MVT::f32 ? X86::FR32RegisterClass : X86::FR64RegisterClass; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 122f515..6a3577a 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -12,6 +12,15 @@ // //===----------------------------------------------------------------------===// +// Force NDEBUG on in any optimized build on Darwin. +// +// FIXME: This is a huge hack, to work around ridiculously awful compile times +// on this file with gcc-4.2 on Darwin, in Release mode. +#if (!defined(__llvm__) && defined(__APPLE__) && \ + defined(__OPTIMIZE__) && !defined(NDEBUG)) +#define NDEBUG +#endif + #define DEBUG_TYPE "x86-isel" #include "X86.h" #include "X86InstrBuilder.h" @@ -661,7 +670,6 @@ void X86DAGToDAGISel::InstructionSelect() { const Function *F = MF->getFunction(); OptForSize = F->hasFnAttr(Attribute::OptimizeForSize); - DEBUG(BB->dump()); if (OptLevel != CodeGenOpt::None) PreprocessForRMW(); @@ -1950,14 +1958,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) { 0); // We just did a 32-bit clear, insert it into a 64-bit register to // clear the whole 64-bit reg. - SDValue Undef = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF, - dl, MVT::i64), 0); + SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64); SDValue SubRegNo = CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32); ClrNode = - SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl, - MVT::i64, Undef, ClrNode, SubRegNo), + SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl, + MVT::i64, Zero, ClrNode, SubRegNo), 0); } else { ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 86ec9f2..6018cf5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1087,6 +1087,17 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { #include "X86GenCallingConv.inc" +bool +X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<EVT> &OutTys, + const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, + SelectionDAG &DAG) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86); +} + SDValue X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -1370,7 +1381,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, // In case of tail call optimization mark all arguments mutable. Since they // could be overwritten by lowering of arguments in case of a tail call. int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, - VA.getLocMemOffset(), isImmutable); + VA.getLocMemOffset(), isImmutable, false); SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) return FIN; @@ -1499,7 +1510,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // the start of the first vararg value... for expansion of llvm.va_start. if (isVarArg) { if (Is64Bit || CallConv != CallingConv::X86_FastCall) { - VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize); + VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize, true, false); } if (Is64Bit) { unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0; @@ -1550,7 +1561,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, VarArgsGPOffset = NumIntRegs * 8; VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16; RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 + - TotalNumXMMRegs * 16, 16); + TotalNumXMMRegs * 16, 16, + false); // Store the integer parameter registers. SmallVector<SDValue, 8> MemOps; @@ -1671,7 +1683,8 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, // Calculate the new stack slot for the return address. int SlotSize = Is64Bit ? 8 : 4; int NewReturnAddrFI = - MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize); + MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, + true, false); EVT VT = Is64Bit ? MVT::i64 : MVT::i32; SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT); Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, @@ -1884,7 +1897,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, // Create frame index. int32_t Offset = VA.getLocMemOffset()+FPDiff; uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; - FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); + FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false); FIN = DAG.getFrameIndex(FI, getPointerTy()); if (Flags.isByVal()) { @@ -1924,9 +1937,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, FPDiff, dl); } - // If the callee is a GlobalAddress node (quite common, every direct call is) - // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. - if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + bool WasGlobalOrExternal = false; + if (getTargetMachine().getCodeModel() == CodeModel::Large) { + assert(Is64Bit && "Large code model is only legal in 64-bit mode."); + // In the 64-bit large code model, we have to make all calls + // through a register, since the call instruction's 32-bit + // pc-relative offset may not be large enough to hold the whole + // address. + } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { + WasGlobalOrExternal = true; + // If the callee is a GlobalAddress node (quite common, every direct call + // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack + // it. + // We should use extra load for direct calls to dllimported functions in // non-JIT mode. GlobalValue *GV = G->getGlobal(); @@ -1954,6 +1977,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, G->getOffset(), OpFlags); } } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { + WasGlobalOrExternal = true; unsigned char OpFlags = 0; // On ELF targets, in either X86-64 or X86-32 mode, direct calls to external @@ -1971,7 +1995,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(), OpFlags); - } else if (isTailCall) { + } + + if (isTailCall && !WasGlobalOrExternal) { unsigned Opc = Is64Bit ? X86::R11 : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, @@ -2169,7 +2195,8 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. uint64_t SlotSize = TD->getPointerSize(); - ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize); + ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, + true, false); FuncInfo->setRAIndex(ReturnAddrIndex); } @@ -2517,6 +2544,21 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) { isUndefOrEqual(N->getMaskElt(3), 3); } +/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form +/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, +/// <2, 3, 2, 3> +bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { + unsigned NumElems = N->getValueType(0).getVectorNumElements(); + + if (NumElems != 4) + return false; + + return isUndefOrEqual(N->getMaskElt(0), 2) && + isUndefOrEqual(N->getMaskElt(1), 3) && + isUndefOrEqual(N->getMaskElt(2), 2) && + isUndefOrEqual(N->getMaskElt(3), 3); +} + /// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}. bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { @@ -2536,10 +2578,9 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) { return true; } -/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand -/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D} -/// and MOVLHPS. -bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { +/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand +/// specifies a shuffle of elements that is suitable for input to MOVLHPS. +bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) { unsigned NumElems = N->getValueType(0).getVectorNumElements(); if (NumElems != 2 && NumElems != 4) @@ -2556,21 +2597,6 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) { return true; } -/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form -/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef, -/// <2, 3, 2, 3> -bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) { - unsigned NumElems = N->getValueType(0).getVectorNumElements(); - - if (NumElems != 4) - return false; - - return isUndefOrEqual(N->getMaskElt(0), 2) && - isUndefOrEqual(N->getMaskElt(1), 3) && - isUndefOrEqual(N->getMaskElt(2), 2) && - isUndefOrEqual(N->getMaskElt(3), 3); -} - /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT, @@ -4264,7 +4290,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { if (!isMMX && (X86::isMOVSHDUPMask(SVOp) || X86::isMOVSLDUPMask(SVOp) || X86::isMOVHLPSMask(SVOp) || - X86::isMOVHPMask(SVOp) || + X86::isMOVLHPSMask(SVOp) || X86::isMOVLPMask(SVOp))) return Op; @@ -4961,7 +4987,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); unsigned Size = SrcVT.getSizeInBits()/8; MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size); + int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot, @@ -4995,7 +5021,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, // shouldn't be necessary except that RFP cannot be live across // multiple blocks. When stackifier is fixed, they can be uncoupled. MachineFunction &MF = DAG.getMachineFunction(); - int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8); + int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); Tys = DAG.getVTList(MVT::Other); SmallVector<SDValue, 8> Ops; @@ -5205,7 +5231,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { // stack slot. MachineFunction &MF = DAG.getMachineFunction(); unsigned MemSize = DstTy.getSizeInBits()/8; - int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); + int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); unsigned Opc; @@ -5228,7 +5254,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) { }; Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3); Chain = Value.getValue(1); - SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize); + SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false); StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); } @@ -6752,7 +6778,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) { DebugLoc dl = Op.getDebugLoc(); // Save FP Control Word to stack slot - int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment); + int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false); SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy()); SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other, @@ -7977,7 +8003,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. MachineFunction *F = BB->getParent(); - int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2); + int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false); addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); // Load the old value of the high byte of the control word... @@ -9585,14 +9611,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, } // GCC allows "st(0)" to be called just plain "st". - if (StringsEqualNoCase("{st}", Constraint)) { + if (StringRef("{st}").equals_lower(Constraint)) { Res.first = X86::ST0; Res.second = X86::RFP80RegisterClass; return Res; } // flags -> EFLAGS - if (StringsEqualNoCase("{flags}", Constraint)) { + if (StringRef("{flags}").equals_lower(Constraint)) { Res.first = X86::EFLAGS; Res.second = X86::CCRRegisterClass; return Res; diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 7b59b81..7b4ab62 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -286,7 +286,7 @@ namespace llvm { /// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for MOVHP{S|D}. /// as well as MOVLHPS. - bool isMOVHPMask(ShuffleVectorSDNode *N); + bool isMOVLHPSMask(ShuffleVectorSDNode *N); /// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a shuffle of elements that is suitable for input to UNPCKL. @@ -699,6 +699,12 @@ namespace llvm { const SmallVectorImpl<ISD::OutputArg> &Outs, DebugLoc dl, SelectionDAG &DAG); + virtual bool + CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<EVT> &OutTys, + const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, + SelectionDAG &DAG); + void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG, unsigned NewOp); diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td index 3edced7..a01534b 100644 --- a/lib/Target/X86/X86Instr64bit.td +++ b/lib/Target/X86/X86Instr64bit.td @@ -309,7 +309,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src), [(set GR64:$dst, i64immSExt32:$src)]>; } -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src), "mov{q}\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (load addr:$src))]>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 87bc10d..1ddceb1 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -26,11 +26,15 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" + +#include <limits> + using namespace llvm; static cl::opt<bool> @@ -707,9 +711,23 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI, } } -unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { +/// isFrameOperand - Return true and the FrameIndex if the specified +/// operand and follow operands form a reference to the stack frame. +bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op, + int &FrameIndex) const { + if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() && + MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() && + MI->getOperand(Op+1).getImm() == 1 && + MI->getOperand(Op+2).getReg() == 0 && + MI->getOperand(Op+3).getImm() == 0) { + FrameIndex = MI->getOperand(Op).getIndex(); + return true; + } + return false; +} + +static bool isFrameLoadOpcode(int Opcode) { + switch (Opcode) { default: break; case X86::MOV8rm: case X86::MOV16rm: @@ -723,22 +741,14 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, case X86::MOVDQArm: case X86::MMX_MOVD64rm: case X86::MMX_MOVQ64rm: - if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() && - MI->getOperand(3).isReg() && MI->getOperand(4).isImm() && - MI->getOperand(2).getImm() == 1 && - MI->getOperand(3).getReg() == 0 && - MI->getOperand(4).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } + return true; break; } - return 0; + return false; } -unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { +static bool isFrameStoreOpcode(int Opcode) { + switch (Opcode) { default: break; case X86::MOV8mr: case X86::MOV16mr: @@ -753,19 +763,83 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, case X86::MMX_MOVD64mr: case X86::MMX_MOVQ64mr: case X86::MMX_MOVNTQmr: - if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() && - MI->getOperand(2).isReg() && MI->getOperand(3).isImm() && - MI->getOperand(1).getImm() == 1 && - MI->getOperand(2).getReg() == 0 && - MI->getOperand(3).getImm() == 0) { - FrameIndex = MI->getOperand(0).getIndex(); + return true; + } + return false; +} + +unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameLoadOpcode(MI->getOpcode())) + if (isFrameOperand(MI, 1, FrameIndex)) + return MI->getOperand(0).getReg(); + return 0; +} + +unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameLoadOpcode(MI->getOpcode())) { + unsigned Reg; + if ((Reg = isLoadFromStackSlot(MI, FrameIndex))) + return Reg; + // Check for post-frame index elimination operations + return hasLoadFromStackSlot(MI, FrameIndex); + } + return 0; +} + +bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isLoad() && (*o)->getValue()) + if (const FixedStackPseudoSourceValue *Value = + dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { + FrameIndex = Value->getFrameIndex(); + return true; + } + } + return false; +} + +unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameStoreOpcode(MI->getOpcode())) + if (isFrameOperand(MI, 0, FrameIndex)) return MI->getOperand(X86AddrNumOperands).getReg(); - } - break; + return 0; +} + +unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const { + if (isFrameStoreOpcode(MI->getOpcode())) { + unsigned Reg; + if ((Reg = isStoreToStackSlot(MI, FrameIndex))) + return Reg; + // Check for post-frame index elimination operations + return hasStoreToStackSlot(MI, FrameIndex); } return 0; } +bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI, + int &FrameIndex) const { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isStore() && (*o)->getValue()) + if (const FixedStackPseudoSourceValue *Value = + dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) { + FrameIndex = Value->getFrameIndex(); + return true; + } + } + return false; +} + /// regIsPICBase - Return true if register is PIC base (i.e.g defined by /// X86::MOVPC32r. static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) { @@ -794,10 +868,14 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI, case X86::MOVSSrm: case X86::MOVSDrm: case X86::MOVAPSrm: + case X86::MOVUPSrm: + case X86::MOVUPSrm_Int: case X86::MOVAPDrm: case X86::MOVDQArm: case X86::MMX_MOVD64rm: - case X86::MMX_MOVQ64rm: { + case X86::MMX_MOVQ64rm: + case X86::FsMOVAPSrm: + case X86::FsMOVAPDrm: { // Loads from constant pools are trivially rematerializable. if (MI->getOperand(1).isReg() && MI->getOperand(2).isImm() && @@ -917,12 +995,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB, void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig) const { + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const { DebugLoc DL = DebugLoc::getUnknownLoc(); if (I != MBB.end()) DL = I->getDebugLoc(); if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) { - DestReg = RI.getSubReg(DestReg, SubIdx); + DestReg = TRI->getSubReg(DestReg, SubIdx); SubIdx = 0; } @@ -1891,8 +1970,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + bool isAligned = (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); DebugLoc DL = DebugLoc::getUnknownLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); @@ -1985,8 +2063,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, MachineInstr::mmo_iterator MMOBegin, MachineInstr::mmo_iterator MMOEnd, SmallVectorImpl<MachineInstr*> &NewMIs) const { - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + bool isAligned = (*MMOBegin)->getAlignment() >= 16; unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); DebugLoc DL = DebugLoc::getUnknownLoc(); MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); @@ -2170,7 +2247,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // If table selected... if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = OpcodeTablePtr->find((unsigned*)MI->getOpcode()); if (I != OpcodeTablePtr->end()) { unsigned Opcode = I->second.first; @@ -2402,7 +2479,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, if (OpcodeTablePtr) { // Find the Opcode to fuse - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = OpcodeTablePtr->find((unsigned*)Opc); if (I != OpcodeTablePtr->end()) return true; @@ -2413,7 +2490,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl<MachineInstr*> &NewMIs) const { - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); if (I == MemOp2RegOpTable.end()) return false; @@ -2530,7 +2607,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, if (!N->isMachineOpcode()) return false; - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); if (I == MemOp2RegOpTable.end()) return false; @@ -2563,17 +2640,16 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, MachineFunction &MF = DAG.getMachineFunction(); if (FoldedLoad) { EVT VT = *RC->vt_begin(); - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + std::pair<MachineInstr::mmo_iterator, + MachineInstr::mmo_iterator> MMOs = + MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), + cast<MachineSDNode>(N)->memoperands_end()); + bool isAligned = (*MMOs.first)->getAlignment() >= 16; Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, VT, MVT::Other, &AddrOps[0], AddrOps.size()); NewNodes.push_back(Load); // Preserve memory reference information. - std::pair<MachineInstr::mmo_iterator, - MachineInstr::mmo_iterator> MMOs = - MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), - cast<MachineSDNode>(N)->memoperands_end()); cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); } @@ -2601,8 +2677,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, AddrOps.pop_back(); AddrOps.push_back(SDValue(NewNode, 0)); AddrOps.push_back(Chain); - bool isAligned = (RI.getStackAlignment() >= 16) || - RI.needsStackRealignment(MF); + std::pair<MachineInstr::mmo_iterator, + MachineInstr::mmo_iterator> MMOs = + MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), + cast<MachineSDNode>(N)->memoperands_end()); + bool isAligned = (*MMOs.first)->getAlignment() >= 16; SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, TM), dl, MVT::Other, @@ -2610,10 +2689,6 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, NewNodes.push_back(Store); // Preserve memory reference information. - std::pair<MachineInstr::mmo_iterator, - MachineInstr::mmo_iterator> MMOs = - MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), - cast<MachineSDNode>(N)->memoperands_end()); cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); } @@ -2623,7 +2698,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore, unsigned *LoadRegIndex) const { - DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = + DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = MemOp2RegOpTable.find((unsigned*)Opc); if (I == MemOp2RegOpTable.end()) return 0; diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 6eb07d5..c6daa25 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -449,13 +449,41 @@ public: unsigned &SrcSubIdx, unsigned &DstSubIdx) const; unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination + /// stack locations as well. This uses a heuristic so it isn't + /// reliable for correctness. + unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + + /// hasLoadFromStackSlot - If the specified machine instruction has + /// a load from a stack slot, return true along with the FrameIndex + /// of the loaded stack slot. If not, return false. Unlike + /// isLoadFromStackSlot, this returns true for any instructions that + /// loads from the stack. This is a hint only and may not catch all + /// cases. + bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; + unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; + /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination + /// stack locations as well. This uses a heuristic so it isn't + /// reliable for correctness. + unsigned isStoreToStackSlotPostFE(const MachineInstr *MI, + int &FrameIndex) const; + + /// hasStoreToStackSlot - If the specified machine instruction has a + /// store to a stack slot, return true along with the FrameIndex of + /// the loaded stack slot. If not, return false. Unlike + /// isStoreToStackSlot, this returns true for any instructions that + /// loads from the stack. This is a hint only and may not catch all + /// cases. + bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; bool isReallyTriviallyReMaterializable(const MachineInstr *MI, AliasAnalysis *AA) const; void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, - const MachineInstr *Orig) const; + const MachineInstr *Orig, + const TargetRegisterInfo *TRI) const; /// convertToThreeAddress - This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target @@ -610,6 +638,11 @@ private: unsigned OpNum, const SmallVectorImpl<MachineOperand> &MOs, unsigned Size, unsigned Alignment) const; + + /// isFrameOperand - Return true and the FrameIndex if the specified + /// operand and follow operands form a reference to the stack frame. + bool isFrameOperand(const MachineInstr *MI, unsigned int Op, + int &FrameIndex) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 9b82e1e..a79f262 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -543,7 +543,7 @@ let neverHasSideEffects = 1 in { } // Trap -def INT3 : I<0xcc, RawFrm, (outs), (ins), "int 3", []>; +def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>; def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>; // PIC base construction. This expands to code that looks like this: diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index be242a0..ee63d56 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -225,9 +225,9 @@ def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs), return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N)); }]>; -def movhp : PatFrag<(ops node:$lhs, node:$rhs), - (vector_shuffle node:$lhs, node:$rhs), [{ - return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N)); +def movlhps : PatFrag<(ops node:$lhs, node:$rhs), + (vector_shuffle node:$lhs, node:$rhs), [{ + return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N)); }]>; def movlp : PatFrag<(ops node:$lhs, node:$rhs), @@ -497,7 +497,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src), // Alias instruction to load FR32 from f128mem using movaps. Upper bits are // disregarded. -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src), "movaps\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (alignedloadfsf32 addr:$src))]>; @@ -706,7 +706,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), let neverHasSideEffects = 1 in def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "movups\t{$src, $dst|$dst, $src}", []>; -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (loadv4f32 addr:$src))]>; @@ -715,7 +715,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), [(store (v4f32 VR128:$src), addr:$dst)]>; // Intrinsic forms of MOVUPS load and store -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), "movups\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>; @@ -735,7 +735,7 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (movhp VR128:$src1, + (movlhps VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -760,7 +760,7 @@ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), "movlhps\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v4f32 (movhp VR128:$src1, VR128:$src2)))]>; + (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>; def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), @@ -1256,7 +1256,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src), // Alias instruction to load FR64 from f128mem using movapd. Upper bits are // disregarded. -let canFoldAsLoad = 1 in +let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src), "movapd\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (alignedloadfsf64 addr:$src))]>; @@ -1494,7 +1494,7 @@ let Constraints = "$src1 = $dst" in { (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2), "movhpd\t{$src2, $dst|$dst, $src2}", [(set VR128:$dst, - (v2f64 (movhp VR128:$src1, + (v2f64 (movlhps VR128:$src1, (scalar_to_vector (loadf64 addr:$src2)))))]>; } // AddedComplexity } // Constraints = "$src1 = $dst" @@ -2085,7 +2085,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem, [(set VR128:$dst, (v4i32 (pshufd:$src2 (bc_v4i32(memopv2i64 addr:$src1)), (undef))))]>; -} +} // SSE2 with ImmT == Imm8 and XS prefix. def PSHUFHWri : Ii8<0x70, MRMSrcReg, @@ -2874,7 +2874,7 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)), (PALIGNR128rr VR128:$src2, VR128:$src1, (SHUFFLE_get_palign_imm VR128:$src3))>, Requires<[HasSSSE3]>; -} +} def : Pat<(X86pshufb VR128:$src, VR128:$mask), (PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>; @@ -3035,7 +3035,7 @@ def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS -def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)), +def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)), (MOVLHPSrr VR128:$src1, VR128:$src2)>; // vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS @@ -3051,48 +3051,26 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))), let AddedComplexity = 20 in { // vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS -// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; - def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))), (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))), (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; -def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>; -def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))), - (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>; } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS -// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; - def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; -def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)), - addr:$src1), - (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>; -def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1), - (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>; - let AddedComplexity = 15 in { // Setting the lowest element in the vector. diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index 62ca47f..0792bdd 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -367,8 +367,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) { // Rewrite the call target... so that we don't end up here every time we // execute the call. #if defined (X86_64_JIT) - if (!isStub) - *(intptr_t *)(RetAddr - 0xa) = NewVal; + assert(isStub && + "X86-64 doesn't support rewriting non-stub lazy compilation calls:" + " the call instruction varies too much."); #else *(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4); #endif diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index c5ff525..f577fcf 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -392,6 +392,11 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(X86::SP); Reserved.set(X86::SPL); + // Set the instruction pointer register and its aliases as reserved. + Reserved.set(X86::RIP); + Reserved.set(X86::EIP); + Reserved.set(X86::IP); + // Set the frame-pointer register and its aliases as reserved if needed. if (hasFP(MF)) { Reserved.set(X86::RBP); @@ -450,12 +455,17 @@ bool X86RegisterInfo::hasFP(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + bool requiresRealignment = + RealignStack && (MFI->getMaxAlignment() > StackAlign); // FIXME: Currently we don't support stack realignment for functions with - // variable-sized allocas - return (RealignStack && - (MFI->getMaxAlignment() > StackAlign && - !MFI->hasVarSizedObjects())); + // variable-sized allocas. + // FIXME: Temporary disable the error - it seems to be too conservative. + if (0 && requiresRealignment && MFI->hasVarSizedObjects()) + llvm_report_error( + "Stack realignment in presense of dynamic allocas is not supported"); + + return (requiresRealignment && !MFI->hasVarSizedObjects()); } bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { @@ -610,8 +620,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // Offset is a 32-bit integer. int Offset = getFrameIndexOffset(MF, FrameIndex) + (int)(MI.getOperand(i + 3).getImm()); - - MI.getOperand(i + 3).ChangeToImmediate(Offset); + + MI.getOperand(i + 3).ChangeToImmediate(Offset); } else { // Offset is symbolic. This is extremely rare. uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) + @@ -647,7 +657,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // } // [EBP] MFI->CreateFixedObject(-TailCallReturnAddrDelta, - (-1U*SlotSize)+TailCallReturnAddrDelta); + (-1U*SlotSize)+TailCallReturnAddrDelta, + true, false); } if (hasFP(MF)) { @@ -659,7 +670,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx = MFI->CreateFixedObject(SlotSize, -(int)SlotSize + TFI.getOffsetOfLocalArea() + - TailCallReturnAddrDelta); + TailCallReturnAddrDelta, + true, false); assert(FrameIdx == MFI->getObjectIndexBegin() && "Slot for EBP register must be last in order to be found!"); FrameIdx = 0; @@ -1271,7 +1283,7 @@ unsigned X86RegisterInfo::getRARegister() const { : X86::EIP; // Should have dwarf #8. } -unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const { return hasFP(MF) ? FramePtr : StackPtr; } diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index f635707..f281a3c 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -153,7 +153,7 @@ public: // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; int getFrameIndexOffset(MachineFunction &MF, int FI) const; void getInitialFrameState(std::vector<MachineMove> &Moves) const; diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 9525f04..b901c14 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -18,8 +18,10 @@ #include "llvm/GlobalValue.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/System/Host.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/ADT/SmallVector.h" using namespace llvm; #if defined(_MSC_VER) @@ -257,118 +259,6 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } } -static const char *GetCurrentX86CPU() { - unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0; - if (GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX)) - return "generic"; - unsigned Family = 0; - unsigned Model = 0; - DetectFamilyModel(EAX, Family, Model); - - GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); - bool Em64T = (EDX >> 29) & 0x1; - bool HasSSE3 = (ECX & 0x1); - - union { - unsigned u[3]; - char c[12]; - } text; - - GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); - if (memcmp(text.c, "GenuineIntel", 12) == 0) { - switch (Family) { - case 3: - return "i386"; - case 4: - return "i486"; - case 5: - switch (Model) { - case 4: return "pentium-mmx"; - default: return "pentium"; - } - case 6: - switch (Model) { - case 1: return "pentiumpro"; - case 3: - case 5: - case 6: return "pentium2"; - case 7: - case 8: - case 10: - case 11: return "pentium3"; - case 9: - case 13: return "pentium-m"; - case 14: return "yonah"; - case 15: - case 22: // Celeron M 540 - return "core2"; - case 23: // 45nm: Penryn , Wolfdale, Yorkfield (XE) - return "penryn"; - default: return "i686"; - } - case 15: { - switch (Model) { - case 3: - case 4: - case 6: // same as 4, but 65nm - return (Em64T) ? "nocona" : "prescott"; - case 26: - return "corei7"; - case 28: - return "atom"; - default: - return (Em64T) ? "x86-64" : "pentium4"; - } - } - - default: - return "generic"; - } - } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) { - // FIXME: this poorly matches the generated SubtargetFeatureKV table. There - // appears to be no way to generate the wide variety of AMD-specific targets - // from the information returned from CPUID. - switch (Family) { - case 4: - return "i486"; - case 5: - switch (Model) { - case 6: - case 7: return "k6"; - case 8: return "k6-2"; - case 9: - case 13: return "k6-3"; - default: return "pentium"; - } - case 6: - switch (Model) { - case 4: return "athlon-tbird"; - case 6: - case 7: - case 8: return "athlon-mp"; - case 10: return "athlon-xp"; - default: return "athlon"; - } - case 15: - if (HasSSE3) { - return "k8-sse3"; - } else { - switch (Model) { - case 1: return "opteron"; - case 5: return "athlon-fx"; // also opteron - default: return "athlon64"; - } - } - case 16: - return "amdfam10"; - default: - return "generic"; - } - } else { - return "generic"; - } -} - X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit) : PICStyle(PICStyles::None) @@ -395,7 +285,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, // Determine default and user specified characteristics if (!FS.empty()) { // If feature string is not empty, parse features string. - std::string CPU = GetCurrentX86CPU(); + std::string CPU = sys::getHostCPUName(); ParseSubtargetFeatures(FS, CPU); // All X86-64 CPUs also have SSE2, however user might request no SSE via // -mattr, so don't force SSELevel here. @@ -455,3 +345,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, if (StackAlignment) stackAlignment = StackAlignment; } + +bool X86Subtarget::enablePostRAScheduler( + CodeGenOpt::Level OptLevel, + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const { + Mode = TargetSubtarget::ANTIDEP_CRITICAL; + CriticalPathRCs.clear(); + return OptLevel >= CodeGenOpt::Default; +} diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index e64b854..23f2841 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -166,11 +166,11 @@ public: std::string getDataLayout() const { const char *p; if (is64Bit()) - p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128"; + p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"; else if (isTargetDarwin()) - p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128"; + p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32"; else - p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32"; + p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"; return std::string(p); } @@ -219,10 +219,8 @@ public: /// enablePostRAScheduler - X86 target is enabling post-alloc scheduling /// at 'More' optimization level. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, - TargetSubtarget::AntiDepBreakMode& mode) const { - mode = TargetSubtarget::ANTIDEP_CRITICAL; - return OptLevel >= CodeGenOpt::Default; - } + TargetSubtarget::AntiDepBreakMode& Mode, + RegClassVector& CriticalPathRCs) const; }; } // End llvm namespace diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index a61de1c..0cda8bc 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -22,8 +22,7 @@ #include "llvm/Target/TargetRegistry.h" using namespace llvm; -static const MCAsmInfo *createMCAsmInfo(const Target &T, - const StringRef &TT) { +static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) { Triple TheTriple(TT); switch (TheTriple.getOS()) { case Triple::Darwin: @@ -186,14 +185,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, } // 64-bit JIT places everything in the same buffer except external functions. - // On Darwin, use small code model but hack the call instruction for - // externals. Elsewhere, do not assume globals are in the lower 4G. - if (Subtarget.is64Bit()) { - if (Subtarget.isTargetDarwin()) - setCodeModel(CodeModel::Small); - else + if (Subtarget.is64Bit()) setCodeModel(CodeModel::Large); - } PM.add(createX86CodeEmitterPass(*this, MCE)); @@ -212,14 +205,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, } // 64-bit JIT places everything in the same buffer except external functions. - // On Darwin, use small code model but hack the call instruction for - // externals. Elsewhere, do not assume globals are in the lower 4G. - if (Subtarget.is64Bit()) { - if (Subtarget.isTargetDarwin()) - setCodeModel(CodeModel::Small); - else + if (Subtarget.is64Bit()) setCodeModel(CodeModel::Large); - } PM.add(createX86JITCodeEmitterPass(*this, JCE)); diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp index bc1bbc3..d7106a0 100644 --- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp +++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp @@ -361,7 +361,7 @@ void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) { return; } printInstruction(MI); - if (VerboseAsm && !MI->getDebugLoc().isUnknown()) + if (VerboseAsm) EmitComments(*MI); O << '\n'; diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 860b72f..da2fb04 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -149,10 +149,7 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Op, SDValue Addr, /// InstructionSelect - This callback is invoked by /// SelectionDAGISel when it has created a SelectionDAG for us to codegen. -void XCoreDAGToDAGISel:: -InstructionSelect() { - DEBUG(BB->dump()); - +void XCoreDAGToDAGISel::InstructionSelect() { // Select target instructions for the DAG. SelectRoot(*CurDAG); diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 5ef56c9..16e68fe 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -860,7 +860,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, } // Create the frame index object for this incoming parameter... int FI = MFI->CreateFixedObject(ObjSize, - LRSaveSize + VA.getLocMemOffset()); + LRSaveSize + VA.getLocMemOffset(), + true, false); // Create the SelectionDAG nodes corresponding to a load //from this parameter @@ -884,7 +885,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // address for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) { // Create a stack slot - int FI = MFI->CreateFixedObject(4, offset); + int FI = MFI->CreateFixedObject(4, offset, true, false); if (i == FirstVAReg) { XFI->setVarArgsFrameIndex(FI); } @@ -905,7 +906,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, } else { // This will point to the next argument passed via stack. XFI->setVarArgsFrameIndex( - MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset())); + MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(), + true, false)); } } @@ -916,6 +918,17 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// +bool XCoreTargetLowering:: +CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<EVT> &OutTys, + const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, + SelectionDAG &DAG) { + SmallVector<CCValAssign, 16> RVLocs; + CCState CCInfo(CallConv, isVarArg, getTargetMachine(), + RVLocs, *DAG.getContext()); + return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_XCore); +} + SDValue XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index ef8555e..10631af 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -159,6 +159,12 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl<ISD::OutputArg> &Outs, DebugLoc dl, SelectionDAG &DAG); + + virtual bool + CanLowerReturn(CallingConv::ID CallConv, bool isVarArg, + const SmallVectorImpl<EVT> &OutTys, + const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags, + SelectionDAG &DAG); }; } diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td index 68e69a2..4ed4ed4 100644 --- a/lib/Target/XCore/XCoreInstrInfo.td +++ b/lib/Target/XCore/XCoreInstrInfo.td @@ -617,7 +617,7 @@ defm EXTSP : FU6_LU6_np<"extsp">; let mayStore = 1 in defm ENTSP : FU6_LU6_np<"entsp">; -let isReturn = 1, isTerminator = 1, mayLoad = 1 in { +let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in { defm RETSP : FU6_LU6<"retsp", XCoreRetsp>; } } diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp index 136a035..c7c8c7b 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.cpp +++ b/lib/Target/XCore/XCoreRegisterInfo.cpp @@ -330,9 +330,10 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, int FrameIdx; if (! isVarArg) { // A fixed offset of 0 allows us to save / restore LR using entsp / retsp. - FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0); + FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true, false); } else { - FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment()); + FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), + false); } XFI->setUsesLR(FrameIdx); XFI->setLRSpillSlot(FrameIdx); @@ -340,13 +341,15 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (requiresRegisterScavenging(MF)) { // Reserve a slot close to SP or frame pointer. RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment())); + RC->getAlignment(), + false)); } if (hasFP(MF)) { // A callee save register is used to hold the FP. // This needs saving / restoring in the epilogue / prologue. XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment())); + RC->getAlignment(), + false)); } } @@ -593,7 +596,7 @@ int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } -unsigned XCoreRegisterInfo::getFrameRegister(MachineFunction &MF) const { +unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const { bool FP = hasFP(MF); return FP ? XCore::R10 : XCore::SP; diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h index a7df510..8ab1750 100644 --- a/lib/Target/XCore/XCoreRegisterInfo.h +++ b/lib/Target/XCore/XCoreRegisterInfo.h @@ -60,7 +60,7 @@ public: unsigned eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, int *Value = NULL, RegScavenger *RS = NULL) const; - + void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = NULL) const; @@ -71,7 +71,7 @@ public: // Debug information queries. unsigned getRARegister() const; - unsigned getFrameRegister(MachineFunction &MF) const; + unsigned getFrameRegister(const MachineFunction &MF) const; void getInitialFrameState(std::vector<MachineMove> &Moves) const; //! Return the array of argument passing registers diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 75f2055..267f46a 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -25,7 +25,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT, : LLVMTargetMachine(T, TT), Subtarget(TT, FS), DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-" - "i16:16:32-i32:32:32-i64:32:32"), + "i16:16:32-i32:32:32-i64:32:32-n32"), InstrInfo(), FrameInfo(*this), TLInfo(*this) { |