diff options
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/ARM/ARMBaseRegisterInfo.cpp | 4 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb.td | 2 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrThumb2.td | 141 | ||||
-rw-r--r-- | lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 5 | ||||
-rw-r--r-- | lib/Target/ARM/Thumb1RegisterInfo.cpp | 22 | ||||
-rw-r--r-- | lib/Target/Alpha/AlphaCallingConv.td | 3 | ||||
-rw-r--r-- | lib/Target/CellSPU/SPUMCAsmInfo.cpp | 3 | ||||
-rw-r--r-- | lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp | 3 | ||||
-rw-r--r-- | lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp | 13 | ||||
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelDAGToDAG.cpp | 1 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 91 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 19 |
13 files changed, 227 insertions, 90 deletions
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 8044966..577c363 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -1350,7 +1350,9 @@ emitPrologue(MachineFunction &MF) const { unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + if (STI.isTargetDarwin() || hasFP(MF)) + AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + + NumBytes); AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 1c77f27..786dd65 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -734,7 +734,7 @@ def tMOVgpr2gpr : T1I<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVr, // multiply register let isCommutable = 1 in def tMUL : T1sIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iMUL32, - "mul", "\t$dst, $rhs", + "mul", "\t$dst, $rhs, $dst", /* A8.6.105 MUL Encoding T1 */ [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>, T1DataProcessing<0b1101>; diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 316567d..6241766 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -131,7 +131,7 @@ def t2addrmode_imm12 : Operand<i32>, let MIOperandInfo = (ops GPR:$base, i32imm:$offsimm); } -// t2addrmode_imm8 := reg - imm8 +// t2addrmode_imm8 := reg +/- imm8 def t2addrmode_imm8 : Operand<i32>, ComplexPattern<i32, 2, "SelectT2AddrModeImm8", []> { let PrintMethod = "printT2AddrModeImm8Operand"; @@ -657,6 +657,32 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> { } } +// SXTB16 and UXTB16 do not need the .w qualifier. +multiclass T2I_unary_rrot_nw<bits<3> opcod, string opc, PatFrag opnode> { + def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr, + opc, "\t$dst, $src", + [(set GPR:$dst, (opnode GPR:$src))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = 0b00; // rotate + } + def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi, + opc, "\t$dst, $src, ror $rot", + [(set GPR:$dst, (opnode (rotr GPR:$src, rot_imm:$rot)))]> { + let Inst{31-27} = 0b11111; + let Inst{26-23} = 0b0100; + let Inst{22-20} = opcod; + let Inst{19-16} = 0b1111; // Rn + let Inst{15-12} = 0b1111; + let Inst{7} = 1; + let Inst{5-4} = {?,?}; // rotate + } +} + // DO variant - disassembly only, no pattern multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> { @@ -983,6 +1009,28 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb), []>; } +// LDRT, LDRBT, LDRHT, LDRSBT, LDRSHT all have offset mode (PUW=0b110) and are +// for disassembly only. +// Ref: A8.6.57 LDR (immediate, Thumb) Encoding T4 +class T2IldT<bit signed, bits<2> type, string opc> + : T2Ii8<(outs GPR:$dst), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, + "\t$dst, $addr", []> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = signed; + let Inst{23} = 0; + let Inst{22-21} = type; + let Inst{20} = 1; // load + let Inst{11} = 1; + let Inst{10-8} = 0b110; // PUW. +} + +def t2LDRT : T2IldT<0, 0b10, "ldrt">; +def t2LDRBT : T2IldT<0, 0b00, "ldrbt">; +def t2LDRHT : T2IldT<0, 0b01, "ldrht">; +def t2LDRSBT : T2IldT<1, 0b00, "ldrsbt">; +def t2LDRSHT : T2IldT<1, 0b01, "ldrsht">; + // Store defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>; defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>; @@ -1037,9 +1085,98 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb), [(set GPR:$base_wb, (post_truncsti8 GPR:$src, GPR:$base, t2am_imm8_offset:$offset))]>; +// STRT, STRBT, STRHT all have offset mode (PUW=0b110) and are for disassembly +// only. +// Ref: A8.6.193 STR (immediate, Thumb) Encoding T4 +class T2IstT<bits<2> type, string opc> + : T2Ii8<(outs GPR:$src), (ins t2addrmode_imm8:$addr), IIC_iStorei, opc, + "\t$src, $addr", []> { + let Inst{31-27} = 0b11111; + let Inst{26-25} = 0b00; + let Inst{24} = 0; // not signed + let Inst{23} = 0; + let Inst{22-21} = type; + let Inst{20} = 0; // store + let Inst{11} = 1; + let Inst{10-8} = 0b110; // PUW +} + +def t2STRT : T2IstT<0b10, "strt">; +def t2STRBT : T2IstT<0b00, "strbt">; +def t2STRHT : T2IstT<0b01, "strht">; // FIXME: ldrd / strd pre / post variants +// T2Ipl (Preload Data/Instruction) signals the memory system of possible future +// data/instruction access. These are for disassembly only. +multiclass T2Ipl<bit instr, bit write, string opc> { + + def i12 : T2I<(outs), (ins t2addrmode_imm12:$addr), IIC_iLoadi, opc, + "\t$addr", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 1; // U = 1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + } + + def i8 : T2I<(outs), (ins t2addrmode_imm8:$addr), IIC_iLoadi, opc, + "\t$addr", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // U = 0 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-8} = 0b1100; + } + + // A8.6.118 #0 and #-0 differs. Translates -0 to -1, -1 to -2, ..., etc. + def pci : T2I<(outs), (ins GPR:$base, i32imm:$imm), IIC_iLoadi, opc, + "\t[pc, ${imm:negzero}]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = ?; // add = (U == 1) + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{19-16} = 0b1111; // Rn = 0b1111 + let Inst{15-12} = 0b1111; + } + + def r : T2I<(outs), (ins GPR:$base, GPR:$a), IIC_iLoadi, opc, + "\t[$base, $a]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // add = TRUE for T1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-6} = 0000000; + let Inst{5-4} = 0b00; // no shift is applied + } + + def s : T2I<(outs), (ins GPR:$base, GPR:$a, i32imm:$shamt), IIC_iLoadi, opc, + "\t[$base, $a, lsl $shamt]", []> { + let Inst{31-25} = 0b1111100; + let Inst{24} = instr; + let Inst{23} = 0; // add = TRUE for T1 + let Inst{22} = 0; + let Inst{21} = write; + let Inst{20} = 1; + let Inst{15-12} = 0b1111; + let Inst{11-6} = 0000000; + } +} + +defm t2PLD : T2Ipl<0, 0, "pld">; +defm t2PLDW : T2Ipl<0, 1, "pldw">; +defm t2PLI : T2Ipl<1, 0, "pli">; + //===----------------------------------------------------------------------===// // Load / store multiple Instructions. // @@ -1149,7 +1286,7 @@ defm t2UXTB : T2I_unary_rrot<0b101, "uxtb", UnOpFrag<(and node:$Src, 0x000000FF)>>; defm t2UXTH : T2I_unary_rrot<0b001, "uxth", UnOpFrag<(and node:$Src, 0x0000FFFF)>>; -defm t2UXTB16 : T2I_unary_rrot<0b011, "uxtb16", +defm t2UXTB16 : T2I_unary_rrot_nw<0b011, "uxtb16", UnOpFrag<(and node:$Src, 0x00FF00FF)>>; def : T2Pat<(and (shl GPR:$Src, (i32 8)), 0xFF00FF), diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 5b4f02d..19f1e3b 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -761,6 +761,11 @@ static bool isMemoryOp(const MachineInstr *MI) { MI->getOperand(0).isUndef()) return false; + // Likewise don't mess with references to undefined addresses. + if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() && + MI->getOperand(1).isUndef()) + return false; + int Opcode = MI->getOpcode(); switch (Opcode) { default: break; diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp index b61ce29..163d1e9 100644 --- a/lib/Target/ARM/Thumb1RegisterInfo.cpp +++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp @@ -778,9 +778,19 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { } static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) { - return (MI->getOpcode() == ARM::tRestore && - MI->getOperand(1).isFI() && - isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)); + if (MI->getOpcode() == ARM::tRestore && + MI->getOperand(1).isFI() && + isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs)) + return true; + else if (MI->getOpcode() == ARM::tPOP) { + // The first three operands are predicates and such. The last two are + // imp-def and imp-use of SP. Check everything in between. + for (int i = 3, e = MI->getNumOperands() - 2; i != e; ++i) + if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) + return false; + return true; + } + return false; } void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, @@ -794,13 +804,13 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); int NumBytes = (int)MFI->getStackSize(); + const unsigned *CSRegs = getCalleeSavedRegs(); if (!AFI->hasStackFrame()) { if (NumBytes != 0) emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. - const unsigned *CSRegs = getCalleeSavedRegs(); if (MBBI != MBB.begin()) { do --MBBI; @@ -836,6 +846,9 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, } if (VARegSaveSize) { + // Move back past the callee-saved register restoration + while (MBBI != MBB.end() && isCSRestore(MBBI, CSRegs)) + ++MBBI; // Epilogue for vararg functions: pop LR to R3 and branch off it. AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))) .addReg(0) // No write back. @@ -845,6 +858,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF, BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)) .addReg(ARM::R3, RegState::Kill); + // erase the old tBX_RET instruction MBB.erase(MBBI); } } diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td index 38ada69..bde8819 100644 --- a/lib/Target/Alpha/AlphaCallingConv.td +++ b/lib/Target/Alpha/AlphaCallingConv.td @@ -14,7 +14,8 @@ //===----------------------------------------------------------------------===// def RetCC_Alpha : CallingConv<[ // i64 is returned in register R0 - CCIfType<[i64], CCAssignToReg<[R0]>>, + // R1 is an llvm extension, I don't know what gcc does + CCIfType<[i64], CCAssignToReg<[R0,R1]>>, // f32 / f64 are returned in F0/F1 CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>> diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/SPUMCAsmInfo.cpp index 5ef3c6b..3e17a51 100644 --- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp +++ b/lib/Target/CellSPU/SPUMCAsmInfo.cpp @@ -34,5 +34,8 @@ SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, const StringRef &TT) { // Exception handling is not supported on CellSPU (think about it: you only // have 256K for code+data. Would you support exception handling?) ExceptionsType = ExceptionHandling::None; + + // SPU assembly requires ".section" before ".bss" + UsesELFSectionDirectiveForBSS = true; } diff --git a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp index 46cc819..f1bdb12 100644 --- a/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp +++ b/lib/Target/PIC16/TargetInfo/PIC16TargetInfo.cpp @@ -15,7 +15,8 @@ using namespace llvm; Target llvm::ThePIC16Target, llvm::TheCooperTarget; extern "C" void LLVMInitializePIC16TargetInfo() { - RegisterTarget<> X(ThePIC16Target, "pic16", "PIC16 14-bit [experimental]"); + RegisterTarget<Triple::pic16> X(ThePIC16Target, "pic16", + "PIC16 14-bit [experimental]"); RegisterTarget<> Y(TheCooperTarget, "cooper", "PIC16 Cooper [experimental]"); } diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp index d6b45be..f6753a6 100644 --- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp @@ -204,9 +204,10 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, /// isBlockOnlyReachableByFallthough - Return true if the basic block has /// exactly one predecessor and the control transfer mechanism between /// the predecessor and this block is a fall-through. -/// Override AsmPrinter implementation to handle delay slots -bool SparcAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) - const { +/// +/// This overrides AsmPrinter's implementation to handle delay slots. +bool SparcAsmPrinter:: +isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const { // If this is a landing pad, it isn't a fall through. If it has no preds, // then nothing falls through to it. if (MBB->isLandingPad() || MBB->pred_empty()) @@ -224,10 +225,10 @@ bool SparcAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock if (!Pred->isLayoutSuccessor(MBB)) return false; - // Check if the last terminator is an unconditional branch + // Check if the last terminator is an unconditional branch. MachineBasicBlock::const_iterator I = Pred->end(); - while( I != Pred->begin() && !(--I)->getDesc().isTerminator() ) - ; /* Noop */ + while (I != Pred->begin() && !(--I)->getDesc().isTerminator()) + ; // Noop return I == Pred->end() || !I->getDesc().isBarrier(); } diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 17366ee..98e3f4e 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -388,6 +388,8 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { } case Instruction::GetElementPtr: { + X86AddressMode SavedAM = AM; + // Pattern-match simple GEPs. uint64_t Disp = (int32_t)AM.Disp; unsigned IndexReg = AM.IndexReg; @@ -428,7 +430,13 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) { AM.IndexReg = IndexReg; AM.Scale = Scale; AM.Disp = (uint32_t)Disp; - return X86SelectAddress(U->getOperand(0), AM); + if (X86SelectAddress(U->getOperand(0), AM)) + return true; + + // If we couldn't merge the sub value into this addr mode, revert back to + // our address and just match the value instead of completely failing. + AM = SavedAM; + break; unsupported_gep: // Ok, the GEP indices weren't all covered. break; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 08030e0..3fad8ad 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -413,6 +413,7 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { } void X86DAGToDAGISel::PreprocessISelDAG() { + // OptForSize is used in pattern predicates that isel is matching. OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize); for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e2b8193..8384ab7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -990,7 +990,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::BUILD_VECTOR); setTargetDAGCombine(ISD::SELECT); - setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SHL); setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); @@ -2236,7 +2235,8 @@ static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, const X86InstrInfo *TII) { - int FI; + unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; + int FI = INT_MAX; if (Arg.getOpcode() == ISD::CopyFromReg) { unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) @@ -2252,25 +2252,30 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r) && Def->getOperand(1).isFI()) { FI = Def->getOperand(1).getIndex(); - if (MFI->getObjectSize(FI) != Flags.getByValSize()) - return false; + Bytes = Flags.getByValSize(); } else return false; } - } else { - LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg); - if (!Ld) + } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { + if (Flags.isByVal()) + // ByVal argument is passed in as a pointer but it's now being + // dereferenced. e.g. + // define @foo(%struct.X* %A) { + // tail call @bar(%struct.X* byval %A) + // } return false; SDValue Ptr = Ld->getBasePtr(); FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); if (!FINode) return false; FI = FINode->getIndex(); - } + } else + return false; + assert(FI != INT_MAX); if (!MFI->isFixedObjectIndex(FI)) return false; - return Offset == MFI->getObjectOffset(FI); + return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); } /// IsEligibleForTailCallOptimization - Check whether the call is eligible @@ -9174,58 +9179,6 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } -/// PerformANDCombine - Look for SSE and instructions of this form: -/// (and x, (build_vector signbit,signbit,signbit,signbit)). If there -/// exists a use of a build_vector that's the bitwise complement of the mask, -/// then transform the node to -/// (and (xor x, (build_vector -1,-1,-1,-1)), (build_vector ~sb,~sb,~sb,~sb)). -static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG, - TargetLowering::DAGCombinerInfo &DCI) { - EVT VT = N->getValueType(0); - if (!VT.isVector() || !VT.isInteger()) - return SDValue(); - - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - if (N0.getOpcode() == ISD::XOR || !N1.hasOneUse()) - return SDValue(); - - if (N1.getOpcode() == ISD::BUILD_VECTOR) { - unsigned NumElts = VT.getVectorNumElements(); - EVT EltVT = VT.getVectorElementType(); - SmallVector<SDValue, 8> Mask; - Mask.reserve(NumElts); - for (unsigned i = 0; i != NumElts; ++i) { - SDValue Arg = N1.getOperand(i); - if (Arg.getOpcode() == ISD::UNDEF) { - Mask.push_back(Arg); - continue; - } - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Arg); - if (!C) - return SDValue(); - if (!C->getAPIntValue().isSignBit() && - !C->getAPIntValue().isMaxSignedValue()) - return SDValue(); - Mask.push_back(DAG.getConstant(~C->getAPIntValue(), EltVT)); - } - N1 = DAG.getNode(ISD::BUILD_VECTOR, N1.getDebugLoc(), VT, - &Mask[0], NumElts); - if (!N1.use_empty()) { - unsigned Bits = EltVT.getSizeInBits(); - Mask.clear(); - for (unsigned i = 0; i != NumElts; ++i) - Mask.push_back(DAG.getConstant(APInt::getAllOnesValue(Bits), EltVT)); - SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), - VT, &Mask[0], NumElts); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, - N0, NewMask), N1); - } - } - - return SDValue(); -} /// PerformMulCombine - Optimize a single multiply with constant into two /// in order to implement it with two cheaper instructions, e.g. @@ -9755,7 +9708,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this); case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget); case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI); - case ISD::AND: return PerformANDCombine(N, DAG, DCI); case ISD::MUL: return PerformMulCombine(N, DAG, DCI); case ISD::SHL: case ISD::SRA: @@ -9838,11 +9790,20 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { // rorw $$8, ${0:w} --> llvm.bswap.i16 if (CI->getType()->isIntegerTy(16) && AsmPieces.size() == 3 && - AsmPieces[0] == "rorw" && + (AsmPieces[0] == "rorw" || AsmPieces[0] == "rolw") && AsmPieces[1] == "$$8," && AsmPieces[2] == "${0:w}" && - IA->getConstraintString() == "=r,0,~{dirflag},~{fpsr},~{flags},~{cc}") { - return LowerToBSwap(CI); + IA->getConstraintString().compare(0, 5, "=r,0,") == 0) { + AsmPieces.clear(); + SplitString(IA->getConstraintString().substr(5), AsmPieces, ","); + std::sort(AsmPieces.begin(), AsmPieces.end()); + if (AsmPieces.size() == 4 && + AsmPieces[0] == "~{cc}" && + AsmPieces[1] == "~{dirflag}" && + AsmPieces[2] == "~{flags}" && + AsmPieces[3] == "~{fpsr}") { + return LowerToBSwap(CI); + } } break; case 3: diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index cfe71a5..d46b946 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1050,7 +1050,10 @@ def MOV32cr : I<0x22, MRMSrcReg, (outs CONTROL_REG_32:$dst), (ins GR32:$src), // // Extra precision multiplication -let Defs = [AL,AH,EFLAGS], Uses = [AL] in + +// AL is really implied by AX, by the registers in Defs must match the +// SDNode results (i8, i32). +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8r : I<0xF6, MRM4r, (outs), (ins GR8:$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. // This probably ought to be moved to a def : Pat<> if the @@ -1068,7 +1071,7 @@ def MUL32r : I<0xF7, MRM4r, (outs), (ins GR32:$src), "mul{l}\t$src", []>; // EAX,EDX = EAX*GR32 -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def MUL8m : I<0xF6, MRM4m, (outs), (ins i8mem :$src), "mul{b}\t$src", // FIXME: Used for 8-bit mul, ignore result upper 8 bits. @@ -1090,7 +1093,7 @@ def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), } let neverHasSideEffects = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8r : I<0xF6, MRM5r, (outs), (ins GR8:$src), "imul{b}\t$src", []>; // AL,AH = AL*GR8 let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1100,7 +1103,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; // EAX,EDX = EAX*GR32 let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AL] in +let Defs = [AL,EFLAGS,AX], Uses = [AL] in def IMUL8m : I<0xF6, MRM5m, (outs), (ins i8mem :$src), "imul{b}\t$src", []>; // AL,AH = AL*[mem8] let Defs = [AX,DX,EFLAGS], Uses = [AX] in @@ -1113,7 +1116,7 @@ def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), } // neverHasSideEffects // unsigned division/remainder -let Defs = [AX,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1123,7 +1126,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def DIV32r : I<0xF7, MRM6r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "div{l}\t$src", []>; let mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def DIV8m : I<0xF6, MRM6m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "div{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1136,7 +1139,7 @@ def DIV32m : I<0xF7, MRM6m, (outs), (ins i32mem:$src), } // Signed division/remainder. -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8r : I<0xF6, MRM7r, (outs), (ins GR8:$src), // AX/r8 = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in @@ -1146,7 +1149,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX,EDX] in def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX "idiv{l}\t$src", []>; let mayLoad = 1, mayLoad = 1 in { -let Defs = [AL,AH,EFLAGS], Uses = [AX] in +let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "idiv{b}\t$src", []>; let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in |