Diffstat (limited to 'contrib/llvm/lib/Target/X86/X86InstrCompiler.td')
-rw-r--r--   contrib/llvm/lib/Target/X86/X86InstrCompiler.td | 288
1 file changed, 180 insertions, 108 deletions
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
index 612b2fa..6f9e849 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -112,23 +112,39 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
 // allocated by bumping the stack pointer. Otherwise memory is allocated from
 // the heap.
 
-let Defs = [EAX, ESP, EFLAGS], Uses = [ESP, EAX] in
+let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in
 def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
                       "# variable sized alloca for segmented stacks",
                       [(set GR32:$dst,
                          (X86SegAlloca GR32:$size))]>,
                     Requires<[In32BitMode]>;
 
-let Defs = [RAX, RSP, EFLAGS], Uses = [RSP, RAX] in
+let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
 def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),
                       "# variable sized alloca for segmented stacks",
                       [(set GR64:$dst,
                          (X86SegAlloca GR64:$size))]>,
                     Requires<[In64BitMode]>;
-}
+
+// The MSVC runtime contains an _ftol2 routine for converting floating-point
+// to integer values. It has a strange calling convention: the input is
+// popped from the x87 stack, and the return value is given in EDX:EAX. No
+// other registers (aside from flags) are touched.
+// Microsoft toolchains do not support 80-bit precision, so a WIN_FTOL_80
+// variant is unnecessary.
+
+let Defs = [EAX, EDX, EFLAGS], FPForm = SpecialFP in {
+  def WIN_FTOL_32 : I<0, Pseudo, (outs), (ins RFP32:$src),
+                      "# win32 fptoui",
+                      [(X86WinFTOL RFP32:$src)]>,
+                    Requires<[In32BitMode]>;
+
+  def WIN_FTOL_64 : I<0, Pseudo, (outs), (ins RFP64:$src),
+                      "# win32 fptoui",
+                      [(X86WinFTOL RFP64:$src)]>,
+                    Requires<[In32BitMode]>;
+}
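[Editorial aside, not part of the patch.] At the source level, the operation the WIN_FTOL_* pseudos above model is an ordinary float-to-unsigned cast. A minimal C++ sketch, assuming a 32-bit MSVC-style target (for example an i686-pc-win32 triple) where this fptoui is the case that selects to X86WinFTOL and hence to an _ftol2 call:

    #include <cstdint>

    // fptoui on an f64 (RFP64) input: the case WIN_FTOL_64 covers.
    // The compiler is assumed to emit a call to the runtime's _ftol2,
    // which pops the operand from the x87 stack and returns the 64-bit
    // result in EDX:EAX, as the comment in the patch describes.
    uint32_t truncate_to_u32(double d) {
      return static_cast<uint32_t>(d);
    }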
 
 //===----------------------------------------------------------------------===//
 // EH Pseudo Instructions
@@ -137,7 +153,7 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
     hasCtrlDep = 1, isCodeGenOnly = 1 in {
 def EH_RETURN   : I<0xC3, RawFrm, (outs), (ins GR32:$addr),
                     "ret\t#eh_return, addr: $addr",
-                    [(X86ehret GR32:$addr)]>;
+                    [(X86ehret GR32:$addr)], IIC_RET>;
 
 }
 
@@ -145,8 +161,26 @@ let isTerminator = 1, isReturn = 1, isBarrier = 1,
     hasCtrlDep = 1, isCodeGenOnly = 1 in {
 def EH_RETURN64   : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
                      "ret\t#eh_return, addr: $addr",
-                     [(X86ehret GR64:$addr)]>;
+                     [(X86ehret GR64:$addr)], IIC_RET>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions used by segmented stacks.
+//
+
+// This is lowered into a RET instruction by MCInstLower. We need
+// this so that we don't have to have a MachineBasicBlock which ends
+// with a RET and also has successors.
+let isPseudo = 1 in {
+def MORESTACK_RET: I<0, Pseudo, (outs), (ins),
+                     "", []>;
+
+// This instruction is lowered to a RET followed by a MOV. The two
+// instructions are not generated on a higher level since then the
+// verifier sees a MachineBasicBlock ending with a non-terminator.
+def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins),
+                                  "", []>;
+}
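[Editorial aside, not part of the patch.] A hedged sketch of the MC-lowering expansion the comments above describe. The emission helper, its name, and the exact streamer calls are assumptions modeled on the MC APIs of this LLVM era; only the "RET, then MOV" pair itself is documented in the patch:

    #include "llvm/MC/MCInst.h"
    #include "llvm/MC/MCStreamer.h"
    // X86::RET, X86::MOV64rr, X86::R10, X86::RAX come from the target's
    // generated headers (X86GenInstrInfo / X86GenRegisterInfo).

    // Hypothetical helper: MORESTACK_RET lowers to a bare RET; the
    // _RESTORE_R10 variant lowers to a RET followed by "movq %rax, %r10".
    // Doing this at MC lowering keeps the machine verifier from ever
    // seeing a basic block that ends in a non-terminator.
    static void emitMoreStackRet(llvm::MCStreamer &Out, bool RestoreR10) {
      llvm::MCInst Ret;
      Ret.setOpcode(llvm::X86::RET);
      Out.EmitInstruction(Ret);
      if (RestoreR10) {
        llvm::MCInst Mov;
        Mov.setOpcode(llvm::X86::MOV64rr);
        Mov.addOperand(llvm::MCOperand::CreateReg(llvm::X86::R10));
        Mov.addOperand(llvm::MCOperand::CreateReg(llvm::X86::RAX));
        Out.EmitInstruction(Mov);
      }
    }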
 
 //===----------------------------------------------------------------------===//
@@ -159,7 +193,7 @@ def EH_RETURN64   : I<0xC3, RawFrm, (outs), (ins GR64:$addr),
 let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
     isCodeGenOnly = 1 in {
 def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
-               [(set GR8:$dst, 0)]>;
+               [(set GR8:$dst, 0)], IIC_ALU_NONMEM>;
 
 // We want to rewrite MOV16r0 in terms of MOV32r0, because it's a smaller
 // encoding and avoids a partial-register update sometimes, but doing so
@@ -168,11 +202,11 @@ def MOV8r0   : I<0x30, MRMInitReg, (outs GR8 :$dst), (ins), "",
 // to an MCInst.
 def MOV16r0  : I<0x31, MRMInitReg, (outs GR16:$dst), (ins), "",
-               [(set GR16:$dst, 0)]>, OpSize;
+               [(set GR16:$dst, 0)], IIC_ALU_NONMEM>, OpSize;
 
 // FIXME: Set encoding to pseudo.
 def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
-               [(set GR32:$dst, 0)]>;
+               [(set GR32:$dst, 0)], IIC_ALU_NONMEM>;
 }
 
 // We want to rewrite MOV64r0 in terms of MOV32r0, because it's sometimes a
@@ -184,7 +218,7 @@ def MOV32r0  : I<0x31, MRMInitReg, (outs GR32:$dst), (ins), "",
 let Defs = [EFLAGS], isCodeGenOnly=1, AddedComplexity = 1,
     isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def MOV64r0   : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
-                [(set GR64:$dst, 0)]>;
+                [(set GR64:$dst, 0)], IIC_ALU_NONMEM>;
 
 // Materialize i64 constant where top 32-bits are zero. This could theoretically
 // use MOV32ri with a SUBREG_TO_REG to represent the zero-extension, however
@@ -192,7 +226,8 @@ def MOV64r0   : I<0x31, MRMInitReg, (outs GR64:$dst), (ins), "",
 let AddedComplexity = 1, isReMaterializable = 1, isAsCheapAsAMove = 1,
     isCodeGenOnly = 1 in
 def MOV64ri64i32 : Ii32<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64i32imm:$src),
-                        "", [(set GR64:$dst, i64immZExt32:$src)]>;
+                        "", [(set GR64:$dst, i64immZExt32:$src)],
+                        IIC_ALU_NONMEM>;
 
 // Use sbb to materialize carry bit.
 let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
@@ -202,14 +237,18 @@ let Uses = [EFLAGS], Defs = [EFLAGS], isCodeGenOnly = 1 in {
 // FIXME: Change these to have encoding Pseudo when X86MCCodeEmitter replaces
 // X86CodeEmitter.
 def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins), "",
-                 [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+                 [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+                 IIC_ALU_NONMEM>;
 def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins), "",
-                  [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>,
+                  [(set GR16:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+                  IIC_ALU_NONMEM>,
                   OpSize;
 def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins), "",
-                  [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+                  [(set GR32:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+                  IIC_ALU_NONMEM>;
 def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins), "",
-                   [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+                   [(set GR64:$dst, (X86setcc_c X86_COND_B, EFLAGS))],
+                   IIC_ALU_NONMEM>;
 } // isCodeGenOnly
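[Editorial aside, not part of the patch.] The "sbb" idiom the SETB_C* definitions above model: subtracting a register from itself with borrow yields all-zeros or all-ones depending on the carry flag, so a compare plus "sbb reg,reg" materializes a mask without a branch. A plain C++ equivalent of what the cmp + SETB_C32r pair computes:

    #include <cstdint>

    // After "cmp a, b" the carry flag is set iff a < b (unsigned);
    // "sbb r, r" computes r - r - CF, i.e. 0 or 0xFFFFFFFF.
    uint32_t borrow_mask(uint32_t a, uint32_t b) {
      return a < b ? UINT32_MAX : 0u;  // branch-free as cmp + sbb
    }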
 
@@ -262,34 +301,67 @@ def : Pat<(sub GR64:$op, (i64 (X86setcc_c X86_COND_B, EFLAGS))),
 // String Pseudo Instructions
 //
 let Defs = [ECX,EDI,ESI], Uses = [ECX,EDI,ESI], isCodeGenOnly = 1 in {
-def REP_MOVSB : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
-                  [(X86rep_movs i8)]>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
-                  [(X86rep_movs i16)]>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
-                  [(X86rep_movs i32)]>, REP;
+def REP_MOVSB_32 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+                     [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+                     Requires<[In32BitMode]>;
+def REP_MOVSW_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+                     [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+                     Requires<[In32BitMode]>;
+def REP_MOVSD_32 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+                     [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+                     Requires<[In32BitMode]>;
 }
 
-let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in
-def REP_MOVSQ : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
-                   [(X86rep_movs i64)]>, REP;
-
+let Defs = [RCX,RDI,RSI], Uses = [RCX,RDI,RSI], isCodeGenOnly = 1 in {
+def REP_MOVSB_64 : I<0xA4, RawFrm, (outs), (ins), "{rep;movsb|rep movsb}",
+                     [(X86rep_movs i8)], IIC_REP_MOVS>, REP,
+                     Requires<[In64BitMode]>;
+def REP_MOVSW_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsw|rep movsw}",
+                     [(X86rep_movs i16)], IIC_REP_MOVS>, REP, OpSize,
+                     Requires<[In64BitMode]>;
+def REP_MOVSD_64 : I<0xA5, RawFrm, (outs), (ins), "{rep;movsl|rep movsd}",
+                     [(X86rep_movs i32)], IIC_REP_MOVS>, REP,
+                     Requires<[In64BitMode]>;
+def REP_MOVSQ_64 : RI<0xA5, RawFrm, (outs), (ins), "{rep;movsq|rep movsq}",
+                      [(X86rep_movs i64)], IIC_REP_MOVS>, REP,
+                      Requires<[In64BitMode]>;
+}
 
 // FIXME: Should use "(X86rep_stos AL)" as the pattern.
-let Defs = [ECX,EDI], Uses = [AL,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSB : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
-                  [(X86rep_stos i8)]>, REP;
-let Defs = [ECX,EDI], Uses = [AX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSW : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
-                  [(X86rep_stos i16)]>, REP, OpSize;
-let Defs = [ECX,EDI], Uses = [EAX,ECX,EDI], isCodeGenOnly = 1 in
-def REP_STOSD : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
-                  [(X86rep_stos i32)]>, REP;
-
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI], isCodeGenOnly = 1 in
-def REP_STOSQ : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
-                   [(X86rep_stos i64)]>, REP;
+let Defs = [ECX,EDI], isCodeGenOnly = 1 in {
+  let Uses = [AL,ECX,EDI] in
+  def REP_STOSB_32 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+                       [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+                       Requires<[In32BitMode]>;
+  let Uses = [AX,ECX,EDI] in
+  def REP_STOSW_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+                       [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+                       Requires<[In32BitMode]>;
+  let Uses = [EAX,ECX,EDI] in
+  def REP_STOSD_32 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+                       [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+                       Requires<[In32BitMode]>;
+}
+
+let Defs = [RCX,RDI], isCodeGenOnly = 1 in {
+  let Uses = [AL,RCX,RDI] in
+  def REP_STOSB_64 : I<0xAA, RawFrm, (outs), (ins), "{rep;stosb|rep stosb}",
+                       [(X86rep_stos i8)], IIC_REP_STOS>, REP,
+                       Requires<[In64BitMode]>;
+  let Uses = [AX,RCX,RDI] in
+  def REP_STOSW_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosw|rep stosw}",
+                       [(X86rep_stos i16)], IIC_REP_STOS>, REP, OpSize,
+                       Requires<[In64BitMode]>;
+  let Uses = [RAX,RCX,RDI] in
+  def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}",
+                       [(X86rep_stos i32)], IIC_REP_STOS>, REP,
+                       Requires<[In64BitMode]>;
+
+  let Uses = [RAX,RCX,RDI] in
+  def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}",
+                        [(X86rep_stos i64)], IIC_REP_STOS>, REP,
+                        Requires<[In64BitMode]>;
+}
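[Editorial aside, not part of the patch.] The REP_MOVS*/REP_STOS* pseudos above are what the backend selects when it expands small, known-size memcpy/memset operations inline rather than calling the library. A C++ source pattern that is a candidate for this lowering (whether it actually fires depends on size and alignment heuristics, which are assumptions here):

    #include <cstring>

    // A fixed-size memcpy may lower to "rep movsl/movsq" (X86rep_movs)
    // and a fixed-size memset to "rep stosb/stosl" (X86rep_stos).
    void copy_and_clear(char *dst, const char *src, char *zeroed) {
      std::memcpy(dst, src, 256);   // candidate for REP_MOVS*
      std::memset(zeroed, 0, 256);  // candidate for REP_STOS*
    }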
 
 //===----------------------------------------------------------------------===//
 // Thread Local Storage Instructions
@@ -537,22 +609,13 @@ let isCodeGenOnly = 1, Defs = [EFLAGS] in
 def OR32mrLocked : I<0x09, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$zero),
                      "lock\n\t"
                      "or{l}\t{$zero, $dst|$dst, $zero}",
-                     []>, Requires<[In32BitMode]>, LOCK;
+                     [], IIC_ALU_MEM>, Requires<[In32BitMode]>, LOCK;
 
 let hasSideEffects = 1 in
 def Int_MemBarrier : I<0, Pseudo, (outs), (ins),
                        "#MEMBARRIER",
                        [(X86MemBarrier)]>;
 
-// TODO: Get this to fold the constant into the instruction.
-let hasSideEffects = 1, Defs = [ESP], isCodeGenOnly = 1 in
-def Int_MemBarrierNoSSE64 : RI<0x09, MRM1r, (outs), (ins GR64:$zero),
-                               "lock\n\t"
-                               "or{q}\t{$zero, (%rsp)|(%rsp), $zero}",
-                               [(X86MemBarrierNoSSE GR64:$zero)]>,
-                               Requires<[In64BitMode]>, LOCK;
-
-
 // RegOpc corresponds to the mr version of the instruction
 // ImmOpc corresponds to the mi version of the instruction
 // ImmOpc8 corresponds to the mi8 version of the instruction
@@ -566,72 +629,72 @@ def #NAME#8mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                    MRMDestMem, (outs), (ins i8mem:$dst, GR8:$src2),
                    !strconcat("lock\n\t", mnemonic, "{b}\t",
                               "{$src2, $dst|$dst, $src2}"),
-                   []>, LOCK;
+                   [], IIC_ALU_NONMEM>, LOCK;
 def #NAME#16mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                     RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                     MRMDestMem, (outs), (ins i16mem:$dst, GR16:$src2),
                     !strconcat("lock\n\t", mnemonic, "{w}\t",
                                "{$src2, $dst|$dst, $src2}"),
-                    []>, OpSize, LOCK;
+                    [], IIC_ALU_NONMEM>, OpSize, LOCK;
 def #NAME#32mr : I<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                     RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                     MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src2),
                     !strconcat("lock\n\t", mnemonic, "{l}\t",
                                "{$src2, $dst|$dst, $src2}"),
-                    []>, LOCK;
+                    [], IIC_ALU_NONMEM>, LOCK;
 def #NAME#64mr : RI<{RegOpc{7}, RegOpc{6}, RegOpc{5}, RegOpc{4},
                      RegOpc{3}, RegOpc{2}, RegOpc{1}, 1 },
                      MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src2),
                      !strconcat("lock\n\t", mnemonic, "{q}\t",
                                 "{$src2, $dst|$dst, $src2}"),
-                     []>, LOCK;
+                     [], IIC_ALU_NONMEM>, LOCK;
 
 def #NAME#8mi : Ii8<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                      ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 0 },
                      ImmMod, (outs), (ins i8mem :$dst, i8imm :$src2),
                      !strconcat("lock\n\t", mnemonic, "{b}\t",
                                 "{$src2, $dst|$dst, $src2}"),
-                     []>, LOCK;
+                     [], IIC_ALU_MEM>, LOCK;
 
 def #NAME#16mi : Ii16<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                        ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                        ImmMod, (outs), (ins i16mem :$dst, i16imm :$src2),
                        !strconcat("lock\n\t", mnemonic, "{w}\t",
                                   "{$src2, $dst|$dst, $src2}"),
-                       []>, LOCK;
+                       [], IIC_ALU_MEM>, LOCK;
 
 def #NAME#32mi : Ii32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                        ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                        ImmMod, (outs), (ins i32mem :$dst, i32imm :$src2),
                        !strconcat("lock\n\t", mnemonic, "{l}\t",
                                   "{$src2, $dst|$dst, $src2}"),
-                       []>, LOCK;
+                       [], IIC_ALU_MEM>, LOCK;
 
 def #NAME#64mi32 : RIi32<{ImmOpc{7}, ImmOpc{6}, ImmOpc{5}, ImmOpc{4},
                           ImmOpc{3}, ImmOpc{2}, ImmOpc{1}, 1 },
                           ImmMod, (outs), (ins i64mem :$dst, i64i32imm :$src2),
                           !strconcat("lock\n\t", mnemonic, "{q}\t",
                                      "{$src2, $dst|$dst, $src2}"),
-                          []>, LOCK;
+                          [], IIC_ALU_MEM>, LOCK;
 
 def #NAME#16mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                        ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                        ImmMod, (outs), (ins i16mem :$dst, i16i8imm :$src2),
                        !strconcat("lock\n\t", mnemonic, "{w}\t",
                                   "{$src2, $dst|$dst, $src2}"),
-                       []>, LOCK;
+                       [], IIC_ALU_MEM>, LOCK;
 def #NAME#32mi8 : Ii8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                        ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                        ImmMod, (outs), (ins i32mem :$dst, i32i8imm :$src2),
                        !strconcat("lock\n\t", mnemonic, "{l}\t",
                                   "{$src2, $dst|$dst, $src2}"),
-                       []>, LOCK;
+                       [], IIC_ALU_MEM>, LOCK;
 def #NAME#64mi8 : RIi8<{ImmOpc8{7}, ImmOpc8{6}, ImmOpc8{5}, ImmOpc8{4},
                         ImmOpc8{3}, ImmOpc8{2}, ImmOpc8{1}, 1 },
                         ImmMod, (outs), (ins i64mem :$dst, i64i8imm :$src2),
                         !strconcat("lock\n\t", mnemonic, "{q}\t",
                                    "{$src2, $dst|$dst, $src2}"),
-                        []>, LOCK;
+                        [], IIC_ALU_MEM>, LOCK;
 
 }
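[Editorial aside, not part of the patch.] The LOCK-prefixed arithmetic multiclass above backs atomic read-modify-write operations on memory. An illustrative C++ entry point; the exact instruction chosen is the backend's decision, so the mapping here is indicative rather than guaranteed:

    #include <atomic>

    // When the fetched result is unused, fetch_or on a 32-bit atomic can
    // select the locked "or{l}" memory form from the multiclass above
    // ("lock orl $mask, (mem)"); fetch_add similarly maps to "lock addl".
    void set_flags(std::atomic<unsigned> &flags, unsigned mask) {
      flags.fetch_or(mask, std::memory_order_seq_cst);
    }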
@@ -648,29 +711,29 @@ let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1 in {
 def LOCK_INC8m  : I<0xFE, MRM0m, (outs), (ins i8mem :$dst),
                     "lock\n\t"
-                    "inc{b}\t$dst", []>, LOCK;
+                    "inc{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 def LOCK_INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst),
                     "lock\n\t"
-                    "inc{w}\t$dst", []>, OpSize, LOCK;
+                    "inc{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
 def LOCK_INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst),
                     "lock\n\t"
-                    "inc{l}\t$dst", []>, LOCK;
+                    "inc{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 def LOCK_INC64m : RI<0xFF, MRM0m, (outs), (ins i64mem:$dst),
                      "lock\n\t"
-                     "inc{q}\t$dst", []>, LOCK;
+                     "inc{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 
 def LOCK_DEC8m  : I<0xFE, MRM1m, (outs), (ins i8mem :$dst),
                     "lock\n\t"
-                    "dec{b}\t$dst", []>, LOCK;
+                    "dec{b}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 def LOCK_DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst),
                     "lock\n\t"
-                    "dec{w}\t$dst", []>, OpSize, LOCK;
+                    "dec{w}\t$dst", [], IIC_UNARY_MEM>, OpSize, LOCK;
 def LOCK_DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst),
                     "lock\n\t"
-                    "dec{l}\t$dst", []>, LOCK;
+                    "dec{l}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 def LOCK_DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst),
                      "lock\n\t"
-                     "dec{q}\t$dst", []>, LOCK;
+                     "dec{q}\t$dst", [], IIC_UNARY_MEM>, LOCK;
 }
 
 // Atomic compare and swap.
@@ -679,42 +742,42 @@ let Defs = [EAX, EDX, EFLAGS], Uses = [EAX, EBX, ECX, EDX],
 def LCMPXCHG8B : I<0xC7, MRM1m, (outs), (ins i64mem:$ptr),
                    "lock\n\t"
                    "cmpxchg8b\t$ptr",
-                   [(X86cas8 addr:$ptr)]>, TB, LOCK;
+                   [(X86cas8 addr:$ptr)], IIC_CMPX_LOCK_8B>, TB, LOCK;
 
 let Defs = [RAX, RDX, EFLAGS], Uses = [RAX, RBX, RCX, RDX],
     isCodeGenOnly = 1 in
 def LCMPXCHG16B : RI<0xC7, MRM1m, (outs), (ins i128mem:$ptr),
                      "lock\n\t"
                      "cmpxchg16b\t$ptr",
-                     [(X86cas16 addr:$ptr)]>, TB, LOCK,
+                     [(X86cas16 addr:$ptr)], IIC_CMPX_LOCK_16B>, TB, LOCK,
                      Requires<[HasCmpxchg16b]>;
 
 let Defs = [AL, EFLAGS], Uses = [AL], isCodeGenOnly = 1 in {
 def LCMPXCHG8 : I<0xB0, MRMDestMem, (outs), (ins i8mem:$ptr, GR8:$swap),
                   "lock\n\t"
                   "cmpxchg{b}\t{$swap, $ptr|$ptr, $swap}",
-                  [(X86cas addr:$ptr, GR8:$swap, 1)]>, TB, LOCK;
+                  [(X86cas addr:$ptr, GR8:$swap, 1)], IIC_CMPX_LOCK_8>, TB, LOCK;
 }
 
 let Defs = [AX, EFLAGS], Uses = [AX], isCodeGenOnly = 1 in {
 def LCMPXCHG16 : I<0xB1, MRMDestMem, (outs), (ins i16mem:$ptr, GR16:$swap),
                    "lock\n\t"
                    "cmpxchg{w}\t{$swap, $ptr|$ptr, $swap}",
-                   [(X86cas addr:$ptr, GR16:$swap, 2)]>, TB, OpSize, LOCK;
+                   [(X86cas addr:$ptr, GR16:$swap, 2)], IIC_CMPX_LOCK>, TB, OpSize, LOCK;
 }
 
 let Defs = [EAX, EFLAGS], Uses = [EAX], isCodeGenOnly = 1 in {
 def LCMPXCHG32 : I<0xB1, MRMDestMem, (outs), (ins i32mem:$ptr, GR32:$swap),
                    "lock\n\t"
                    "cmpxchg{l}\t{$swap, $ptr|$ptr, $swap}",
-                   [(X86cas addr:$ptr, GR32:$swap, 4)]>, TB, LOCK;
+                   [(X86cas addr:$ptr, GR32:$swap, 4)], IIC_CMPX_LOCK>, TB, LOCK;
 }
 
 let Defs = [RAX, EFLAGS], Uses = [RAX], isCodeGenOnly = 1 in {
 def LCMPXCHG64 : RI<0xB1, MRMDestMem, (outs), (ins i64mem:$ptr, GR64:$swap),
                     "lock\n\t"
                     "cmpxchg{q}\t{$swap, $ptr|$ptr, $swap}",
-                    [(X86cas addr:$ptr, GR64:$swap, 8)]>, TB, LOCK;
+                    [(X86cas addr:$ptr, GR64:$swap, 8)], IIC_CMPX_LOCK>, TB, LOCK;
 }
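[Editorial aside, not part of the patch.] The LCMPXCHG* definitions implement atomic compare-and-swap; the Defs/Uses lists pin the expected value in AL/AX/EAX/RAX because that is where cmpxchg reads and writes it. The usual C++ entry point, with the lowering described as an expectation rather than a guarantee:

    #include <atomic>

    // Roughly: mov expected -> eax; lock cmpxchgl desired, (ptr);
    // then branch or setcc on ZF. The EAX pinning corresponds to the
    // Defs = [EAX], Uses = [EAX] annotation on LCMPXCHG32 above.
    bool try_swap(std::atomic<int> &v, int expected, int desired) {
      return v.compare_exchange_strong(expected, desired);
    }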
 
 // Atomic exchange and add
@@ -722,22 +785,26 @@ let Constraints = "$val = $dst", Defs = [EFLAGS], isCodeGenOnly = 1 in {
 def LXADD8  : I<0xC0, MRMSrcMem, (outs GR8:$dst), (ins GR8:$val, i8mem:$ptr),
                 "lock\n\t"
                 "xadd{b}\t{$val, $ptr|$ptr, $val}",
-                [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))]>,
+                [(set GR8:$dst, (atomic_load_add_8 addr:$ptr, GR8:$val))],
+                IIC_XADD_LOCK_MEM8>,
                 TB, LOCK;
 def LXADD16 : I<0xC1, MRMSrcMem, (outs GR16:$dst), (ins GR16:$val, i16mem:$ptr),
                 "lock\n\t"
                 "xadd{w}\t{$val, $ptr|$ptr, $val}",
-                [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))]>,
+                [(set GR16:$dst, (atomic_load_add_16 addr:$ptr, GR16:$val))],
+                IIC_XADD_LOCK_MEM>,
                 TB, OpSize, LOCK;
 def LXADD32 : I<0xC1, MRMSrcMem, (outs GR32:$dst), (ins GR32:$val, i32mem:$ptr),
                 "lock\n\t"
                 "xadd{l}\t{$val, $ptr|$ptr, $val}",
-                [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))]>,
+                [(set GR32:$dst, (atomic_load_add_32 addr:$ptr, GR32:$val))],
+                IIC_XADD_LOCK_MEM>,
                 TB, LOCK;
 def LXADD64 : RI<0xC1, MRMSrcMem, (outs GR64:$dst), (ins GR64:$val,i64mem:$ptr),
                  "lock\n\t"
                  "xadd{q}\t{$val, $ptr|$ptr, $val}",
-                 [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))]>,
+                 [(set GR64:$dst, (atomic_load_add_64 addr:$ptr, GR64:$val))],
+                 IIC_XADD_LOCK_MEM>,
                  TB, LOCK;
 }
 
@@ -936,14 +1003,9 @@ def : Pat<(load (i64 (X86Wrapper tglobaltlsaddr :$dst))),
 // Direct PC relative function call for small code model. 32-bit displacement
 // sign extended to 64-bit.
 def : Pat<(X86call (i64 tglobaladdr:$dst)),
-          (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
+          (CALL64pcrel32 tglobaladdr:$dst)>;
 def : Pat<(X86call (i64 texternalsym:$dst)),
-          (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
-
-def : Pat<(X86call (i64 tglobaladdr:$dst)),
-          (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
-def : Pat<(X86call (i64 texternalsym:$dst)),
-          (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
+          (CALL64pcrel32 texternalsym:$dst)>;
 
 // tailcall stuff
 def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
@@ -1105,12 +1167,10 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))
     return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue());
 
-  unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
-  APInt Mask = APInt::getAllOnesValue(BitWidth);
   APInt KnownZero0, KnownOne0;
-  CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0);
+  CurDAG->ComputeMaskedBits(N->getOperand(0), KnownZero0, KnownOne0, 0);
   APInt KnownZero1, KnownOne1;
-  CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0);
+  CurDAG->ComputeMaskedBits(N->getOperand(1), KnownZero1, KnownOne1, 0);
   return (~KnownZero0 & ~KnownZero1) == 0;
 }]>;
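[Editorial aside, not part of the patch.] A worked example of the fact the or_is_add predicate relies on: when the two operands of an OR have no overlapping one-bits (which ComputeMaskedBits proves via the known-zero sets), OR and ADD produce identical results, so the node can be treated as an add and folded into LEA or addressing modes. A minimal C++ demonstration of the arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t hi = 0xAB00u;         // one-bits only in the high byte
      uint32_t lo = 0x00CDu;         // one-bits only in the low byte
      assert((hi & lo) == 0);        // disjoint bits, so no carries...
      assert((hi | lo) == hi + lo);  // ...and OR equals ADD
      return 0;
    }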
 
@@ -1440,58 +1500,62 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
 def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
 def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
 
+// Helper imms that check if a mask doesn't change significant shift bits.
+def immShift32 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 5; }]>;
+def immShift64 : ImmLeaf<i8, [{ return CountTrailingOnes_32(Imm) >= 6; }]>;
+
 // (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL, 31)),
+def : Pat<(shl GR8:$src1, (and CL, immShift32)),
           (SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL, 31)),
+def : Pat<(shl GR16:$src1, (and CL, immShift32)),
           (SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL, 31)),
+def : Pat<(shl GR32:$src1, (and CL, immShift32)),
          (SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
           (SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SHL32mCL addr:$dst)>;
 
-def : Pat<(srl GR8:$src1, (and CL, 31)),
+def : Pat<(srl GR8:$src1, (and CL, immShift32)),
          (SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL, 31)),
+def : Pat<(srl GR16:$src1, (and CL, immShift32)),
          (SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL, 31)),
+def : Pat<(srl GR32:$src1, (and CL, immShift32)),
          (SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SHR32mCL addr:$dst)>;
 
-def : Pat<(sra GR8:$src1, (and CL, 31)),
+def : Pat<(sra GR8:$src1, (and CL, immShift32)),
          (SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL, 31)),
+def : Pat<(sra GR16:$src1, (and CL, immShift32)),
          (SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL, 31)),
+def : Pat<(sra GR32:$src1, (and CL, immShift32)),
          (SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, immShift32)), addr:$dst),
          (SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, immShift32)), addr:$dst),
         (SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, immShift32)), addr:$dst),
         (SAR32mCL addr:$dst)>;
 
 // (shl x (and y, 63)) ==> (shl x, y)
-def : Pat<(shl GR64:$src1, (and CL, 63)),
+def : Pat<(shl GR64:$src1, (and CL, immShift64)),
           (SHL64rCL GR64:$src1)>;
 def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
           (SHL64mCL addr:$dst)>;
 
-def : Pat<(srl GR64:$src1, (and CL, 63)),
+def : Pat<(srl GR64:$src1, (and CL, immShift64)),
           (SHR64rCL GR64:$src1)>;
 def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
           (SHR64mCL addr:$dst)>;
 
-def : Pat<(sra GR64:$src1, (and CL, 63)),
+def : Pat<(sra GR64:$src1, (and CL, immShift64)),
           (SAR64rCL GR64:$src1)>;
 def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
           (SAR64mCL addr:$dst)>;
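[Editorial aside, not part of the patch.] Why immShift32/immShift64 generalize the old literal 31/63 masks: x86 shift instructions themselves truncate the CL count to 5 bits (6 in 64-bit operations), so any AND whose mask has at least 5 (or 6) trailing ones cannot change the bits the hardware actually uses, and the AND can be dropped. CountTrailingOnes_32(31) is 5 and CountTrailingOnes_32(63) is 6, hence the >= 5 and >= 6 tests. A C++ check of the underlying identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      // (shl x, (and y, m)) may drop the AND whenever m's low five bits
      // are all ones, because the CPU masks the count with 31 anyway:
      // (y & m) & 31 == y & 31 then holds for every y.
      const uint32_t m = 0x7Fu;  // 7 trailing ones, >= 5
      for (uint32_t y = 0; y < 1024; ++y)
        assert(((y & m) & 31u) == (y & 31u));
      return 0;
    }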
 
@@ -1735,3 +1799,11 @@ def : Pat<(and GR64:$src1, i64immSExt8:$src2),
                            (AND64ri8 GR64:$src1, i64immSExt8:$src2)>;
 def : Pat<(and GR64:$src1, i64immSExt32:$src2),
                            (AND64ri32 GR64:$src1, i64immSExt32:$src2)>;
+
+// Bit scan instruction patterns to match explicit zero-undef behavior.
+def : Pat<(cttz_zero_undef GR16:$src), (BSF16rr GR16:$src)>;
+def : Pat<(cttz_zero_undef GR32:$src), (BSF32rr GR32:$src)>;
+def : Pat<(cttz_zero_undef GR64:$src), (BSF64rr GR64:$src)>;
+def : Pat<(cttz_zero_undef (loadi16 addr:$src)), (BSF16rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi32 addr:$src)), (BSF32rm addr:$src)>;
+def : Pat<(cttz_zero_undef (loadi64 addr:$src)), (BSF64rm addr:$src)>;
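[Editorial aside, not part of the patch.] The new bit-scan patterns work because BSF leaves its destination undefined when the source is zero, which is exactly the contract of cttz_zero_undef, so the match needs no zero check or branch. At the C++ level this corresponds to the GCC/Clang __builtin_ctz family, whose result is likewise undefined for a zero argument:

    #include <cstdint>

    // __builtin_ctz(0) is undefined, matching cttz_zero_undef, so this
    // can select to a bare "bsf" with no guard branch.
    unsigned trailing_zeros_nonzero(uint32_t v) {
      // Caller guarantees v != 0.
      return static_cast<unsigned>(__builtin_ctz(v));
    }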