Diffstat (limited to 'contrib/llvm/lib/Target/X86')
-rw-r--r--  contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 139
-rw-r--r--  contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c | 283
-rw-r--r--  contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h | 86
-rw-r--r--  contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h | 29
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 19
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 14
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp | 40
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 1
-rw-r--r--  contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 9
-rw-r--r--  contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp | 66
-rw-r--r--  contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h | 26
-rw-r--r--  contrib/llvm/lib/Target/X86/X86.td | 23
-rw-r--r--  contrib/llvm/lib/Target/X86/X86AsmBackend.cpp | 65
-rw-r--r--  contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp | 5
-rw-r--r--  contrib/llvm/lib/Target/X86/X86CallingConv.td | 7
-rw-r--r--  contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp | 8
-rw-r--r--  contrib/llvm/lib/Target/X86/X86FastISel.cpp | 507
-rw-r--r--  contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/X86/X86FrameLowering.cpp | 161
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 75
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelLowering.cpp | 264
-rw-r--r--  contrib/llvm/lib/Target/X86/X86ISelLowering.h | 9
-rw-r--r--  contrib/llvm/lib/Target/X86/X86Instr3DNow.td | 109
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrArithmetic.td | 2
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrControl.td | 10
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrFormats.td | 66
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 2
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.cpp | 41
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.h | 136
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrInfo.td | 78
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrSSE.td | 861
-rw-r--r--  contrib/llvm/lib/Target/X86/X86InstrSystem.td | 40
-rw-r--r--  contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp | 23
-rw-r--r--  contrib/llvm/lib/Target/X86/X86MCAsmInfo.h | 8
-rw-r--r--  contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp | 30
-rw-r--r--  contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp | 51
-rw-r--r--  contrib/llvm/lib/Target/X86/X86RegisterInfo.h | 6
-rw-r--r--  contrib/llvm/lib/Target/X86/X86RegisterInfo.td | 20
-rw-r--r--  contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp | 2
-rw-r--r--  contrib/llvm/lib/Target/X86/X86Subtarget.cpp | 3
-rw-r--r--  contrib/llvm/lib/Target/X86/X86Subtarget.h | 19
-rw-r--r--  contrib/llvm/lib/Target/X86/X86TargetMachine.cpp | 48
-rw-r--r--  contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp | 23
-rw-r--r--  contrib/llvm/lib/Target/X86/X86TargetObjectFile.h | 10
45 files changed, 2235 insertions, 1196 deletions
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 8fe549b..c352bfc 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -53,6 +53,14 @@ private:
SmallVectorImpl<MCParsedAsmOperand*> &Operands,
MCStreamer &Out);
+  /// isSrcOp - Returns true if operand is either (%rsi) or %ds:(%rsi)
+  /// in 64bit mode or (%esi) or %ds:(%esi) in 32bit mode.
+ bool isSrcOp(X86Operand &Op);
+
+ /// isDstOp - Returns true if operand is either %es:(%rdi) in 64bit mode
+ /// or %es:(%edi) in 32bit mode.
+ bool isDstOp(X86Operand &Op);
+
/// @name Auto-generated Matcher Functions
/// {
@@ -356,6 +364,24 @@ struct X86Operand : public MCParsedAsmOperand {
} // end anonymous namespace.
+bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
+ unsigned basereg = Is64Bit ? X86::RSI : X86::ESI;
+
+ return (Op.isMem() &&
+ (Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0);
+}
+
+bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
+ unsigned basereg = Is64Bit ? X86::RDI : X86::EDI;
+
+ return Op.isMem() && Op.Mem.SegReg == X86::ES &&
+ isa<MCConstantExpr>(Op.Mem.Disp) &&
+ cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
+ Op.Mem.BaseReg == basereg && Op.Mem.IndexReg == 0;
+}
bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
SMLoc &StartLoc, SMLoc &EndLoc) {
@@ -788,7 +814,106 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
delete &Op;
}
}
-
+ // Transform "ins[bwl] %dx, %es:(%edi)" into "ins[bwl]"
+ if (Name.startswith("ins") && Operands.size() == 3 &&
+ (Name == "insb" || Name == "insw" || Name == "insl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (Op.isReg() && Op.getReg() == X86::DX && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+  // Transform "outs[bwl] %ds:(%esi), %dx" into "outs[bwl]"
+ if (Name.startswith("outs") && Operands.size() == 3 &&
+ (Name == "outsb" || Name == "outsw" || Name == "outsl")) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && Op2.isReg() && Op2.getReg() == X86::DX) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+
+ // Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
+ if (Name.startswith("movs") && Operands.size() == 3 &&
+ (Name == "movsb" || Name == "movsw" || Name == "movsl" ||
+ (Is64Bit && Name == "movsq"))) {
+ X86Operand &Op = *(X86Operand*)Operands.begin()[1];
+ X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
+ if (isSrcOp(Op) && isDstOp(Op2)) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete &Op;
+ delete &Op2;
+ }
+ }
+ // Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
+ if (Name.startswith("lods") && Operands.size() == 3 &&
+ (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
+ Name == "lodsl" || (Is64Bit && Name == "lodsq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isSrcOp(*Op1) && Op2->isReg()) {
+ const char *ins;
+ unsigned reg = Op2->getReg();
+ bool isLods = Name == "lods";
+ if (reg == X86::AL && (isLods || Name == "lodsb"))
+ ins = "lodsb";
+ else if (reg == X86::AX && (isLods || Name == "lodsw"))
+ ins = "lodsw";
+ else if (reg == X86::EAX && (isLods || Name == "lodsl"))
+ ins = "lodsl";
+ else if (reg == X86::RAX && (isLods || Name == "lodsq"))
+ ins = "lodsq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+ // Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
+ if (Name.startswith("stos") && Operands.size() == 3 &&
+ (Name == "stos" || Name == "stosb" || Name == "stosw" ||
+ Name == "stosl" || (Is64Bit && Name == "stosq"))) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
+ if (isDstOp(*Op2) && Op1->isReg()) {
+ const char *ins;
+ unsigned reg = Op1->getReg();
+ bool isStos = Name == "stos";
+ if (reg == X86::AL && (isStos || Name == "stosb"))
+ ins = "stosb";
+ else if (reg == X86::AX && (isStos || Name == "stosw"))
+ ins = "stosw";
+ else if (reg == X86::EAX && (isStos || Name == "stosl"))
+ ins = "stosl";
+ else if (reg == X86::RAX && (isStos || Name == "stosq"))
+ ins = "stosq";
+ else
+ ins = NULL;
+ if (ins != NULL) {
+ Operands.pop_back();
+ Operands.pop_back();
+ delete Op1;
+ delete Op2;
+ if (Name != ins)
+ static_cast<X86Operand*>(Operands[0])->setTokenValue(ins);
+ }
+ }
+ }
+
// FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
// "shift <op>".
if ((Name.startswith("shr") || Name.startswith("sar") ||
@@ -803,6 +928,18 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
Operands.erase(Operands.begin() + 1);
}
}
+
+  // Transform "int $3" into "int3" as a size optimization. We can't write an
+  // InstAlias with an immediate operand yet.
+ if (Name == "int" && Operands.size() == 2) {
+ X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
+ if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) &&
+ cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) {
+ delete Operands[1];
+ Operands.erase(Operands.begin() + 1);
+ static_cast<X86Operand*>(Operands[0])->setTokenValue("int3");
+ }
+ }
return false;
}
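
All of the string-instruction hunks above follow one pattern: when the long form spelling out the implicit operands is parsed, the operands are dropped, and for lods/stos the suffixed mnemonic is derived from the accumulator width. A minimal standalone sketch of that width-selection rule (hypothetical simplified types, not the real X86Operand machinery):

// Sketch of the lods canonicalization above, assuming the memory operand
// already satisfied isSrcOp(); the accumulator alone picks the mnemonic.
#include <string>

enum class Acc { AL, AX, EAX, RAX };

std::string canonicalLods(const std::string &Name, Acc Reg) {
  bool isLods = Name == "lods";  // bare "lods" accepts any accumulator width
  if (Reg == Acc::AL  && (isLods || Name == "lodsb")) return "lodsb";
  if (Reg == Acc::AX  && (isLods || Name == "lodsw")) return "lodsw";
  if (Reg == Acc::EAX && (isLods || Name == "lodsl")) return "lodsl";
  if (Reg == Acc::RAX && (isLods || Name == "lodsq")) return "lodsq";
  return "";  // mismatched pair, e.g. "lodsb ..., %eax": leave untouched
}

// canonicalLods("lods", Acc::EAX) -> "lodsl"; canonicalLods("lodsw", Acc::AL) -> "".
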
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index f777756..d8a105e 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -409,6 +409,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_XMM32:
case TYPE_XMM64:
case TYPE_XMM128:
+ case TYPE_XMM256:
case TYPE_DEBUGREG:
case TYPE_CONTROLREG:
return translateRMRegister(mcInst, insn);
@@ -418,6 +419,7 @@ static bool translateRM(MCInst &mcInst, const OperandSpecifier &operand,
case TYPE_M32:
case TYPE_M64:
case TYPE_M128:
+ case TYPE_M256:
case TYPE_M512:
case TYPE_Mv:
case TYPE_M32FP:
@@ -500,6 +502,9 @@ static bool translateOperand(MCInst &mcInst, const OperandSpecifier &operand,
case ENCODING_Rv:
translateRegister(mcInst, insn.opcodeRegister);
return false;
+ case ENCODING_VVVV:
+ translateRegister(mcInst, insn.vvvv);
+ return false;
case ENCODING_DUP:
return translateOperand(mcInst,
insn.spec->operands[operand.type - TYPE_DUP0],
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
index b6546fc..de1610b 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c
@@ -75,6 +75,12 @@ static int modRMRequired(OpcodeType type,
case THREEBYTE_3A:
decision = &THREEBYTE3A_SYM;
break;
+ case THREEBYTE_A6:
+ decision = &THREEBYTEA6_SYM;
+ break;
+ case THREEBYTE_A7:
+ decision = &THREEBYTEA7_SYM;
+ break;
}
return decision->opcodeDecisions[insnContext].modRMDecisions[opcode].
@@ -115,6 +121,12 @@ static InstrUID decode(OpcodeType type,
case THREEBYTE_3A:
dec = &THREEBYTE3A_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
break;
+ case THREEBYTE_A6:
+ dec = &THREEBYTEA6_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
+ case THREEBYTE_A7:
+ dec = &THREEBYTEA7_SYM.opcodeDecisions[insnContext].modRMDecisions[opcode];
+ break;
}
switch (dec->modrm_type) {
@@ -368,29 +380,109 @@ static int readPrefixes(struct InternalInstruction* insn) {
if (isPrefix)
dbgprintf(insn, "Found prefix 0x%hhx", byte);
}
+
+ insn->vexSize = 0;
- if (insn->mode == MODE_64BIT) {
- if ((byte & 0xf0) == 0x40) {
- uint8_t opcodeByte;
+ if (byte == 0xc4) {
+ uint8_t byte1;
- if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
- dbgprintf(insn, "Redundant REX prefix");
- return -1;
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
+
+ if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ insn->vexSize = 3;
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+
+ if (insn->vexSize == 3) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+ consumeByte(insn, &insn->vexPrefix[2]);
+
+ /* We simulate the REX prefix for simplicity's sake */
+
+ insn->rexPrefix = 0x40
+ | (wFromVEX3of3(insn->vexPrefix[2]) << 3)
+ | (rFromVEX2of3(insn->vexPrefix[1]) << 2)
+ | (xFromVEX2of3(insn->vexPrefix[1]) << 1)
+ | (bFromVEX2of3(insn->vexPrefix[1]) << 0);
+
+ switch (ppFromVEX3of3(insn->vexPrefix[2]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
}
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1], insn->vexPrefix[2]);
+ }
+ }
+ else if (byte == 0xc5) {
+ uint8_t byte1;
+
+ if (lookAtByte(insn, &byte1)) {
+ dbgprintf(insn, "Couldn't read second byte of VEX");
+ return -1;
+ }
- insn->rexPrefix = byte;
- insn->necessaryPrefixLocation = insn->readerCursor - 2;
-
- dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
- } else {
+ if (insn->mode == MODE_64BIT || byte1 & 0x8) {
+ insn->vexSize = 2;
+ }
+ else {
+ unconsumeByte(insn);
+ }
+
+ if (insn->vexSize == 2) {
+ insn->vexPrefix[0] = byte;
+ consumeByte(insn, &insn->vexPrefix[1]);
+
+ insn->rexPrefix = 0x40
+ | (rFromVEX2of2(insn->vexPrefix[1]) << 2);
+
+ switch (ppFromVEX2of2(insn->vexPrefix[1]))
+ {
+ default:
+ break;
+ case VEX_PREFIX_66:
+ hasOpSize = TRUE;
+ break;
+ }
+
+ dbgprintf(insn, "Found VEX prefix 0x%hhx 0x%hhx", insn->vexPrefix[0], insn->vexPrefix[1]);
+ }
+ }
+ else {
+ if (insn->mode == MODE_64BIT) {
+ if ((byte & 0xf0) == 0x40) {
+ uint8_t opcodeByte;
+
+ if (lookAtByte(insn, &opcodeByte) || ((opcodeByte & 0xf0) == 0x40)) {
+ dbgprintf(insn, "Redundant REX prefix");
+ return -1;
+ }
+
+ insn->rexPrefix = byte;
+ insn->necessaryPrefixLocation = insn->readerCursor - 2;
+
+ dbgprintf(insn, "Found REX prefix 0x%hhx", byte);
+ } else {
+ unconsumeByte(insn);
+ insn->necessaryPrefixLocation = insn->readerCursor - 1;
+ }
+ } else {
unconsumeByte(insn);
insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
- } else {
- unconsumeByte(insn);
- insn->necessaryPrefixLocation = insn->readerCursor - 1;
}
-
+
if (insn->mode == MODE_16BIT) {
insn->registerSize = (hasOpSize ? 4 : 2);
insn->addressSize = (hasAdSize ? 4 : 2);
@@ -438,6 +530,39 @@ static int readOpcode(struct InternalInstruction* insn) {
dbgprintf(insn, "readOpcode()");
insn->opcodeType = ONEBYTE;
+
+ if (insn->vexSize == 3)
+ {
+ switch (mmmmmFromVEX2of3(insn->vexPrefix[1]))
+ {
+ default:
+ dbgprintf(insn, "Unhandled m-mmmm field for instruction (0x%hhx)", mmmmmFromVEX2of3(insn->vexPrefix[1]));
+ return -1;
+ case 0:
+ break;
+ case VEX_LOB_0F:
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F38:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x38;
+ insn->opcodeType = THREEBYTE_38;
+ return consumeByte(insn, &insn->opcode);
+ case VEX_LOB_0F3A:
+ insn->twoByteEscape = 0x0f;
+ insn->threeByteEscape = 0x3a;
+ insn->opcodeType = THREEBYTE_3A;
+ return consumeByte(insn, &insn->opcode);
+ }
+ }
+ else if (insn->vexSize == 2)
+ {
+ insn->twoByteEscape = 0x0f;
+ insn->opcodeType = TWOBYTE;
+ return consumeByte(insn, &insn->opcode);
+ }
+
if (consumeByte(insn, &current))
return -1;
@@ -467,6 +592,24 @@ static int readOpcode(struct InternalInstruction* insn) {
return -1;
insn->opcodeType = THREEBYTE_3A;
+ } else if (current == 0xa6) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A6;
+ } else if (current == 0xa7) {
+ dbgprintf(insn, "Found a three-byte escape prefix (0x%hhx)", current);
+
+ insn->threeByteEscape = current;
+
+ if (consumeByte(insn, &current))
+ return -1;
+
+ insn->opcodeType = THREEBYTE_A7;
} else {
dbgprintf(insn, "Didn't find a three-byte escape prefix");
@@ -600,20 +743,64 @@ static int getID(struct InternalInstruction* insn) {
dbgprintf(insn, "getID()");
attrMask = ATTR_NONE;
-
+
if (insn->mode == MODE_64BIT)
attrMask |= ATTR_64BIT;
-
- if (insn->rexPrefix & 0x08)
- attrMask |= ATTR_REXW;
-
- if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
- attrMask |= ATTR_OPSIZE;
- else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
- attrMask |= ATTR_XS;
- else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
- attrMask |= ATTR_XD;
-
+
+ if (insn->vexSize) {
+ attrMask |= ATTR_VEX;
+
+ if (insn->vexSize == 3) {
+ switch (ppFromVEX3of3(insn->vexPrefix[2])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (wFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_REXW;
+ if (lFromVEX3of3(insn->vexPrefix[2]))
+ attrMask |= ATTR_VEXL;
+ }
+ else if (insn->vexSize == 2) {
+ switch (ppFromVEX2of2(insn->vexPrefix[1])) {
+ case VEX_PREFIX_66:
+ attrMask |= ATTR_OPSIZE;
+ break;
+ case VEX_PREFIX_F3:
+ attrMask |= ATTR_XS;
+ break;
+ case VEX_PREFIX_F2:
+ attrMask |= ATTR_XD;
+ break;
+ }
+
+ if (lFromVEX2of2(insn->vexPrefix[1]))
+ attrMask |= ATTR_VEXL;
+ }
+ else {
+ return -1;
+ }
+ }
+ else {
+ if (insn->rexPrefix & 0x08)
+ attrMask |= ATTR_REXW;
+
+ if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_OPSIZE;
+ else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XS;
+ else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation))
+ attrMask |= ATTR_XD;
+
+ }
+
if (getIDWithAttrMask(&instructionID, insn, attrMask))
return -1;
@@ -749,7 +936,7 @@ static int readSIB(struct InternalInstruction* insn) {
insn->sibIndex = SIB_INDEX_NONE;
break;
default:
- insn->sibIndex = (EABase)(sibIndexBase + index);
+ insn->sibIndex = (SIBIndex)(sibIndexBase + index);
if (insn->sibIndex == SIB_INDEX_sib ||
insn->sibIndex == SIB_INDEX_sib64)
insn->sibIndex = SIB_INDEX_NONE;
@@ -796,7 +983,7 @@ static int readSIB(struct InternalInstruction* insn) {
}
break;
default:
- insn->sibBase = (EABase)(sibBaseBase + base);
+ insn->sibBase = (SIBBase)(sibBaseBase + base);
break;
}
@@ -1012,6 +1199,8 @@ static int readModRM(struct InternalInstruction* insn) {
return prefix##_EAX + index; \
case TYPE_R64: \
return prefix##_RAX + index; \
+ case TYPE_XMM256: \
+ return prefix##_YMM0 + index; \
case TYPE_XMM128: \
case TYPE_XMM64: \
case TYPE_XMM32: \
@@ -1073,6 +1262,14 @@ static int fixupReg(struct InternalInstruction *insn,
default:
debug("Expected a REG or R/M encoding in fixupReg");
return -1;
+ case ENCODING_VVVV:
+ insn->vvvv = (Reg)fixupRegValue(insn,
+ (OperandType)op->type,
+ insn->vvvv,
+ &valid);
+ if (!valid)
+ return -1;
+ break;
case ENCODING_REG:
insn->reg = (Reg)fixupRegValue(insn,
(OperandType)op->type,
@@ -1237,6 +1434,27 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) {
}
/*
+ * readVVVV - Reads the VEX.vvvv field out of the previously-consumed VEX
+ *   prefix so that it can be used as a register operand.
+ *
+ * @param insn  - The instruction whose VEX.vvvv field is to be read.
+ * @return      - 0 if the field was successfully read; nonzero
+ *                otherwise.
+ */
+static int readVVVV(struct InternalInstruction* insn) {
+ dbgprintf(insn, "readVVVV()");
+
+ if (insn->vexSize == 3)
+ insn->vvvv = vvvvFromVEX3of3(insn->vexPrefix[2]);
+ else if (insn->vexSize == 2)
+ insn->vvvv = vvvvFromVEX2of2(insn->vexPrefix[1]);
+ else
+ return -1;
+
+ return 0;
+}
+
+/*
* readOperands - Consults the specifier for an instruction and consumes all
* operands for that instruction, interpreting them as it goes.
*
@@ -1317,6 +1535,13 @@ static int readOperands(struct InternalInstruction* insn) {
case ENCODING_I:
if (readOpcodeModifier(insn))
return -1;
+ break;
+ case ENCODING_VVVV:
+ if (readVVVV(insn))
+ return -1;
+ if (fixupReg(insn, &insn->spec->operands[index]))
+ return -1;
+ break;
case ENCODING_DUP:
break;
default:
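
To make the new VEX path concrete, take the two-byte encoding C5 F0 58 C2, i.e. vaddps %xmm2, %xmm1, %xmm0: readPrefixes() records vexSize = 2 with vexPrefix[1] = 0xF0, readOpcode() then forces opcodeType = TWOBYTE, and getID() derives the attribute mask from that 0xF0 byte. A self-contained check of the field values (expressions inlined from the macros in X86DisassemblerDecoder.h):

#include <cassert>
#include <cstdint>

int main() {
  uint8_t vex1 = 0xF0;                   // second byte of the 2-byte VEX
  unsigned pp   =   vex1        & 0x3;   // ppFromVEX2of2   -> 0, no 66/F3/F2
  unsigned l    =  (vex1 & 0x4) >> 2;    // lFromVEX2of2    -> 0, 128-bit
  unsigned vvvv = ((~vex1) & 0x78) >> 3; // vvvvFromVEX2of2 -> 1, i.e. XMM1
  assert(pp == 0 && l == 0 && vvvv == 1);
  // getID() therefore sets only ATTR_VEX (plus ATTR_64BIT in 64-bit mode):
  // no ATTR_OPSIZE/XS/XD since pp == 0, no ATTR_VEXL since l == 0; and
  // readVVVV() later hands XMM1 to fixupReg() as the middle source operand.
  return 0;
}
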
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index d0dc8b5..a9c90f8 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -34,16 +34,30 @@ extern "C" {
/*
* Accessor functions for various fields of an Intel instruction
*/
-#define modFromModRM(modRM) ((modRM & 0xc0) >> 6)
-#define regFromModRM(modRM) ((modRM & 0x38) >> 3)
-#define rmFromModRM(modRM) (modRM & 0x7)
-#define scaleFromSIB(sib) ((sib & 0xc0) >> 6)
-#define indexFromSIB(sib) ((sib & 0x38) >> 3)
-#define baseFromSIB(sib) (sib & 0x7)
-#define wFromREX(rex) ((rex & 0x8) >> 3)
-#define rFromREX(rex) ((rex & 0x4) >> 2)
-#define xFromREX(rex) ((rex & 0x2) >> 1)
-#define bFromREX(rex) (rex & 0x1)
+#define modFromModRM(modRM) (((modRM) & 0xc0) >> 6)
+#define regFromModRM(modRM) (((modRM) & 0x38) >> 3)
+#define rmFromModRM(modRM) ((modRM) & 0x7)
+#define scaleFromSIB(sib) (((sib) & 0xc0) >> 6)
+#define indexFromSIB(sib) (((sib) & 0x38) >> 3)
+#define baseFromSIB(sib) ((sib) & 0x7)
+#define wFromREX(rex) (((rex) & 0x8) >> 3)
+#define rFromREX(rex) (((rex) & 0x4) >> 2)
+#define xFromREX(rex) (((rex) & 0x2) >> 1)
+#define bFromREX(rex) ((rex) & 0x1)
+
+#define rFromVEX2of3(vex) (((~(vex)) & 0x80) >> 7)
+#define xFromVEX2of3(vex) (((~(vex)) & 0x40) >> 6)
+#define bFromVEX2of3(vex) (((~(vex)) & 0x20) >> 5)
+#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
+#define wFromVEX3of3(vex) (((vex) & 0x80) >> 7)
+#define vvvvFromVEX3of3(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX3of3(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX3of3(vex) ((vex) & 0x3)
+
+#define rFromVEX2of2(vex) (((~(vex)) & 0x80) >> 7)
+#define vvvvFromVEX2of2(vex) (((~(vex)) & 0x78) >> 3)
+#define lFromVEX2of2(vex) (((vex) & 0x4) >> 2)
+#define ppFromVEX2of2(vex) ((vex) & 0x3)
/*
* These enums represent Intel registers for use by the decoder.
@@ -206,7 +220,25 @@ extern "C" {
ENTRY(XMM13) \
ENTRY(XMM14) \
ENTRY(XMM15)
-
+
+#define REGS_YMM \
+ ENTRY(YMM0) \
+ ENTRY(YMM1) \
+ ENTRY(YMM2) \
+ ENTRY(YMM3) \
+ ENTRY(YMM4) \
+ ENTRY(YMM5) \
+ ENTRY(YMM6) \
+ ENTRY(YMM7) \
+ ENTRY(YMM8) \
+ ENTRY(YMM9) \
+ ENTRY(YMM10) \
+ ENTRY(YMM11) \
+ ENTRY(YMM12) \
+ ENTRY(YMM13) \
+ ENTRY(YMM14) \
+ ENTRY(YMM15)
+
#define REGS_SEGMENT \
ENTRY(ES) \
ENTRY(CS) \
@@ -252,6 +284,7 @@ extern "C" {
REGS_64BIT \
REGS_MMX \
REGS_XMM \
+ REGS_YMM \
REGS_SEGMENT \
REGS_DEBUG \
REGS_CONTROL \
@@ -332,6 +365,27 @@ typedef enum {
SEG_OVERRIDE_GS,
SEG_OVERRIDE_max
} SegmentOverride;
+
+/*
+ * VEXLeadingOpcodeByte - Possible values for the VEX.m-mmmm field
+ */
+
+typedef enum {
+ VEX_LOB_0F = 0x1,
+ VEX_LOB_0F38 = 0x2,
+ VEX_LOB_0F3A = 0x3
+} VEXLeadingOpcodeByte;
+
+/*
+ * VEXPrefixCode - Possible values for the VEX.pp field
+ */
+
+typedef enum {
+ VEX_PREFIX_NONE = 0x0,
+ VEX_PREFIX_66 = 0x1,
+ VEX_PREFIX_F3 = 0x2,
+ VEX_PREFIX_F2 = 0x3
+} VEXPrefixCode;
typedef uint8_t BOOL;
@@ -389,10 +443,12 @@ struct InternalInstruction {
uint8_t prefixPresent[0x100];
/* contains the location (for use with the reader) of the prefix byte */
uint64_t prefixLocations[0x100];
+ /* The value of the VEX prefix, if present */
+ uint8_t vexPrefix[3];
+ /* The length of the VEX prefix (0 if not present) */
+ uint8_t vexSize;
/* The value of the REX prefix, if present */
uint8_t rexPrefix;
- /* The location of the REX prefix */
- uint64_t rexLocation;
/* The location where a mandatory prefix would have to be (i.e., right before
the opcode, or right before the REX prefix if one is present) */
uint64_t necessaryPrefixLocation;
@@ -428,6 +484,10 @@ struct InternalInstruction {
/* state for additional bytes, consumed during operand decode. Pattern:
consumed___ indicates that the byte was already consumed and does not
need to be consumed again */
+
+ /* The VEX.vvvv field, which contains a third register operand for some AVX
+ instructions */
+ Reg vvvv;
/* The ModR/M byte, which contains most register operands and some portion of
all memory operands */
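
The inverted storage of the R/X/B/vvvv fields is the easiest part of VEX to get wrong, which is why the macros above complement the byte before masking. A worked example on the three-byte prefix of C4 E3 79 16 C0 01 (vpextrd $1, %xmm0, %eax), with the relevant macros copied from this header:

#include <cassert>
#include <cstdint>

#define mmmmmFromVEX2of3(vex) ((vex) & 0x1f)
#define wFromVEX3of3(vex)     (((vex) & 0x80) >> 7)
#define vvvvFromVEX3of3(vex)  (((~(vex)) & 0x78) >> 3)
#define ppFromVEX3of3(vex)    ((vex) & 0x3)

int main() {
  uint8_t byte1 = 0xE3, byte2 = 0x79;     // the two bytes after 0xC4
  assert(mmmmmFromVEX2of3(byte1) == 0x3); // VEX_LOB_0F3A -> THREEBYTE_3A table
  assert(wFromVEX3of3(byte2) == 0);       // VEX.W clear
  assert(vvvvFromVEX3of3(byte2) == 0);    // vvvv = 1111b in the encoding: unused
  assert(ppFromVEX3of3(byte2) == 0x1);    // VEX_PREFIX_66 -> ATTR_OPSIZE
  return 0;
}
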
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
index 1425b86..70315ed 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h
@@ -30,6 +30,8 @@
#define TWOBYTE_SYM x86DisassemblerTwoByteOpcodes
#define THREEBYTE38_SYM x86DisassemblerThreeByte38Opcodes
#define THREEBYTE3A_SYM x86DisassemblerThreeByte3AOpcodes
+#define THREEBYTEA6_SYM x86DisassemblerThreeByteA6Opcodes
+#define THREEBYTEA7_SYM x86DisassemblerThreeByteA7Opcodes
#define INSTRUCTIONS_STR "x86DisassemblerInstrSpecifiers"
#define CONTEXTS_STR "x86DisassemblerContexts"
@@ -37,6 +39,8 @@
#define TWOBYTE_STR "x86DisassemblerTwoByteOpcodes"
#define THREEBYTE38_STR "x86DisassemblerThreeByte38Opcodes"
#define THREEBYTE3A_STR "x86DisassemblerThreeByte3AOpcodes"
+#define THREEBYTEA6_STR "x86DisassemblerThreeByteA6Opcodes"
+#define THREEBYTEA7_STR "x86DisassemblerThreeByteA7Opcodes"
/*
* Attributes of an instruction that must be known before the opcode can be
@@ -49,7 +53,9 @@
ENUM_ENTRY(ATTR_XS, 0x02) \
ENUM_ENTRY(ATTR_XD, 0x04) \
ENUM_ENTRY(ATTR_REXW, 0x08) \
- ENUM_ENTRY(ATTR_OPSIZE, 0x10)
+ ENUM_ENTRY(ATTR_OPSIZE, 0x10) \
+ ENUM_ENTRY(ATTR_VEX, 0x20) \
+ ENUM_ENTRY(ATTR_VEXL, 0x40)
#define ENUM_ENTRY(n, v) n = v,
enum attributeBits {
@@ -87,7 +93,20 @@ enum attributeBits {
"IC_64BIT_REXW_XS") \
ENUM_ENTRY(IC_64BIT_REXW_OPSIZE, 7, "The Dynamic Duo! Prefer over all " \
"else because this changes most " \
- "operands' meaning")
+ "operands' meaning") \
+ ENUM_ENTRY(IC_VEX, 1, "requires a VEX prefix") \
+ ENUM_ENTRY(IC_VEX_XS, 2, "requires VEX and the XS prefix") \
+ ENUM_ENTRY(IC_VEX_XD, 2, "requires VEX and the XD prefix") \
+ ENUM_ENTRY(IC_VEX_OPSIZE, 2, "requires VEX and the OpSize prefix") \
+ ENUM_ENTRY(IC_VEX_W, 3, "requires VEX and the W prefix") \
+ ENUM_ENTRY(IC_VEX_W_XS, 4, "requires VEX, W, and XS prefix") \
+ ENUM_ENTRY(IC_VEX_W_XD, 4, "requires VEX, W, and XD prefix") \
+ ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \
+ ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \
+ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\
+  ENUM_ENTRY(IC_VEX_L_XD,           4,  "requires VEX and the L and XD prefix")\
+ ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize")
+
#define ENUM_ENTRY(n, r, d) n,
typedef enum {
@@ -104,7 +123,9 @@ typedef enum {
ONEBYTE = 0,
TWOBYTE = 1,
THREEBYTE_38 = 2,
- THREEBYTE_3A = 3
+ THREEBYTE_3A = 3,
+ THREEBYTE_A6 = 4,
+ THREEBYTE_A7 = 5
} OpcodeType;
/*
@@ -183,6 +204,7 @@ struct ContextDecision {
ENUM_ENTRY(ENCODING_NONE, "") \
ENUM_ENTRY(ENCODING_REG, "Register operand in ModR/M byte.") \
ENUM_ENTRY(ENCODING_RM, "R/M operand in ModR/M byte.") \
+ ENUM_ENTRY(ENCODING_VVVV, "Register operand in VEX.vvvv byte.") \
ENUM_ENTRY(ENCODING_CB, "1-byte code offset (possible new CS value)") \
ENUM_ENTRY(ENCODING_CW, "2-byte") \
ENUM_ENTRY(ENCODING_CD, "4-byte") \
@@ -278,6 +300,7 @@ struct ContextDecision {
ENUM_ENTRY(TYPE_XMM32, "4-byte XMM register or memory operand") \
ENUM_ENTRY(TYPE_XMM64, "8-byte") \
ENUM_ENTRY(TYPE_XMM128, "16-byte") \
+ ENUM_ENTRY(TYPE_XMM256, "32-byte") \
ENUM_ENTRY(TYPE_XMM0, "Implicit use of XMM0") \
ENUM_ENTRY(TYPE_SEGMENTREG, "Segment register operand") \
ENUM_ENTRY(TYPE_DEBUGREG, "Debug register operand") \
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index d6950f4..dd6e353 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
#include "X86InstComments.h"
+#include "X86Subtarget.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
@@ -22,24 +23,38 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include "X86GenInstrNames.inc"
+#include <map>
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#define GET_INSTRUCTION_NAME
+#define PRINT_ALIAS_INSTR
+#include "X86GenRegisterNames.inc"
#include "X86GenAsmWriter.inc"
+#undef PRINT_ALIAS_INSTR
+#undef GET_INSTRUCTION_NAME
+
+X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+ : MCInstPrinter(MAI) {
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(
+ &TM.getSubtarget<X86Subtarget>()));
+}
void X86ATTInstPrinter::printInst(const MCInst *MI, raw_ostream &OS) {
- printInstruction(MI, OS);
+ // Try to print any aliases first.
+ if (!printAliasInstr(MI, OS))
+ printInstruction(MI, OS);
// If verbose assembly is enabled, we can print some informative comments.
if (CommentStream)
EmitAnyX86InstComments(MI, *CommentStream, getRegisterName);
}
+
StringRef X86ATTInstPrinter::getOpcodeName(unsigned Opcode) const {
return getInstructionName(Opcode);
}
-
void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op,
raw_ostream &O) {
switch (MI->getOperand(Op).getImm()) {
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index eb98664..8d69391 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -17,16 +17,24 @@
#include "llvm/MC/MCInstPrinter.h"
namespace llvm {
- class MCOperand;
+
+class MCOperand;
+class X86Subtarget;
+class TargetMachine;
class X86ATTInstPrinter : public MCInstPrinter {
public:
- X86ATTInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
-
+ X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI);
virtual void printInst(const MCInst *MI, raw_ostream &OS);
virtual StringRef getOpcodeName(unsigned Opcode) const;
+ // Methods used to print the alias of an instruction.
+ unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
+ // Autogenerated by tblgen, returns true if we successfully printed an
+ // alias.
+ bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
+
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &OS);
static const char *getRegisterName(unsigned RegNo);
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index 12144e3..c642acc 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -111,28 +111,28 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
// FALL THROUGH.
case X86::PUNPCKLBWrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(16, ShuffleMask);
+ DecodePUNPCKLBWMask(16, ShuffleMask);
break;
case X86::PUNPCKLWDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLWDrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(8, ShuffleMask);
+ DecodePUNPCKLWDMask(8, ShuffleMask);
break;
case X86::PUNPCKLDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(4, ShuffleMask);
+ DecodePUNPCKLDQMask(4, ShuffleMask);
break;
case X86::PUNPCKLQDQrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::PUNPCKLQDQrm:
Src1Name = getRegName(MI->getOperand(0).getReg());
- DecodePUNPCKLMask(2, ShuffleMask);
+ DecodePUNPCKLQDQMask(2, ShuffleMask);
break;
case X86::SHUFPDrri:
@@ -153,16 +153,44 @@ void llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPDrm:
- DecodeUNPCKLPMask(2, ShuffleMask);
+ DecodeUNPCKLPDMask(2, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKLPDrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDrm:
+ DecodeUNPCKLPDMask(2, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPDYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPDYrm:
+ DecodeUNPCKLPDMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
case X86::UNPCKLPSrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
case X86::UNPCKLPSrm:
- DecodeUNPCKLPMask(4, ShuffleMask);
+ DecodeUNPCKLPSMask(4, ShuffleMask);
Src1Name = getRegName(MI->getOperand(0).getReg());
break;
+ case X86::VUNPCKLPSrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSrm:
+ DecodeUNPCKLPSMask(4, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
+ case X86::VUNPCKLPSYrr:
+ Src2Name = getRegName(MI->getOperand(2).getReg());
+ // FALL THROUGH.
+ case X86::VUNPCKLPSYrm:
+ DecodeUNPCKLPSMask(8, ShuffleMask);
+ Src1Name = getRegName(MI->getOperand(1).getReg());
+ break;
case X86::UNPCKHPDrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
// FALL THROUGH.
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 0484529..47253eb 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
#include "X86InstComments.h"
+#include "X86Subtarget.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index 6f12032..ca99dc0 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -18,13 +18,15 @@
#include "llvm/Support/raw_ostream.h"
namespace llvm {
- class MCOperand;
+
+class MCOperand;
+class TargetMachine;
class X86IntelInstPrinter : public MCInstPrinter {
public:
- X86IntelInstPrinter(const MCAsmInfo &MAI)
+ X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {}
-
+
virtual void printInst(const MCInst *MI, raw_ostream &OS);
virtual StringRef getOpcodeName(unsigned Opcode) const;
@@ -33,7 +35,6 @@ public:
static const char *getRegisterName(unsigned RegNo);
static const char *getInstructionName(unsigned Opcode);
-
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMemReference(const MCInst *MI, unsigned Op, raw_ostream &O);
void printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 1287977..cd06060 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -1,4 +1,4 @@
-//===-- X86ShuffleDecode.h - X86 shuffle decode logic ---------------------===//
+//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -95,12 +95,29 @@ void DecodePSHUFLWMask(unsigned Imm,
ShuffleMask.push_back(7);
}
-void DecodePUNPCKLMask(unsigned NElts,
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
+
+void DecodePUNPCKLMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i);
- ShuffleMask.push_back(i+NElts);
- }
+ DecodeUNPCKLPMask(VT, ShuffleMask);
}
void DecodePUNPCKHMask(unsigned NElts,
@@ -133,15 +150,40 @@ void DecodeUNPCKHPMask(unsigned NElts,
}
}
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask);
+}
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask) {
+ DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask);
+}
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc. NElts indicates the number of elements in the vector allowing it to
-/// handle different datatypes and vector widths.
-void DecodeUNPCKLPMask(unsigned NElts,
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask) {
- for (unsigned i = 0; i != NElts/2; ++i) {
- ShuffleMask.push_back(i); // Reads from dest
- ShuffleMask.push_back(i+NElts); // Reads from src
+ unsigned NumElts = VT.getVectorNumElements();
+
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+  if (NumSections == 0) NumSections = 1;  // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts / 2;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start; i != End; ++i) {
+ ShuffleMask.push_back(i); // Reads from dest/src1
+ ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2
+ }
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
}
}
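
A standalone trace of the section loop above makes the lane behavior visible: one 128-bit section of v4i32 yields the familiar unpcklps interleave 0,4,1,5, and a 256-bit type simply repeats that pattern shifted by one section. Sketch only, using plain containers instead of the LLVM types:

#include <iostream>
#include <vector>

std::vector<unsigned> unpcklMask(unsigned NumElts, unsigned NumSections) {
  std::vector<unsigned> Mask;
  unsigned NumSectionElts = NumElts / NumSections;
  unsigned Start = 0, End = NumSectionElts / 2;
  for (unsigned s = 0; s < NumSections; ++s) {
    for (unsigned i = Start; i != End; ++i) {
      Mask.push_back(i);                  // element taken from src1
      Mask.push_back(i + NumSectionElts); // element taken from src2
    }
    Start += NumSectionElts;              // advance to the next 128-bit section
    End += NumSectionElts;
  }
  return Mask;
}

int main() {
  for (unsigned M : unpcklMask(4, 1))     // v4i32, one section: prints 0 4 1 5
    std::cout << M << ' ';
  std::cout << '\n';
  return 0;
}
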
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index 50d9ccb..b18f670 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -16,6 +16,7 @@
#define X86_SHUFFLE_DECODE_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/ValueTypes.h"
//===----------------------------------------------------------------------===//
// Vector Mask Decoding
@@ -45,7 +46,19 @@ void DecodePSHUFHWMask(unsigned Imm,
void DecodePSHUFLWMask(unsigned Imm,
SmallVectorImpl<unsigned> &ShuffleMask);
-void DecodePUNPCKLMask(unsigned NElts,
+void DecodePUNPCKLBWMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLWDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLQDQMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodePUNPCKLMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask);
void DecodePUNPCKHMask(unsigned NElts,
@@ -57,11 +70,16 @@ void DecodeSHUFPSMask(unsigned NElts, unsigned Imm,
void DecodeUNPCKHPMask(unsigned NElts,
SmallVectorImpl<unsigned> &ShuffleMask);
+void DecodeUNPCKLPSMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
+
+void DecodeUNPCKLPDMask(unsigned NElts,
+ SmallVectorImpl<unsigned> &ShuffleMask);
/// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd
-/// etc. NElts indicates the number of elements in the vector allowing it to
-/// handle different datatypes and vector widths.
-void DecodeUNPCKLPMask(unsigned NElts,
+/// etc. VT indicates the type of the vector allowing it to handle different
+/// datatypes and vector widths.
+void DecodeUNPCKLPMask(EVT VT,
SmallVectorImpl<unsigned> &ShuffleMask);
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index efb6c8c..25b8d3e 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -1,13 +1,13 @@
//===- X86.td - Target definition file for the Intel X86 ---*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
-// This is a target description file for the Intel i386 architecture, refered to
+// This is a target description file for the Intel i386 architecture, referred to
// here as the "X86" architecture.
//
//===----------------------------------------------------------------------===//
@@ -32,7 +32,7 @@ def FeatureMMX : SubtargetFeature<"mmx","X86SSELevel", "MMX",
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
"Enable SSE instructions",
// SSE codegen depends on cmovs, and all
- // SSE1+ processors support them.
+ // SSE1+ processors support them.
[FeatureMMX, FeatureCMOV]>;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
@@ -50,7 +50,8 @@ def FeatureSSE42 : SubtargetFeature<"sse42", "X86SSELevel", "SSE42",
"Enable SSE 4.2 instructions",
[FeatureSSE41, FeaturePOPCNT]>;
def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow",
- "Enable 3DNow! instructions">;
+ "Enable 3DNow! instructions",
+ [FeatureMMX]>;
def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
"Enable 3DNow! Athlon instructions",
[Feature3DNow]>;
@@ -125,10 +126,10 @@ def : Proc<"sandybridge", [FeatureSSE42, Feature64Bit,
FeatureAES, FeatureCLMUL]>;
def : Proc<"k6", [FeatureMMX]>;
-def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
-def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
-def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
-def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k6-2", [Feature3DNow]>;
+def : Proc<"k6-3", [Feature3DNow]>;
+def : Proc<"athlon", [Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
@@ -156,8 +157,8 @@ def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
-def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
-def : Proc<"c3", [FeatureMMX, Feature3DNow]>;
+def : Proc<"winchip2", [Feature3DNow]>;
+def : Proc<"c3", [Feature3DNow]>;
def : Proc<"c3-2", [FeatureSSE1]>;
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp
index da5f5b1..4d7d96d 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmBackend.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -28,6 +29,13 @@
#include "llvm/Target/TargetAsmBackend.h"
using namespace llvm;
+// Option to allow disabling arithmetic relaxation to workaround PR9807, which
+// is useful when running bitwise comparison experiments on Darwin. We should be
+// able to remove this once PR9807 is resolved.
+static cl::opt<bool>
+MCDisableArithRelaxation("mc-x86-disable-arith-relaxation",
+ cl::desc("Disable relaxation of arithmetic instruction for X86"));
+
static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
default: assert(0 && "invalid fixup kind!");
@@ -201,6 +209,9 @@ bool X86AsmBackend::MayNeedRelaxation(const MCInst &Inst) const {
if (getRelaxedOpcodeBranch(Inst.getOpcode()) != Inst.getOpcode())
return true;
+ if (MCDisableArithRelaxation)
+ return false;
+
// Check if this instruction is ever relaxable.
if (getRelaxedOpcodeArith(Inst.getOpcode()) == Inst.getOpcode())
return false;
@@ -307,10 +318,13 @@ public:
: ELFX86AsmBackend(T, OSType) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(new X86ELFObjectWriter(false, OSType,
- ELF::EM_386, false),
+ return createELFObjectWriter(createELFObjectTargetWriter(),
OS, /*IsLittleEndian*/ true);
}
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new X86ELFObjectWriter(false, OSType, ELF::EM_386, false);
+ }
};
class ELFX86_64AsmBackend : public ELFX86AsmBackend {
@@ -319,10 +333,13 @@ public:
: ELFX86AsmBackend(T, OSType) {}
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createELFObjectWriter(new X86ELFObjectWriter(true, OSType,
- ELF::EM_X86_64, true),
+ return createELFObjectWriter(createELFObjectTargetWriter(),
OS, /*IsLittleEndian*/ true);
}
+
+ MCELFObjectTargetWriter *createELFObjectTargetWriter() const {
+ return new X86ELFObjectWriter(true, OSType, ELF::EM_X86_64, true);
+ }
};
class WindowsX86AsmBackend : public X86AsmBackend {
@@ -408,34 +425,26 @@ public:
TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const std::string &TT) {
- switch (Triple(TT).getOS()) {
- case Triple::Darwin:
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
return new DarwinX86_32AsmBackend(T);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (Triple(TT).getEnvironment() == Triple::MachO)
- return new DarwinX86_32AsmBackend(T);
- else
- return new WindowsX86AsmBackend(T, false);
- default:
- return new ELFX86_32AsmBackend(T, Triple(TT).getOS());
- }
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, false);
+
+ return new ELFX86_32AsmBackend(T, TheTriple.getOS());
}
TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
const std::string &TT) {
- switch (Triple(TT).getOS()) {
- case Triple::Darwin:
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
return new DarwinX86_64AsmBackend(T);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (Triple(TT).getEnvironment() == Triple::MachO)
- return new DarwinX86_64AsmBackend(T);
- else
- return new WindowsX86AsmBackend(T, true);
- default:
- return new ELFX86_64AsmBackend(T, Triple(TT).getOS());
- }
+
+ if (TheTriple.isOSWindows())
+ return new WindowsX86AsmBackend(T, true);
+
+ return new ELFX86_64AsmBackend(T, TheTriple.getOS());
}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 99b4479..c2d53c4 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -709,12 +709,13 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
//===----------------------------------------------------------------------===//
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
+ TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(MAI);
+ return new X86ATTInstPrinter(TM, MAI);
if (SyntaxVariant == 1)
- return new X86IntelInstPrinter(MAI);
+ return new X86IntelInstPrinter(TM, MAI);
return 0;
}
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
index a44fb69..5635175 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -215,6 +215,13 @@ def CC_X86_Win64_C : CallingConv<[
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
[XMM0, XMM1, XMM2, XMM3]>>,
+
+ // Do not pass the sret argument in RCX, the Win64 thiscall calling
+ // convention requires "this" to be passed in RCX.
+ CCIfCC<"CallingConv::X86_ThisCall",
+ CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[RDX , R8 , R9 ],
+ [XMM1, XMM2, XMM3]>>>>,
+
CCIfType<[i64], CCAssignToRegWithShadow<[RCX , RDX , R8 , R9 ],
[XMM0, XMM1, XMM2, XMM3]>>,
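
The Win64 hunk above exists for instance methods that return an aggregate through a hidden sret pointer while using the thiscall convention: RCX must stay reserved for `this`, so the sret pointer shifts to RDX. An illustration of the shape of code affected (assuming the frontend lowers the method with CallingConv::X86_ThisCall):

// Hypothetical example: 'Big' comes back via a hidden sret pointer. Under
// the rule above the callee sees RCX = this and RDX = sret, rather than
// both arguments competing for RCX.
struct Big { int v[8]; };      // too large to be returned in a register

struct Widget {
  Big snapshot() const;        // sret + this method
};

Big use(const Widget &W) { return W.snapshot(); }
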
diff --git a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
index 60d9d4a..421e221 100644
--- a/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp
@@ -652,6 +652,8 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::TB: // Two-byte opcode prefix
case X86II::T8: // 0F 38
case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
Need0FPrefix = true;
break;
case X86II::TF: // F2 0F 38
@@ -695,6 +697,12 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
case X86II::TA: // 0F 3A
MCE.emitByte(0x3A);
break;
+ case X86II::A6: // 0F A6
+ MCE.emitByte(0xA6);
+ break;
+ case X86II::A7: // 0F A7
+ MCE.emitByte(0xA7);
+ break;
}
// If this is a two-address instruction, skip one of the register operands.
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index 6fa9284..1382f18 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -23,6 +23,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -77,10 +78,8 @@ private:
bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
- bool X86FastEmitStore(EVT VT, const Value *Val,
- const X86AddressMode &AM);
- bool X86FastEmitStore(EVT VT, unsigned Val,
- const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
+ bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
unsigned &ResultReg);
@@ -125,6 +124,8 @@ private:
unsigned TargetMaterializeAlloca(const AllocaInst *C);
+ unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
+
/// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
/// computed in an SSE register, not on the X87 floating point stack.
bool isScalarFPTypeInSSEReg(EVT VT) const {
@@ -133,6 +134,9 @@ private:
}
bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
+
+ bool TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len);
};
} // end anonymous namespace.
@@ -224,8 +228,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
/// and a displacement offset, or a GlobalAddress,
/// i.e. V. Return true if it is possible.
bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
- const X86AddressMode &AM) {
+X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
// Get opcode and regclass of the output for the given store instruction.
unsigned Opc = 0;
switch (VT.getSimpleVT().SimpleTy) {
@@ -395,37 +398,45 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
const Value *Op = *i;
if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
const StructLayout *SL = TD.getStructLayout(STy);
- unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
- Disp += SL->getElementOffset(Idx);
- } else {
- uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
- SmallVector<const Value *, 4> Worklist;
- Worklist.push_back(Op);
- do {
- Op = Worklist.pop_back_val();
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- // Constant-offset addressing.
- Disp += CI->getSExtValue() * S;
- } else if (isa<AddOperator>(Op) &&
- isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
- // An add with a constant operand. Fold the constant.
- ConstantInt *CI =
- cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
- Disp += CI->getSExtValue() * S;
- // Add the other operand back to the work list.
- Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
- } else if (IndexReg == 0 &&
- (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
- (S == 1 || S == 2 || S == 4 || S == 8)) {
- // Scaled-index addressing.
- Scale = S;
- IndexReg = getRegForGEPIndex(Op).first;
- if (IndexReg == 0)
- return false;
- } else
- // Unsupported.
- goto unsupported_gep;
- } while (!Worklist.empty());
+ Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
+ continue;
+ }
+
+      // An array/variable index is always of the form i*S where S is the
+ // constant scale size. See if we can push the scale into immediates.
+ uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
+ for (;;) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
+ // Constant-offset addressing.
+ Disp += CI->getSExtValue() * S;
+ break;
+ }
+ if (isa<AddOperator>(Op) &&
+ (!isa<Instruction>(Op) ||
+ FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
+ == FuncInfo.MBB) &&
+ isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
+ // An add (in the same block) with a constant operand. Fold the
+ // constant.
+ ConstantInt *CI =
+ cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
+ Disp += CI->getSExtValue() * S;
+ // Iterate on the other operand.
+ Op = cast<AddOperator>(Op)->getOperand(0);
+ continue;
+ }
+ if (IndexReg == 0 &&
+ (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
+ (S == 1 || S == 2 || S == 4 || S == 8)) {
+ // Scaled-index addressing.
+ Scale = S;
+ IndexReg = getRegForGEPIndex(Op).first;
+ if (IndexReg == 0)
+ return false;
+ break;
+ }
+ // Unsupported.
+ goto unsupported_gep;
}
}
// Check for displacement overflow.
@@ -439,7 +450,7 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
if (X86SelectAddress(U->getOperand(0), AM))
return true;
- // If we couldn't merge the sub value into this addr mode, revert back to
+ // If we couldn't merge the gep value into this addr mode, revert back to
// our address and just match the value instead of completely failing.
AM = SavedAM;
break;
@@ -451,91 +462,91 @@ bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
// Handle constant address.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
- // Can't handle alternate code models yet.
+ // Can't handle alternate code models or TLS yet.
if (TM.getCodeModel() != CodeModel::Small)
return false;
- // RIP-relative addresses can't have additional register operands.
- if (Subtarget->isPICStyleRIPRel() &&
- (AM.Base.Reg != 0 || AM.IndexReg != 0))
- return false;
-
- // Can't handle TLS yet.
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
if (GVar->isThreadLocal())
return false;
+
+ // RIP-relative addresses can't have additional register operands, so if
+ // we've already folded stuff into the addressing mode, just force the
+ // global value into its own register, which we can use as the basereg.
+ if (!Subtarget->isPICStyleRIPRel() ||
+ (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
+ // Okay, we've committed to selecting this global. Set up the address.
+ AM.GV = GV;
+
+ // Allow the subtarget to classify the global.
+ unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
+
+ // If this reference is relative to the pic base, set it now.
+ if (isGlobalRelativeToPICBase(GVFlags)) {
+ // FIXME: How do we know Base.Reg is free??
+ AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
+ }
- // Okay, we've committed to selecting this global. Set up the basic address.
- AM.GV = GV;
-
- // Allow the subtarget to classify the global.
- unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
-
- // If this reference is relative to the pic base, set it now.
- if (isGlobalRelativeToPICBase(GVFlags)) {
- // FIXME: How do we know Base.Reg is free??
- AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
- }
-
- // Unless the ABI requires an extra load, return a direct reference to
- // the global.
- if (!isGlobalStubReference(GVFlags)) {
- if (Subtarget->isPICStyleRIPRel()) {
- // Use rip-relative addressing if we can. Above we verified that the
- // base and index registers are unused.
- assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
- AM.Base.Reg = X86::RIP;
+ // Unless the ABI requires an extra load, return a direct reference to
+ // the global.
+ if (!isGlobalStubReference(GVFlags)) {
+ if (Subtarget->isPICStyleRIPRel()) {
+ // Use rip-relative addressing if we can. Above we verified that the
+ // base and index registers are unused.
+ assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
+ AM.Base.Reg = X86::RIP;
+ }
+ AM.GVOpFlags = GVFlags;
+ return true;
}
- AM.GVOpFlags = GVFlags;
- return true;
- }
- // Ok, we need to do a load from a stub. If we've already loaded from this
- // stub, reuse the loaded pointer, otherwise emit the load now.
- DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
- unsigned LoadReg;
- if (I != LocalValueMap.end() && I->second != 0) {
- LoadReg = I->second;
- } else {
- // Issue load from stub.
- unsigned Opc = 0;
- const TargetRegisterClass *RC = NULL;
- X86AddressMode StubAM;
- StubAM.Base.Reg = AM.Base.Reg;
- StubAM.GV = GV;
- StubAM.GVOpFlags = GVFlags;
-
- // Prepare for inserting code in the local-value area.
- SavePoint SaveInsertPt = enterLocalValueArea();
-
- if (TLI.getPointerTy() == MVT::i64) {
- Opc = X86::MOV64rm;
- RC = X86::GR64RegisterClass;
-
- if (Subtarget->isPICStyleRIPRel())
- StubAM.Base.Reg = X86::RIP;
+ // Ok, we need to do a load from a stub. If we've already loaded from
+ // this stub, reuse the loaded pointer, otherwise emit the load now.
+ DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
+ unsigned LoadReg;
+ if (I != LocalValueMap.end() && I->second != 0) {
+ LoadReg = I->second;
} else {
- Opc = X86::MOV32rm;
- RC = X86::GR32RegisterClass;
- }
+ // Issue load from stub.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = GV;
+ StubAM.GVOpFlags = GVFlags;
+
+ // Prepare for inserting code in the local-value area.
+ SavePoint SaveInsertPt = enterLocalValueArea();
+
+ if (TLI.getPointerTy() == MVT::i64) {
+ Opc = X86::MOV64rm;
+ RC = X86::GR64RegisterClass;
+
+ if (Subtarget->isPICStyleRIPRel())
+ StubAM.Base.Reg = X86::RIP;
+ } else {
+ Opc = X86::MOV32rm;
+ RC = X86::GR32RegisterClass;
+ }
- LoadReg = createResultReg(RC);
- MachineInstrBuilder LoadMI =
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
- addFullAddress(LoadMI, StubAM);
+ LoadReg = createResultReg(RC);
+ MachineInstrBuilder LoadMI =
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
+ addFullAddress(LoadMI, StubAM);
- // Ok, back to normal mode.
- leaveLocalValueArea(SaveInsertPt);
+ // Ok, back to normal mode.
+ leaveLocalValueArea(SaveInsertPt);
- // Prevent loading GV stub multiple times in same MBB.
- LocalValueMap[V] = LoadReg;
- }
+ // Prevent loading GV stub multiple times in same MBB.
+ LocalValueMap[V] = LoadReg;
+ }
- // Now construct the final address. Note that the Disp, Scale,
- // and Index values may already be set here.
- AM.Base.Reg = LoadReg;
- AM.GV = 0;
- return true;
+ // Now construct the final address. Note that the Disp, Scale,
+ // and Index values may already be set here.
+ AM.Base.Reg = LoadReg;
+ AM.GV = 0;
+ return true;
+ }
}
// If all else fails, try to materialize the value in a register.
@@ -856,12 +867,9 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
unsigned NEReg = createResultReg(&X86::GR8RegClass);
unsigned PReg = createResultReg(&X86::GR8RegClass);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::SETNEr), NEReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::SETPr), PReg);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
- TII.get(X86::OR8rr), ResultReg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg)
.addReg(PReg).addReg(NEReg);
UpdateValueMap(I, ResultReg);
return true;
@@ -1059,14 +1067,49 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
}
}
}
+ } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
+ // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
+ // typically happen for _Bool and C++ bools.
+ MVT SourceVT;
+ if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
+ isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
+ unsigned TestOpc = 0;
+ switch (SourceVT.SimpleTy) {
+ default: break;
+ case MVT::i8: TestOpc = X86::TEST8ri; break;
+ case MVT::i16: TestOpc = X86::TEST16ri; break;
+ case MVT::i32: TestOpc = X86::TEST32ri; break;
+ case MVT::i64: TestOpc = X86::TEST64ri32; break;
+ }
+ if (TestOpc) {
+ unsigned OpReg = getRegForValue(TI->getOperand(0));
+ if (OpReg == 0) return false;
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
+ .addReg(OpReg).addImm(1);
+
+ unsigned JmpOpc = X86::JNE_4;
+ if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
+ std::swap(TrueMBB, FalseMBB);
+ JmpOpc = X86::JE_4;
+ }
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
+ .addMBB(TrueMBB);
+ FastEmitBranch(FalseMBB, DL);
+ FuncInfo.MBB->addSuccessor(TrueMBB);
+ return true;
+ }
+ }
}
// Otherwise do a clumsy setcc and re-test it.
+ // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
+ // in an explicit cast, so make sure to handle that correctly.
unsigned OpReg = getRegForValue(BI->getCondition());
if (OpReg == 0) return false;
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
- .addReg(OpReg).addReg(OpReg);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
+ .addReg(OpReg).addImm(1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
.addMBB(TrueMBB);
FastEmitBranch(FalseMBB, DL);
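
The switch from TEST8rr to TEST8ri with immediate 1 matters because an i1 that was ANY_EXTEND'ed into an i8 register may carry garbage in bits 1..7. A minimal standalone sketch of the difference (plain C++, not the FastISel API; helper names are hypothetical):

    #include <cassert>
    #include <cstdint>

    // TEST8ri reg, 1 inspects only bit 0; TEST8rr reg, reg inspects the
    // whole byte, which is wrong when the upper bits of the i1 are undefined.
    static bool takenWithTestRI(uint8_t reg) { return (reg & 1) != 0; }
    static bool takenWithTestRR(uint8_t reg) { return reg != 0; }

    int main() {
      uint8_t dirtyFalse = 0xFE;  // i1 false, but bits 1..7 hold garbage
      assert(!takenWithTestRI(dirtyFalse));  // correct: branch not taken
      assert(takenWithTestRR(dirtyFalse));   // old sequence misbranches
      assert(takenWithTestRI(0xFF) && takenWithTestRI(0x01));
    }
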
@@ -1075,42 +1118,42 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
}
bool X86FastISel::X86SelectShift(const Instruction *I) {
- unsigned CReg = 0, OpReg = 0, OpImm = 0;
+ unsigned CReg = 0, OpReg = 0;
const TargetRegisterClass *RC = NULL;
if (I->getType()->isIntegerTy(8)) {
CReg = X86::CL;
RC = &X86::GR8RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
- case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
- case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
+ case Instruction::LShr: OpReg = X86::SHR8rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR8rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL8rCL; break;
default: return false;
}
} else if (I->getType()->isIntegerTy(16)) {
CReg = X86::CX;
RC = &X86::GR16RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
- case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
- case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
+ case Instruction::LShr: OpReg = X86::SHR16rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR16rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL16rCL; break;
default: return false;
}
} else if (I->getType()->isIntegerTy(32)) {
CReg = X86::ECX;
RC = &X86::GR32RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
- case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
- case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
+ case Instruction::LShr: OpReg = X86::SHR32rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR32rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL32rCL; break;
default: return false;
}
} else if (I->getType()->isIntegerTy(64)) {
CReg = X86::RCX;
RC = &X86::GR64RegClass;
switch (I->getOpcode()) {
- case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
- case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
- case Instruction::Shl: OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
+ case Instruction::LShr: OpReg = X86::SHR64rCL; break;
+ case Instruction::AShr: OpReg = X86::SAR64rCL; break;
+ case Instruction::Shl: OpReg = X86::SHL64rCL; break;
default: return false;
}
} else {
@@ -1124,15 +1167,6 @@ bool X86FastISel::X86SelectShift(const Instruction *I) {
unsigned Op0Reg = getRegForValue(I->getOperand(0));
if (Op0Reg == 0) return false;
- // Fold immediate in shl(x,3).
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
- unsigned ResultReg = createResultReg(RC);
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
- ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
- UpdateValueMap(I, ResultReg);
- return true;
- }
-
unsigned Op1Reg = getRegForValue(I->getOperand(1));
if (Op1Reg == 0) return false;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
@@ -1294,10 +1328,61 @@ bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
return false;
}
+bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
+ X86AddressMode SrcAM, uint64_t Len) {
+ // Make sure we don't bloat code by inlining very large memcpy's.
+ bool i64Legal = TLI.isTypeLegal(MVT::i64);
+ if (Len > (i64Legal ? 32 : 16)) return false;
+
+ // We don't care about alignment here since we just emit integer accesses.
+ while (Len) {
+ MVT VT;
+ if (Len >= 8 && i64Legal)
+ VT = MVT::i64;
+ else if (Len >= 4)
+ VT = MVT::i32;
+ else if (Len >= 2)
+ VT = MVT::i16;
+ else {
+ assert(Len == 1);
+ VT = MVT::i8;
+ }
+
+ unsigned Reg;
+ bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
+ RV &= X86FastEmitStore(VT, Reg, DestAM);
+ assert(RV && "Failed to emit load or store??");
+
+ unsigned Size = VT.getSizeInBits()/8;
+ Len -= Size;
+ DestAM.Disp += Size;
+ SrcAM.Disp += Size;
+ }
+
+ return true;
+}
+
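
The loop above always emits the widest legal integer access that still fits in the remaining length, which is why alignment never needs to be consulted. A sketch of the same chunking policy, assuming only that the caller enforces the 16/32-byte cap (standalone C++; planSmallMemcpy is a hypothetical name):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Mirrors TryEmitSmallMemcpy's policy: widest legal integer access first.
    static void planSmallMemcpy(uint64_t Len, bool i64Legal) {
      assert(Len <= (i64Legal ? 32u : 16u) && "caller rejects larger copies");
      uint64_t Off = 0;
      while (Len) {
        unsigned Size;
        if (Len >= 8 && i64Legal) Size = 8;
        else if (Len >= 4)        Size = 4;
        else if (Len >= 2)        Size = 2;
        else                      Size = 1;
        std::printf("load/store %u bytes at offset %llu\n", Size,
                    (unsigned long long)Off);
        Off += Size;
        Len -= Size;
      }
    }

    int main() { planSmallMemcpy(13, /*i64Legal=*/true); }  // 8 + 4 + 1
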
bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// FIXME: Handle more intrinsics.
switch (I.getIntrinsicID()) {
default: return false;
+ case Intrinsic::memcpy: {
+ const MemCpyInst &MCI = cast<MemCpyInst>(I);
+ // Don't handle volatile or variable length memcpys.
+ if (MCI.isVolatile() || !isa<ConstantInt>(MCI.getLength()))
+ return false;
+
+ uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
+
+ // Get the address of the dest and source addresses.
+ X86AddressMode DestAM, SrcAM;
+ if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
+ !X86SelectAddress(MCI.getRawSource(), SrcAM))
+ return false;
+
+ return TryEmitSmallMemcpy(DestAM, SrcAM, Len);
+ }
+
case Intrinsic::stackprotector: {
// Emit code inline code to store the stack guard onto the stack.
EVT PtrTy = TLI.getPointerTy();
@@ -1308,17 +1393,14 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
// Grab the frame index.
X86AddressMode AM;
if (!X86SelectAddress(Slot, AM)) return false;
-
if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
-
return true;
}
case Intrinsic::objectsize: {
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
+ // FIXME: This should be moved to generic code!
+ ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
const Type *Ty = I.getCalledFunction()->getReturnType();
- assert(CI && "Non-constant type in Intrinsic::objectsize?");
-
MVT VT;
if (!isTypeLegal(Ty, VT))
return false;
@@ -1356,6 +1438,8 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
}
case Intrinsic::sadd_with_overflow:
case Intrinsic::uadd_with_overflow: {
+ // FIXME: Should fold immediates.
+
// Replace "add with overflow" intrinsics with an "add" instruction followed
// by a seto/setc instruction. Later on, when the "extractvalue"
// instructions are encountered, we use the fact that two registers were
@@ -1427,8 +1511,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
// Handle only C and fastcc calling conventions for now.
ImmutableCallSite CS(CI);
CallingConv::ID CC = CS.getCallingConv();
- if (CC != CallingConv::C &&
- CC != CallingConv::Fast &&
+ if (CC != CallingConv::C && CC != CallingConv::Fast &&
CC != CallingConv::X86_FastCall)
return false;
@@ -1437,14 +1520,17 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
return false;
- // Let SDISel handle vararg functions.
const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
- if (FTy->isVarArg())
+ bool isVarArg = FTy->isVarArg();
+
+ // Don't know how to handle Win64 varargs yet. Nothing special needed for
+ // x86-32. Special handling for x86-64 is implemented.
+ if (isVarArg && Subtarget->isTargetWin64())
return false;
// Fast-isel doesn't know about callee-pop yet.
- if (Subtarget->IsCalleePop(FTy->isVarArg(), CC))
+ if (Subtarget->IsCalleePop(isVarArg, CC))
return false;
// Handle *simple* calls for now.
@@ -1487,9 +1573,7 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
ArgFlags.reserve(CS.arg_size());
for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
i != e; ++i) {
- unsigned Arg = getRegForValue(*i);
- if (Arg == 0)
- return false;
+ Value *ArgVal = *i;
ISD::ArgFlagsTy Flags;
unsigned AttrInd = i - CS.arg_begin() + 1;
if (CS.paramHasAttr(AttrInd, Attribute::SExt))
@@ -1497,34 +1581,67 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
Flags.setZExt();
+ // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
+ // instruction. This is safe because it is common to all fastisel supported
+ // calling conventions on x86.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
+ if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
+ CI->getBitWidth() == 16) {
+ if (Flags.isSExt())
+ ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
+ else
+ ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
+ }
+ }
+
+ unsigned ArgReg;
+
+ // Passing bools around ends up doing a trunc to i1 and passing it.
+ // Codegen this as an argument + "and 1".
+ if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
+ cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
+ ArgVal->hasOneUse()) {
+ ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
+ ArgReg = getRegForValue(ArgVal);
+ if (ArgReg == 0) return false;
+
+ MVT ArgVT;
+ if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
+
+ ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
+ ArgVal->hasOneUse(), 1);
+ } else {
+ ArgReg = getRegForValue(ArgVal);
+ }
+
+ if (ArgReg == 0) return false;
+
// FIXME: Only handle *easy* calls for now.
if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
- CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
CS.paramHasAttr(AttrInd, Attribute::Nest) ||
CS.paramHasAttr(AttrInd, Attribute::ByVal))
return false;
- const Type *ArgTy = (*i)->getType();
+ const Type *ArgTy = ArgVal->getType();
MVT ArgVT;
if (!isTypeLegal(ArgTy, ArgVT))
return false;
unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
Flags.setOrigAlign(OriginalAlignment);
- Args.push_back(Arg);
- ArgVals.push_back(*i);
+ Args.push_back(ArgReg);
+ ArgVals.push_back(ArgVal);
ArgVTs.push_back(ArgVT);
ArgFlags.push_back(Flags);
}
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
+ CCState CCInfo(CC, isVarArg, TM, ArgLocs, I->getParent()->getContext());
// Allocate shadow area for Win64
- if (Subtarget->isTargetWin64()) {
+ if (Subtarget->isTargetWin64())
CCInfo.AllocateStack(32, 8);
- }
CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
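
Two tricks happen in the rewritten argument loop: small constant arguments (i1/i8/i16) are widened to i32 up front so no extend instruction is needed, and a bool produced by a same-block trunc is passed as its wide source value masked with 1. A sketch of both behaviors (standalone C++; helper names are hypothetical):

    #include <cassert>
    #include <cstdint>

    // Widening of an i16 constant argument to i32, honoring sext/zext attrs.
    static int32_t widenConstArg(int16_t c, bool signExt) {
      return signExt ? (int32_t)c : (int32_t)(uint16_t)c;
    }

    // "arg + and 1": pass the trunc's wide operand masked to its low bit
    // instead of materializing the i1 truncate.
    static uint32_t lowerBoolArg(uint32_t wideSrc) { return wideSrc & 1; }

    int main() {
      assert(widenConstArg(-1, true)  == -1);      // SExt: 0xFFFFFFFF
      assert(widenConstArg(-1, false) == 0xFFFF);  // ZExt: 0x0000FFFF
      assert(lowerBoolArg(7) == 1 && lowerBoolArg(6) == 0);
    }
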
@@ -1618,6 +1735,17 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
X86::EBX).addReg(Base);
}
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+ // Count the number of XMM registers allocated.
+ static const unsigned XMMArgRegs[] = {
+ X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
+ X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
+ };
+ unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
+ X86::AL).addImm(NumXMMRegs);
+ }
+
// Issue the call.
MachineInstrBuilder MIB;
if (CalleeOp) {
@@ -1656,7 +1784,8 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(GV->isDeclaration() || GV->isWeakForLinker()) &&
- Subtarget->getDarwinVers() < 9) {
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -1672,14 +1801,20 @@ bool X86FastISel::X86SelectCall(const Instruction *I) {
if (Subtarget->isPICStyleGOT())
MIB.addReg(X86::EBX);
+ if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+ MIB.addReg(X86::AL);
+
// Add implicit physical register uses to the call.
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
MIB.addReg(RegArgs[i]);
// Issue CALLSEQ_END
unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+ unsigned NumBytesCallee = 0;
+ if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
+ NumBytesCallee = 4;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
- .addImm(NumBytes).addImm(0);
+ .addImm(NumBytes).addImm(NumBytesCallee);
// Now handle call return value (if any).
SmallVector<unsigned, 4> UsedRegs;
@@ -1850,10 +1985,13 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
if (isa<GlobalValue>(C)) {
X86AddressMode AM;
if (X86SelectAddress(C, AM)) {
- if (TLI.getPointerTy() == MVT::i32)
- Opc = X86::LEA32r;
- else
- Opc = X86::LEA64r;
+ // If the expression is just a basereg, then we're done, otherwise we need
+ // to emit an LEA.
+ if (AM.BaseType == X86AddressMode::RegBase &&
+ AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
+ return AM.Base.Reg;
+
+ Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(Opc), ResultReg), AM);
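
The shortcut above fires whenever the selected address mode degenerates to a bare base register; only richer modes pay for an LEA. A sketch of the test (standalone C++; the struct is a simplified stand-in for X86AddressMode):

    #include <cassert>

    // Simplified stand-in for X86AddressMode: RegBase form only.
    struct AddrMode { unsigned BaseReg, IndexReg; int Disp; bool HasGV; };

    // An address that is nothing but a base register already *is* the value;
    // anything with an index, displacement, or global needs an LEA.
    static bool needsLEA(const AddrMode &AM) {
      return AM.IndexReg != 0 || AM.Disp != 0 || AM.HasGV;
    }

    int main() {
      assert(!needsLEA({5, 0, 0, false}));  // bare base reg: reuse directly
      assert(needsLEA({5, 7, 0, false}));   // base + index
      assert(needsLEA({5, 0, 8, false}));   // base + displacement
      assert(needsLEA({0, 0, 0, true}));    // global reference
    }
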
@@ -1915,6 +2053,45 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
return ResultReg;
}
+unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
+ MVT VT;
+ if (!isTypeLegal(CF->getType(), VT))
+ return false;
+
+ // Get opcode and regclass for the given zero.
+ unsigned Opc = 0;
+ const TargetRegisterClass *RC = NULL;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::f32:
+ if (Subtarget->hasSSE1()) {
+ Opc = X86::FsFLD0SS;
+ RC = X86::FR32RegisterClass;
+ } else {
+ Opc = X86::LD_Fp032;
+ RC = X86::RFP32RegisterClass;
+ }
+ break;
+ case MVT::f64:
+ if (Subtarget->hasSSE2()) {
+ Opc = X86::FsFLD0SD;
+ RC = X86::FR64RegisterClass;
+ } else {
+ Opc = X86::LD_Fp064;
+ RC = X86::RFP64RegisterClass;
+ }
+ break;
+ case MVT::f80:
+ // No f80 support yet.
+ return false;
+ }
+
+ unsigned ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
+ return ResultReg;
+}
+
+
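
The table above prefers the SSE pseudo zeros (which lower to an xorps/xorpd-style idiom) over an x87 load of +0.0. A sketch of the selection, reusing the pseudo-instruction names from the code (standalone C++; fpZeroOpc is a hypothetical name):

    #include <cassert>
    #include <cstring>

    // Opcode choice mirrored from TargetMaterializeFloatZero above.
    static const char *fpZeroOpc(unsigned bits, bool sse1, bool sse2) {
      if (bits == 32) return sse1 ? "FsFLD0SS" : "LD_Fp032";
      if (bits == 64) return sse2 ? "FsFLD0SD" : "LD_Fp064";
      return nullptr;  // f80 is not handled yet
    }

    int main() {
      assert(!std::strcmp(fpZeroOpc(32, true, true), "FsFLD0SS"));
      assert(!std::strcmp(fpZeroOpc(64, false, false), "LD_Fp064"));
      assert(fpZeroOpc(80, true, true) == nullptr);
    }
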
/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
/// vreg is being provided by the specified load instruction. If possible,
/// try to fold the load as an operand to the instruction, returning true if
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 3aaa693..325d061 100644
--- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -1307,7 +1307,7 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
// set up by FpSET_ST0, and our StackTop is off by one because of it.
unsigned Op0 = getFPReg(MI->getOperand(0));
// Restore the actual StackTop from before Fp_SET_ST0.
- // Note we can't handle Fp_SET_ST1 without a preceeding Fp_SET_ST0, and we
+ // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
// are not enforcing the constraint.
++StackTop;
unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index 0a3f931..06d12fc 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -296,7 +297,7 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
// FIXME: This is a dirty hack. The code itself is a mess right now.
// It should be rewritten from scratch and generalized sometime.
- // Determine maximum offset (minumum due to stack growth).
+ // Determine maximum offset (minimum due to stack growth).
int64_t MaxOffset = 0;
for (std::vector<CalleeSavedInfo>::const_iterator
I = CSI.begin(), E = CSI.end(); I != E; ++I)
@@ -551,65 +552,71 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// responsible for adjusting the stack pointer. Touching the stack at 4K
// increments is necessary to ensure that the guard pages used by the OS
// virtual memory manager are allocated in correct sequence.
- if (NumBytes >= 4096 &&
- (STI.isTargetCygMing() || STI.isTargetWin32()) &&
- !STI.isTargetEnvMacho()) {
+ if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
+ const char *StackProbeSymbol;
+ bool isSPUpdateNeeded = false;
+
+ if (Is64Bit) {
+ if (STI.isTargetCygMing())
+ StackProbeSymbol = "___chkstk";
+ else {
+ StackProbeSymbol = "__chkstk";
+ isSPUpdateNeeded = true;
+ }
+ } else if (STI.isTargetCygMing())
+ StackProbeSymbol = "_alloca";
+ else
+ StackProbeSymbol = "_chkstk";
+
// Check whether EAX is livein for this function.
bool isEAXAlive = isEAXLiveIn(MF);
- const char *StackProbeSymbol =
- STI.isTargetWindows() ? "_chkstk" : "_alloca";
- if (Is64Bit && STI.isTargetCygMing())
- StackProbeSymbol = "__chkstk";
- unsigned CallOp = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
- if (!isEAXAlive) {
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes);
- BuildMI(MBB, MBBI, DL, TII.get(CallOp))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(StackPtr, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
- } else {
+ if (isEAXAlive) {
+ // Sanity check: the 64-bit path below cannot handle a livein EAX, so
+ // assert that we are in the 32-bit case.
+ assert(!Is64Bit && "EAX is livein in x64 case!");
+
// Save EAX
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
.addReg(X86::EAX, RegState::Kill);
+ }
- // Allocate NumBytes-4 bytes on stack. We'll also use 4 already
- // allocated bytes for EAX.
+ if (Is64Bit) {
+ // Handle the 64-bit Windows ABI case where we need to call __chkstk.
+ // Function prologue is responsible for adjusting the stack pointer.
+ BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
+ .addImm(NumBytes);
+ } else {
+ // When EAX is alive, allocate only NumBytes-4 bytes here; the 4 bytes
+ // already pushed to save EAX cover the rest of the allocation.
BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes - 4);
- BuildMI(MBB, MBBI, DL, TII.get(CallOp))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(StackPtr, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
-
- // Restore EAX
- MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
- X86::EAX),
- StackPtr, false, NumBytes - 4);
- MBB.insert(MBBI, MI);
+ .addImm(isEAXAlive ? NumBytes - 4 : NumBytes);
+ }
+
+ BuildMI(MBB, MBBI, DL,
+ TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(StackPtr, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+
+ // MSVC x64's __chkstk needs to adjust %rsp.
+ // FIXME: %rax preserves the offset and should be available.
+ if (isSPUpdateNeeded)
+ emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
+ TII, *RegInfo);
+
+ if (isEAXAlive) {
+ // Restore EAX
+ MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
+ X86::EAX),
+ StackPtr, false, NumBytes - 4);
+ MBB.insert(MBBI, MI);
}
- } else if (NumBytes >= 4096 &&
- STI.isTargetWin64() &&
- !STI.isTargetEnvMacho()) {
- // Sanity check that EAX is not livein for this function. It should
- // not be, so throw an assert.
- assert(!isEAXLiveIn(MF) && "EAX is livein in the Win64 case!");
-
- // Handle the 64-bit Windows ABI case where we need to call __chkstk.
- // Function prologue is responsible for adjusting the stack pointer.
- BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
- .addImm(NumBytes);
- BuildMI(MBB, MBBI, DL, TII.get(X86::WINCALL64pcrel32))
- .addExternalSymbol("__chkstk")
- .addReg(StackPtr, RegState::Define | RegState::Implicit);
- emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
- TII, *RegInfo);
} else if (NumBytes)
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
TII, *RegInfo);
- if ((NumBytes || PushedRegs) && needsFrameMoves) {
+ if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
// Mark end of stack pointer adjustment.
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);
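
The rewritten prologue folds the old Win32 and Win64 branches into a single COFF path whose only variation is the probe symbol and whether RSP must be adjusted afterwards. The symbol choice, restated as a sketch (standalone C++; probeSymbol is a hypothetical name):

    #include <cassert>
    #include <cstring>

    // Probe symbol per target, as chosen in the prologue above. On MSVC x64
    // the caller must additionally subtract the size from RSP afterwards.
    static const char *probeSymbol(bool is64Bit, bool isCygMing) {
      if (is64Bit)
        return isCygMing ? "___chkstk" : "__chkstk";
      return isCygMing ? "_alloca" : "_chkstk";
    }

    int main() {
      assert(!std::strcmp(probeSymbol(true, false), "__chkstk"));  // MSVC x64
      assert(!std::strcmp(probeSymbol(true, true), "___chkstk"));  // Mingw-w64
      assert(!std::strcmp(probeSymbol(false, true), "_alloca"));   // MinGW32
      assert(!std::strcmp(probeSymbol(false, false), "_chkstk"));  // MSVC x86
    }
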
@@ -779,7 +786,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
assert(Offset >= 0 && "Offset should never be negative");
if (Offset) {
- // Check for possible merge with preceeding ADD instruction.
+ // Check for possible merge with preceding ADD instruction.
Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
}
@@ -823,7 +830,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
int delta = -1*X86FI->getTCReturnAddrDelta();
MBBI = MBB.getLastNonDebugInstr();
- // Check for possible merge with preceeding ADD instruction.
+ // Check for possible merge with preceding ADD instruction.
delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
}
@@ -892,7 +899,6 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
- bool isWin64 = STI.isTargetWin64();
unsigned SlotSize = STI.is64Bit() ? 8 : 4;
unsigned FPReg = TRI->getFrameRegister(MF);
unsigned CalleeFrameSize = 0;
@@ -900,25 +906,39 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+ // Push GPRs. This increases the frame size.
unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
// Add the callee-saved register as live-in. It's killed at the spill.
MBB.addLiveIn(Reg);
if (Reg == FPReg)
// X86RegisterInfo::emitPrologue will handle spilling of frame register.
continue;
- if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
- CalleeFrameSize += SlotSize;
- BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
- RC, TRI);
- }
+ CalleeFrameSize += SlotSize;
+ BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
}
X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
+
+ // Spill XMM regs. X86 has no push/pop for XMM registers, so they are
+ // stored to the stack frame instead. Note that only the Win64 ABI
+ // actually spills XMMs.
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i-1].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ // Add the callee-saved register as live-in. It's killed at the spill.
+ MBB.addLiveIn(Reg);
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
+ RC, TRI);
+ }
+
return true;
}
@@ -933,21 +953,30 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
+
+ // Reload XMMs from stack frame.
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned Reg = CSI[i].getReg();
+ if (X86::GR64RegClass.contains(Reg) ||
+ X86::GR32RegClass.contains(Reg))
+ continue;
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
+ RC, TRI);
+ }
+
+ // POP GPRs.
unsigned FPReg = TRI->getFrameRegister(MF);
- bool isWin64 = STI.isTargetWin64();
unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
+ if (!X86::GR64RegClass.contains(Reg) &&
+ !X86::GR32RegClass.contains(Reg))
+ continue;
if (Reg == FPReg)
// X86RegisterInfo::emitEpilogue will handle restoring of frame register.
continue;
- if (!X86::VR128RegClass.contains(Reg) && !isWin64) {
- BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
- } else {
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
- RC, TRI);
- }
+ BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
}
return true;
}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9b0ec6e..4534e85 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1580,6 +1580,81 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return RetVal;
break;
}
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR: {
+ // For operations of the form (x << C1) op C2, check if we can use a smaller
+ // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
+ SDValue N0 = Node->getOperand(0);
+ SDValue N1 = Node->getOperand(1);
+
+ if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
+ break;
+
+ // i8 is unshrinkable, i16 should be promoted to i32.
+ if (NVT != MVT::i32 && NVT != MVT::i64)
+ break;
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (!Cst || !ShlCst)
+ break;
+
+ int64_t Val = Cst->getSExtValue();
+ uint64_t ShlVal = ShlCst->getZExtValue();
+
+ // Make sure that we don't change the operation by removing bits.
+ // This only matters for OR and XOR; AND is unaffected.
+ if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
+ break;
+
+ unsigned ShlOp, Op = 0;
+ EVT CstVT = NVT;
+
+ // Check the minimum bitwidth for the new constant.
+ // TODO: AND32ri is the same as AND64ri32 with zext imm.
+ // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
+ // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
+ if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
+ CstVT = MVT::i8;
+ else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
+ CstVT = MVT::i32;
+
+ // Bail if there is no smaller encoding.
+ if (NVT == CstVT)
+ break;
+
+ switch (NVT.getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unsupported VT!");
+ case MVT::i32:
+ assert(CstVT == MVT::i8);
+ ShlOp = X86::SHL32ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = X86::AND32ri8; break;
+ case ISD::OR: Op = X86::OR32ri8; break;
+ case ISD::XOR: Op = X86::XOR32ri8; break;
+ }
+ break;
+ case MVT::i64:
+ assert(CstVT == MVT::i8 || CstVT == MVT::i32);
+ ShlOp = X86::SHL64ri;
+
+ switch (Opcode) {
+ case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
+ case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
+ case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
+ }
+ break;
+ }
+
+ // Emit the smaller op and the shift.
+ SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
+ SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
+ return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
+ getI8Imm(ShlVal));
+ break;
+ }
case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
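
A concrete instance of the transform added here: (x << 8) | 0x1200 needs a 32-bit immediate, but ((x | 0x12) << 8) lets OR32ri8 encode the constant in one byte. The legality condition just checks that no set bit of the constant lies below the shift amount; a sketch (standalone C++; canShrink is a hypothetical name):

    #include <cassert>
    #include <cstdint>

    // (x << C1) op C2  ==>  (x op (C2 >> C1)) << C1, legal for OR/XOR only
    // if shifting C2 down and back up loses no bits; AND is always safe.
    static bool canShrink(int64_t C2, unsigned C1, bool isAnd) {
      return isAnd || ((C2 >> C1) << C1) == C2;
    }

    int main() {
      uint32_t x = 0xAB;
      assert(canShrink(0x1200, 8, false));
      uint32_t wide   = (x << 8) | 0x1200;         // imm32 encoding
      uint32_t narrow = (x | (0x1200 >> 8)) << 8;  // imm8 encoding
      assert(wide == narrow);
      assert(!canShrink(0x1201, 8, false));        // bit 0 would be dropped
    }
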
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2f49dbc..703c01d 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45,6 +45,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/VectorExtras.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
@@ -221,7 +222,13 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// X86 is weird, it always uses i8 for shift amounts and setcc results.
setBooleanContents(ZeroOrOneBooleanContent);
- setSchedulingPreference(Sched::RegPressure);
+
+ // For 64-bit, use the ILP scheduler since we have so many registers; for
+ // 32-bit code, use the register-pressure-specific scheduling.
+ if (Subtarget->is64Bit())
+ setSchedulingPreference(Sched::ILP);
+ else
+ setSchedulingPreference(Sched::RegPressure);
setStackPointerRegisterToSaveRestore(X86StackPtr);
if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
@@ -543,12 +550,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
- if (Subtarget->is64Bit())
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
- if (Subtarget->isTargetCygMing() || Subtarget->isTargetWindows())
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
- else
- setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC,
+ (Subtarget->is64Bit() ? MVT::i64 : MVT::i32),
+ (Subtarget->isTargetCOFF()
+ && !Subtarget->isTargetEnvMacho()
+ ? Custom : Expand));
if (!UseSoftFloat && X86ScalarSSEf64) {
// f32 and f64 use SSE.
@@ -921,6 +927,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Can turn SHL into an integer multiply.
setOperationAction(ISD::SHL, MVT::v4i32, Custom);
setOperationAction(ISD::SHL, MVT::v16i8, Custom);
+ setOperationAction(ISD::SRL, MVT::v4i32, Legal);
// i8 and i16 vectors are custom, because the source register and source
// memory operand types are not the same width. f32 vectors are
@@ -1271,27 +1278,6 @@ X86TargetLowering::findRepresentativeClass(EVT VT) const{
return std::make_pair(RRC, Cost);
}
-// FIXME: Why this routine is here? Move to RegInfo!
-unsigned
-X86TargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const {
- const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
-
- unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
- switch (RC->getID()) {
- default:
- return 0;
- case X86::GR32RegClassID:
- return 4 - FPDiff;
- case X86::GR64RegClassID:
- return 8 - FPDiff;
- case X86::VR128RegClassID:
- return Subtarget->is64Bit() ? 10 : 4;
- case X86::VR64RegClassID:
- return 4;
- }
-}
-
bool X86TargetLowering::getStackCookieLocation(unsigned &AddressSpace,
unsigned &Offset) const {
if (!Subtarget->isTargetLinux())
@@ -1463,6 +1449,20 @@ bool X86TargetLowering::isUsedByReturnOnly(SDNode *N) const {
return HasRet;
}
+EVT
+X86TargetLowering::getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const {
+ MVT ReturnMVT;
+ // TODO: Is this also valid on 32-bit?
+ if (Subtarget->is64Bit() && VT == MVT::i1 && ExtendKind == ISD::ZERO_EXTEND)
+ ReturnMVT = MVT::i8;
+ else
+ ReturnMVT = MVT::i32;
+
+ EVT MinVT = getRegisterType(Context, ReturnMVT);
+ return VT.bitsLT(MinVT) ? MinVT : VT;
+}
+
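
The effect of getTypeForExtArgOrReturn: on x86-64 a zeroext i1 return may live in an 8-bit register (a bare SETcc into AL), while everything else still widens to at least i32. A sketch of the rule (standalone C++; returnBits is a hypothetical name):

    #include <cassert>

    // Minimum legal width for an extended return value, per the rule above.
    static unsigned returnBits(unsigned vtBits, bool is64Bit, bool zeroExt) {
      unsigned minBits = (is64Bit && vtBits == 1 && zeroExt) ? 8 : 32;
      return vtBits < minBits ? minBits : vtBits;
    }

    int main() {
      assert(returnBits(1, true, true) == 8);    // i1 zeroext, x86-64 -> i8
      assert(returnBits(1, true, false) == 32);  // signext still widens
      assert(returnBits(1, false, true) == 32);  // 32-bit unchanged (TODO)
      assert(returnBits(64, true, true) == 64);  // wide types pass through
    }
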
/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
@@ -1595,6 +1595,18 @@ static bool IsTailCallConvention(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC);
}
+bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
+ if (!CI->isTailCall())
+ return false;
+
+ CallSite CS(CI);
+ CallingConv::ID CalleeCC = CS.getCallingConv();
+ if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ return false;
+
+ return true;
+}
+
/// FuncIsMadeTailCallSafe - Return true if the function is being made into
/// a tailcall target by changing its ABI.
static bool FuncIsMadeTailCallSafe(CallingConv::ID CC) {
@@ -1627,8 +1639,9 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
// In case of tail call optimization mark all arguments mutable. Since they
// could be overwritten by lowering of arguments in case of a tail call.
if (Flags.isByVal()) {
- int FI = MFI->CreateFixedObject(Flags.getByValSize(),
- VA.getLocMemOffset(), isImmutable);
+ unsigned Bytes = Flags.getByValSize();
+ if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
+ int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
return DAG.getFrameIndex(FI, getPointerTy());
} else {
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
@@ -1765,8 +1778,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// If the function takes variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
- if (!IsWin64 && (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
- CallConv != CallingConv::X86_ThisCall))) {
+ if (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
+ CallConv != CallingConv::X86_ThisCall)) {
FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize,true));
}
if (Is64Bit) {
@@ -1818,7 +1831,9 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
FuncInfo->setRegSaveFrameIndex(
MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
- FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
+ // Fix up the vararg frame index to point into the shadow area (4 x i64).
+ if (NumIntRegs < 4)
+ FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
} else {
// For X86-64, if there are vararg parameters that are passed via
// registers, then we must store them to their spots on the stack so they
@@ -1937,7 +1952,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG,
return SDValue(OutRetAddr.getNode(), 1);
}
-/// EmitTailCallStoreRetAddr - Emit a store of the return adress if tail call
+/// EmitTailCallStoreRetAddr - Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff!=0).
static SDValue
EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
@@ -2028,7 +2043,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
SDValue RetAddrFrIdx;
- // Load return adress for tail calls.
+ // Load return address for tail calls.
if (isTailCall && FPDiff)
Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
Is64Bit, FPDiff, dl);
@@ -2185,7 +2200,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
- // Do not flag preceeding copytoreg stuff together with the following stuff.
+ // Do not flag preceding copytoreg stuff together with the following stuff.
InFlag = SDValue();
if (GuaranteedTailCallOpt) {
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -2266,7 +2281,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
(GV->isDeclaration() || GV->isWeakForLinker()) &&
- Subtarget->getDarwinVers() < 9) {
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -2285,7 +2301,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
getTargetMachine().getRelocationModel() == Reloc::PIC_) {
OpFlags = X86II::MO_PLT;
} else if (Subtarget->isPICStyleStubAny() &&
- Subtarget->getDarwinVers() < 9) {
+ (!Subtarget->getTargetTriple().isMacOSX() ||
+ Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
// PC-relative references to external symbols should go through $stub,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
@@ -3173,7 +3190,8 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
- if (NumElems != 2 && NumElems != 4)
+ if ((NumElems != 2 && NumElems != 4)
+ || N->getValueType(0).getSizeInBits() > 128)
return false;
for (unsigned i = 0; i < NumElems/2; ++i)
@@ -3195,19 +3213,36 @@ static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
return false;
- for (int i = 0, j = 0; i != NumElts; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (V2IsSplat) {
- if (!isUndefOrEqual(BitI1, NumElts))
- return false;
- } else {
- if (!isUndefOrEqual(BitI1, j + NumElts))
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElts / NumSections;
+
+ unsigned Start = 0;
+ unsigned End = NumSectionElts;
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = Start, j = s * NumSectionElts;
+ i != End;
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+ if (!isUndefOrEqual(BitI, j))
return false;
+ if (V2IsSplat) {
+ if (!isUndefOrEqual(BitI1, NumElts))
+ return false;
+ } else {
+ if (!isUndefOrEqual(BitI1, j + NumElts))
+ return false;
+ }
}
+ // Process the next 128 bits.
+ Start += NumSectionElts;
+ End += NumSectionElts;
}
+
return true;
}
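
Because AVX's UNPCKL* forms operate on each 128-bit lane independently, the reference mask must be built per section rather than across the whole vector. A sketch that reproduces the expected low-unpack mask under that lane model (standalone C++; makeUNPCKLMask is a hypothetical name):

    #include <cassert>
    #include <vector>

    // Expected UNPCKL mask: within each 128-bit section, elements pair up
    // as (j, j + NumElts) for j in the low half of the section.
    static std::vector<int> makeUNPCKLMask(unsigned NumElts, unsigned EltBits) {
      unsigned Bits = NumElts * EltBits;
      unsigned NumSections = Bits >= 128 ? Bits / 128 : 1;  // MMX -> 1
      unsigned SecElts = NumElts / NumSections;
      std::vector<int> M;
      for (unsigned s = 0; s != NumSections; ++s)
        for (unsigned j = s * SecElts; j != s * SecElts + SecElts / 2; ++j) {
          M.push_back((int)j);
          M.push_back((int)(j + NumElts));
        }
      return M;
    }

    int main() {
      // v8f32 under AVX: {0,8,1,9 | 4,12,5,13}, not {0,8,1,9,2,10,3,11}.
      std::vector<int> M = makeUNPCKLMask(8, 32);
      assert(M[4] == 4 && M[5] == 12);
    }
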
@@ -3255,14 +3290,27 @@ static bool isUNPCKL_v_undef_Mask(const SmallVectorImpl<int> &Mask, EVT VT) {
if (NumElems != 2 && NumElems != 4 && NumElems != 8 && NumElems != 16)
return false;
- for (int i = 0, j = 0; i != NumElems; i += 2, ++j) {
- int BitI = Mask[i];
- int BitI1 = Mask[i+1];
- if (!isUndefOrEqual(BitI, j))
- return false;
- if (!isUndefOrEqual(BitI1, j))
- return false;
+ // Handle vector lengths > 128 bits. Define a "section" as a set of
+ // 128 bits. AVX defines UNPCK* to operate independently on 128-bit
+ // sections.
+ unsigned NumSections = VT.getSizeInBits() / 128;
+ if (NumSections == 0) NumSections = 1; // Handle MMX
+ unsigned NumSectionElts = NumElems / NumSections;
+
+ for (unsigned s = 0; s < NumSections; ++s) {
+ for (unsigned i = s * NumSectionElts, j = s * NumSectionElts;
+ i != NumSectionElts * (s + 1);
+ i += 2, ++j) {
+ int BitI = Mask[i];
+ int BitI1 = Mask[i+1];
+
+ if (!isUndefOrEqual(BitI, j))
+ return false;
+ if (!isUndefOrEqual(BitI1, j))
+ return false;
+ }
}
+
return true;
}
@@ -3846,8 +3894,8 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
/// getShuffleScalarElt - Returns the scalar element that will make up the ith
/// element of the result of the vector shuffle.
-SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
- unsigned Depth) {
+static SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
+ unsigned Depth) {
if (Depth == 6)
return SDValue(); // Limit search depth.
@@ -3895,11 +3943,15 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
case X86ISD::PUNPCKLWD:
case X86ISD::PUNPCKLDQ:
case X86ISD::PUNPCKLQDQ:
- DecodePUNPCKLMask(NumElems, ShuffleMask);
+ DecodePUNPCKLMask(VT, ShuffleMask);
break;
case X86ISD::UNPCKLPS:
case X86ISD::UNPCKLPD:
- DecodeUNPCKLPMask(NumElems, ShuffleMask);
+ case X86ISD::VUNPCKLPS:
+ case X86ISD::VUNPCKLPD:
+ case X86ISD::VUNPCKLPSY:
+ case X86ISD::VUNPCKLPDY:
+ DecodeUNPCKLPMask(VT, ShuffleMask);
break;
case X86ISD::MOVHLPS:
DecodeMOVHLPSMask(NumElems, ShuffleMask);
@@ -3968,7 +4020,7 @@ SDValue getShuffleScalarElt(SDNode *N, int Index, SelectionDAG &DAG,
/// getNumOfConsecutiveZeros - Return the number of elements of a vector
/// shuffle operation which come consecutively from a zero. The
-/// search can start in two diferent directions, from left or right.
+/// search can start in two different directions, from left or right.
static
unsigned getNumOfConsecutiveZeros(SDNode *N, int NumElems,
bool ZerosFromLeft, SelectionDAG &DAG) {
@@ -5263,6 +5315,7 @@ LowerVECTOR_SHUFFLE_4wide(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
// Break it into (shuffle shuffle_hi, shuffle_lo).
Locs.clear();
+ Locs.resize(4);
SmallVector<int,8> LoMask(4U, -1);
SmallVector<int,8> HiMask(4U, -1);
@@ -5508,12 +5561,16 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasSSE2) {
X86::getShuffleSHUFImmediate(SVOp), DAG);
}
-static inline unsigned getUNPCKLOpcode(EVT VT) {
+static inline unsigned getUNPCKLOpcode(EVT VT, const X86Subtarget *Subtarget) {
switch(VT.getSimpleVT().SimpleTy) {
case MVT::v4i32: return X86ISD::PUNPCKLDQ;
case MVT::v2i64: return X86ISD::PUNPCKLQDQ;
- case MVT::v4f32: return X86ISD::UNPCKLPS;
- case MVT::v2f64: return X86ISD::UNPCKLPD;
+ case MVT::v4f32:
+ return Subtarget->hasAVX() ? X86ISD::VUNPCKLPS : X86ISD::UNPCKLPS;
+ case MVT::v2f64:
+ return Subtarget->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ case MVT::v8f32: return X86ISD::VUNPCKLPSY;
+ case MVT::v4f64: return X86ISD::VUNPCKLPDY;
case MVT::v16i8: return X86ISD::PUNPCKLBW;
case MVT::v8i16: return X86ISD::PUNPCKLWD;
default:
@@ -5641,7 +5698,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
// unpckh_undef). Only use pshufd if speed is more important than size.
if (OptForSize && X86::isUNPCKL_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()), dl, VT, V1, V1, DAG);
if (OptForSize && X86::isUNPCKH_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
@@ -5762,7 +5819,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
}
if (X86::isUNPCKLMask(SVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V2, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V2, DAG);
if (X86::isUNPCKHMask(SVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V2, DAG);
@@ -5789,7 +5847,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
ShuffleVectorSDNode *NewSVOp = cast<ShuffleVectorSDNode>(NewOp);
if (X86::isUNPCKLMask(NewSVOp))
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V2, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V2, V1, DAG);
if (X86::isUNPCKHMask(NewSVOp))
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V2, V1, DAG);
@@ -5812,8 +5871,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (ShuffleVectorSDNode::isSplatMask(&M[0], VT) &&
SVOp->getSplatIndex() == 0 && V2IsUndef) {
- if (VT == MVT::v2f64)
- return getTargetShuffleNode(X86ISD::UNPCKLPD, dl, VT, V1, V1, DAG);
+ if (VT == MVT::v2f64) {
+ X86ISD::NodeType Opcode =
+ getSubtarget()->hasAVX() ? X86ISD::VUNPCKLPD : X86ISD::UNPCKLPD;
+ return getTargetShuffleNode(Opcode, dl, VT, V1, V1, DAG);
+ }
if (VT == MVT::v2i64)
return getTargetShuffleNode(X86ISD::PUNPCKLQDQ, dl, VT, V1, V1, DAG);
}
@@ -5840,7 +5902,8 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const {
if (X86::isUNPCKL_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
- return getTargetShuffleNode(getUNPCKLOpcode(VT), dl, VT, V1, V1, DAG);
+ return getTargetShuffleNode(getUNPCKLOpcode(VT, getSubtarget()),
+ dl, VT, V1, V1, DAG);
if (X86::isUNPCKH_v_undef_Mask(SVOp))
if (VT != MVT::v2i64 && VT != MVT::v2f64)
return getTargetShuffleNode(getUNPCKHOpcode(VT), dl, VT, V1, V1, DAG);
@@ -7868,6 +7931,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SelectionDAG &DAG) const {
assert((Subtarget->isTargetCygMing() || Subtarget->isTargetWindows()) &&
"This should be used only on Windows targets");
+ assert(!Subtarget->isTargetEnvMacho());
DebugLoc dl = Op.getDebugLoc();
// Get the inputs.
@@ -7878,8 +7942,9 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue Flag;
EVT SPTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
+ unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
- Chain = DAG.getCopyToReg(Chain, dl, X86::EAX, Size, Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
Flag = Chain.getValue(1);
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -8809,8 +8874,8 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
case ISD::SADDO:
// An add of one will be selected as an INC. Note that INC doesn't
// set CF, so we can't do this for UADDO.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
- if (C->getAPIntValue() == 1) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
BaseOp = X86ISD::INC;
Cond = X86::COND_O;
break;
@@ -8825,8 +8890,8 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
case ISD::SSUBO:
// A subtract of one will be selected as a DEC. Note that DEC doesn't
// set CF, so we can't do this for USUBO.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
- if (C->getAPIntValue() == 1) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS))
+ if (C->isOne()) {
BaseOp = X86ISD::DEC;
Cond = X86::COND_O;
break;
@@ -10351,21 +10416,48 @@ X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI,
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
+ assert(!Subtarget->isTargetEnvMacho());
+
// The lowering is pretty easy: we're just emitting the call to _alloca. The
// non-trivial part is impdef of ESP.
- // FIXME: The code should be tweaked as soon as we'll try to do codegen for
- // mingw-w64.
- const char *StackProbeSymbol =
+ if (Subtarget->isTargetWin64()) {
+ if (Subtarget->isTargetCygMing()) {
+ // ___chkstk(Mingw64):
+ // Clobbers R10, R11, RAX and EFLAGS.
+ // Updates RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("___chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::RSP, RegState::Implicit)
+ .addReg(X86::RAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::RSP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ } else {
+ // __chkstk(MSVCRT): does not update stack pointer.
+ // Clobbers R10, R11 and EFLAGS.
+ // FIXME: RAX(allocated size) might be reused and not killed.
+ BuildMI(*BB, MI, DL, TII->get(X86::W64ALLOCA))
+ .addExternalSymbol("__chkstk")
+ .addReg(X86::RAX, RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ // RAX has the offset to be subtracted from RSP.
+ BuildMI(*BB, MI, DL, TII->get(X86::SUB64rr), X86::RSP)
+ .addReg(X86::RSP)
+ .addReg(X86::RAX);
+ }
+ } else {
+ const char *StackProbeSymbol =
Subtarget->isTargetWindows() ? "_chkstk" : "_alloca";
- BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
- .addExternalSymbol(StackProbeSymbol)
- .addReg(X86::EAX, RegState::Implicit)
- .addReg(X86::ESP, RegState::Implicit)
- .addReg(X86::EAX, RegState::Define | RegState::Implicit)
- .addReg(X86::ESP, RegState::Define | RegState::Implicit)
- .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ BuildMI(*BB, MI, DL, TII->get(X86::CALLpcrel32))
+ .addExternalSymbol(StackProbeSymbol)
+ .addReg(X86::EAX, RegState::Implicit)
+ .addReg(X86::ESP, RegState::Implicit)
+ .addReg(X86::EAX, RegState::Define | RegState::Implicit)
+ .addReg(X86::ESP, RegState::Define | RegState::Implicit)
+ .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);
+ }
MI->eraseFromParent(); // The pseudo instruction is gone now.
return BB;
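
The two Win64 probe flavors above differ in who moves RSP: Mingw64's ___chkstk subtracts the size itself, while MSVCRT's __chkstk only probes and the lowering emits the SUB64rr. A sketch of the contract difference, modeling register effects only (standalone C++; function names are hypothetical):

    #include <cassert>
    #include <cstdint>

    struct Regs { uint64_t rsp, rax; };  // rax holds the allocation size

    // Mingw64 ___chkstk: probes and subtracts the size from RSP itself.
    static void mingw64Chkstk(Regs &r) { r.rsp -= r.rax; }
    // MSVCRT __chkstk: probes only; RSP is left for the caller to adjust.
    static void msvcrtChkstk(Regs &) {}

    int main() {
      Regs a{0x8000, 0x2000}, b{0x8000, 0x2000};
      mingw64Chkstk(a);                 // no follow-up needed
      msvcrtChkstk(b); b.rsp -= b.rax;  // the emitted SUB64rr RSP, RAX
      assert(a.rsp == b.rsp && a.rsp == 0x6000);
    }
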
@@ -12126,7 +12218,7 @@ bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
AsmPieces.clear();
SplitString(AsmStr, AsmPieces, " \t"); // Split with whitespace.
- // FIXME: this should verify that we are targetting a 486 or better. If not,
+ // FIXME: this should verify that we are targeting a 486 or better. If not,
// we will turn this bswap into something that will be lowered to logical ops
// instead of emitting the bswap asm. For now, we don't support 486 or lower
// so don't worry about this.
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index 6ec4a7d..6301057 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -677,9 +677,6 @@ namespace llvm {
/// getFunctionAlignment - Return the Log2 alignment of this function.
virtual unsigned getFunctionAlignment(const Function *F) const;
- unsigned getRegPressureLimit(const TargetRegisterClass *RC,
- MachineFunction &MF) const;
-
/// getStackCookieLocation - Return true if the target stores stack
/// protector cookies at a fixed offset in some non-standard address
/// space, and populates the address space and offset as
@@ -846,6 +843,12 @@ namespace llvm {
virtual bool isUsedByReturnOnly(SDNode *N) const;
+ virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;
+
+ virtual EVT
+ getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
+ ISD::NodeType ExtendKind) const;
+
virtual bool
CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
diff --git a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
index 45d1c6b..dd4f6a5 100644
--- a/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
+++ b/contrib/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -12,66 +12,91 @@
//
//===----------------------------------------------------------------------===//
-// FIXME: We don't support any intrinsics for these instructions yet.
+class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pat>
+ : I<o, F, outs, ins, asm, pat>, TB, Requires<[Has3DNow]> {
+}
-class I3DNow<bits<8> o, Format F, dag outs, dag ins, string asm,
- list<dag> pattern>
- : I<o, F, outs, ins, asm, pattern>, TB, Requires<[Has3DNow]> {
+class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), pat>,
+ Has3DNow0F0FOpcode {
+ // FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
+ let isAsmParserOnly = 1;
+ let Constraints = "$src1 = $dst";
}
-class I3DNow_binop<bits<8> o, Format F, dag ins, string Mnemonic>
- : I<o, F, (outs VR64:$dst), ins,
- !strconcat(Mnemonic, "\t{$src2, $dst|$dst, $src2}"), []>,
- TB, Requires<[Has3DNow]>, Has3DNow0F0FOpcode {
+class I3DNow_conv<bits<8> o, Format F, dag ins, string Mnemonic, list<dag> pat>
+ : I3DNow<o, F, (outs VR64:$dst), ins,
+ !strconcat(Mnemonic, "\t{$src, $dst|$dst, $src}"), pat>,
+ Has3DNow0F0FOpcode {
// FIXME: The disassembler doesn't support Has3DNow0F0FOpcode yet.
let isAsmParserOnly = 1;
}
+multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn, []>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn, []>;
+}
+
+multiclass I3DNow_binop_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>;
+ def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1,
+ (bitconvert (load_mmx addr:$src2))))]>;
+}
+
+multiclass I3DNow_conv_rm<bits<8> opc, string Mn> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src1), Mn, []>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src1), Mn, []>;
+}
-let Constraints = "$src1 = $dst" in {
- // MMXI_binop_rm_int - Simple MMX binary operator based on intrinsic.
- // When this is cleaned up, remove the FIXME from X86RecognizableInstr.cpp.
- multiclass I3DNow_binop_rm<bits<8> opc, string Mn> {
- def rr : I3DNow_binop<opc, MRMSrcReg, (ins VR64:$src1, VR64:$src2), Mn>;
- def rm : I3DNow_binop<opc, MRMSrcMem, (ins VR64:$src1, i64mem:$src2), Mn>;
- }
+multiclass I3DNow_conv_rm_int<bits<8> opc, string Mn, string Ver = ""> {
+ def rr : I3DNow_conv<opc, MRMSrcReg, (ins VR64:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>;
+ def rm : I3DNow_conv<opc, MRMSrcMem, (ins i64mem:$src), Mn,
+ [(set VR64:$dst, (!cast<Intrinsic>(
+ !strconcat("int_x86_3dnow", Ver, "_", Mn))
+ (bitconvert (load_mmx addr:$src))))]>;
}
-defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb">;
-defm PF2ID : I3DNow_binop_rm<0x1D, "pf2id">;
-defm PFACC : I3DNow_binop_rm<0xAE, "pfacc">;
-defm PFADD : I3DNow_binop_rm<0x9E, "pfadd">;
-defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq">;
-defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge">;
-defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt">;
-defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax">;
-defm PFMIN : I3DNow_binop_rm<0x94, "pfmin">;
-defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul">;
-defm PFRCP : I3DNow_binop_rm<0x96, "pfrcp">;
-defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1">;
-defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2">;
-defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1">;
-defm PFRSQRT : I3DNow_binop_rm<0x97, "pfrsqrt">;
-defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub">;
-defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr">;
-defm PI2FD : I3DNow_binop_rm<0x0D, "pi2fd">;
-defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw">;
+defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb">;
+defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id">;
+defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc">;
+defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd">;
+defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq">;
+defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge">;
+defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt">;
+defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax">;
+defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin">;
+defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul">;
+defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp">;
+defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1">;
+defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2">;
+defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1">;
+defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt">;
+defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub">;
+defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr">;
+defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd">;
+defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw">;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)]>;
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i32mem:$addr),
"prefetch $addr", []>;
-
+
// FIXME: Disassembler gets a bogus decode conflict.
-let isAsmParserOnly = 1 in {
+let isAsmParserOnly = 1 in
def PREFETCHW : I3DNow<0x0D, MRM1m, (outs), (ins i16mem:$addr),
"prefetchw $addr", []>;
-}
// "3DNowA" instructions
-defm PF2IW : I3DNow_binop_rm<0x1C, "pf2iw">;
-defm PI2FW : I3DNow_binop_rm<0x0C, "pi2fw">;
-defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc">;
-defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc">;
-defm PSWAPD : I3DNow_binop_rm<0xBB, "pswapd">;
+defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", "a">;
+defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", "a">;
+defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", "a">;
+defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", "a">;
+defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", "a">;
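
The Has3DNow0F0FOpcode flag these multiclasses set marks AMD's unusual encoding: the two-byte 0F 0F escape comes first, and the byte that actually selects the operation (0x9E for pfadd, 0xBB for pswapd, and so on) trails the ModRM and addressing bytes, in the slot an 8-bit immediate would normally occupy. A minimal C++ sketch of that byte order, assuming a register-form instruction and hypothetical helper names (this is not LLVM's MCCodeEmitter API):

    #include <cstdint>
    #include <vector>

    // AMD 3DNow! byte order: 0F 0F <ModRM [SIB/disp]> <opcode>.
    // The per-instruction opcode is emitted last, like a trailing imm8.
    void emit3DNowRR(std::vector<uint8_t> &Out, uint8_t ModRM, uint8_t Opc) {
      Out.push_back(0x0F);  // two-byte escape
      Out.push_back(0x0F);  // second 0F: 3DNow! escape
      Out.push_back(ModRM); // operand encoding
      Out.push_back(Opc);   // e.g. 0x9E for pfadd
    }

This trailing-opcode layout is also why the disassembler FIXMEs above exist: the decoder tables have to treat the final byte as data rather than as part of the normal opcode space.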
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
index f0ea068..9f7a4b0 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -163,7 +163,7 @@ def IMUL64rm : RI<0xAF, MRMSrcMem, (outs GR64:$dst),
} // Defs = [EFLAGS]
-// Suprisingly enough, these are not two address instructions!
+// Surprisingly enough, these are not two address instructions!
let Defs = [EFLAGS] in {
// Register-Integer Signed Integer Multiply
def IMUL16rri : Ii16<0x69, MRMSrcReg, // GR16 = GR16*I16
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
index 77f4725..c228a0ae 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrControl.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -263,6 +263,16 @@ let isCall = 1, isCodeGenOnly = 1 in
Requires<[IsWin64]>;
}
+let isCall = 1, isCodeGenOnly = 1 in
+ // __chkstk(MSVC): clobber R10, R11 and EFLAGS.
+ // ___chkstk(Mingw64): clobber R10, R11, RAX and EFLAGS, and update RSP.
+ let Defs = [RAX, R10, R11, RSP, EFLAGS],
+ Uses = [RSP] in {
+ def W64ALLOCA : Ii32PCRel<0xE8, RawFrm,
+ (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
+ "call{q}\t$dst", []>,
+ Requires<[IsWin64]>;
+ }
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1 in
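
The Defs list on W64ALLOCA follows from how the two probe routines behave: both walk the frame a page at a time (Windows commits stack one guard page at a time), MSVC's __chkstk leaves RSP for the caller to adjust, and MinGW's ___chkstk adjusts RSP itself and also scratches RAX. A rough sketch of the decision, with illustrative pseudo-assembly in comments rather than LLVM's actual X86FrameLowering logic:

    #include <cstdint>

    void emitWin64StackAlloc(uint64_t FrameSize, bool IsMinGW) {
      const uint64_t PageSize = 4096;
      if (FrameSize < PageSize) {
        // sub rsp, FrameSize        ; small frame: no probe needed
        return;
      }
      // mov rax, FrameSize          ; probe amount travels in RAX
      // call __chkstk / ___chkstk   ; the W64ALLOCA pseudo models this
      if (!IsMinGW) {
        // sub rsp, rax              ; MSVC variant: caller adjusts RSP
      }
      // MinGW's ___chkstk already moved RSP, hence RSP/RAX in Defs.
    }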
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
index 0660072..7daa264 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
@@ -91,21 +91,23 @@ class REX_W { bit hasREX_WPrefix = 1; }
class LOCK { bit hasLockPrefix = 1; }
class SegFS { bits<2> SegOvrBits = 1; }
class SegGS { bits<2> SegOvrBits = 2; }
-class TB { bits<4> Prefix = 1; }
-class REP { bits<4> Prefix = 2; }
-class D8 { bits<4> Prefix = 3; }
-class D9 { bits<4> Prefix = 4; }
-class DA { bits<4> Prefix = 5; }
-class DB { bits<4> Prefix = 6; }
-class DC { bits<4> Prefix = 7; }
-class DD { bits<4> Prefix = 8; }
-class DE { bits<4> Prefix = 9; }
-class DF { bits<4> Prefix = 10; }
-class XD { bits<4> Prefix = 11; }
-class XS { bits<4> Prefix = 12; }
-class T8 { bits<4> Prefix = 13; }
-class TA { bits<4> Prefix = 14; }
-class TF { bits<4> Prefix = 15; }
+class TB { bits<5> Prefix = 1; }
+class REP { bits<5> Prefix = 2; }
+class D8 { bits<5> Prefix = 3; }
+class D9 { bits<5> Prefix = 4; }
+class DA { bits<5> Prefix = 5; }
+class DB { bits<5> Prefix = 6; }
+class DC { bits<5> Prefix = 7; }
+class DD { bits<5> Prefix = 8; }
+class DE { bits<5> Prefix = 9; }
+class DF { bits<5> Prefix = 10; }
+class XD { bits<5> Prefix = 11; }
+class XS { bits<5> Prefix = 12; }
+class T8 { bits<5> Prefix = 13; }
+class TA { bits<5> Prefix = 14; }
+class A6 { bits<5> Prefix = 15; }
+class A7 { bits<5> Prefix = 16; }
+class TF { bits<5> Prefix = 17; }
class VEX { bit hasVEXPrefix = 1; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
@@ -136,7 +138,7 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bit hasOpSizePrefix = 0; // Does this inst have a 0x66 prefix?
bit hasAdSizePrefix = 0; // Does this inst have a 0x67 prefix?
- bits<4> Prefix = 0; // Which prefix byte does this inst have?
+ bits<5> Prefix = 0; // Which prefix byte does this inst have?
bit hasREX_WPrefix = 0; // Does this inst require the REX.W prefix?
FPFormat FPForm = NotFP; // What flavor of FP instruction is this?
bit hasLockPrefix = 0; // Does this inst have a 0xF0 prefix?
@@ -154,20 +156,20 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{5-0} = FormBits;
let TSFlags{6} = hasOpSizePrefix;
let TSFlags{7} = hasAdSizePrefix;
- let TSFlags{11-8} = Prefix;
- let TSFlags{12} = hasREX_WPrefix;
- let TSFlags{15-13} = ImmT.Value;
- let TSFlags{18-16} = FPForm.Value;
- let TSFlags{19} = hasLockPrefix;
- let TSFlags{21-20} = SegOvrBits;
- let TSFlags{23-22} = ExeDomain.Value;
- let TSFlags{31-24} = Opcode;
- let TSFlags{32} = hasVEXPrefix;
- let TSFlags{33} = hasVEX_WPrefix;
- let TSFlags{34} = hasVEX_4VPrefix;
- let TSFlags{35} = hasVEX_i8ImmReg;
- let TSFlags{36} = hasVEX_L;
- let TSFlags{37} = has3DNow0F0FOpcode;
+ let TSFlags{12-8} = Prefix;
+ let TSFlags{13} = hasREX_WPrefix;
+ let TSFlags{16-14} = ImmT.Value;
+ let TSFlags{19-17} = FPForm.Value;
+ let TSFlags{20} = hasLockPrefix;
+ let TSFlags{22-21} = SegOvrBits;
+ let TSFlags{24-23} = ExeDomain.Value;
+ let TSFlags{32-25} = Opcode;
+ let TSFlags{33} = hasVEXPrefix;
+ let TSFlags{34} = hasVEX_WPrefix;
+ let TSFlags{35} = hasVEX_4VPrefix;
+ let TSFlags{36} = hasVEX_i8ImmReg;
+ let TSFlags{37} = hasVEX_L;
+ let TSFlags{38} = has3DNow0F0FOpcode;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -319,7 +321,7 @@ class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasAVX]>;
class VPSI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>,
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedSingle>, TB,
Requires<[HasAVX]>;
// SSE2 Instruction Templates:
@@ -353,7 +355,7 @@ class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
Requires<[HasAVX]>;
class VPDI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
- : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>,
+ : I<o, F, outs, ins, !strconcat("v", asm), pattern, SSEPackedDouble>, TB,
OpSize, Requires<[HasAVX]>;
// SSE3 Instruction Templates:
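
Widening Prefix to five bits ripples through every later TSFlags field, which is why the bit positions above all shifted up by one. A quick C++ sketch that mirrors the new `let TSFlags{...}` layout and spot-checks two of the fields (field positions copied from the assignments above; illustrative, not LLVM code):

    #include <cassert>
    #include <cstdint>

    // Prefix now occupies TSFlags{12-8}, the opcode byte TSFlags{32-25}.
    constexpr uint64_t prefixOf(uint64_t TS) { return (TS >> 8) & 0x1F; }
    constexpr uint64_t opcodeOf(uint64_t TS) { return (TS >> 25) & 0xFF; }

    int main() {
      // Prefix = 17 (TF) together with opcode byte 0x0F.
      uint64_t TS = (17ULL << 8) | (0x0FULL << 25);
      assert(prefixOf(TS) == 17 && opcodeOf(TS) == 0x0F);
      return 0;
    }

Defining each shift relative to its predecessor, as the X86II enum below now does, keeps the TableGen layout and the C++ decoders from silently drifting apart the next time a field grows.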
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index 5016c0f..3cbfac1 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -132,6 +132,8 @@ def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
def X86Unpcklps : SDNode<"X86ISD::UNPCKLPS", SDTShuff2Op>;
def X86Unpcklpd : SDNode<"X86ISD::UNPCKLPD", SDTShuff2Op>;
+def X86Unpcklpsy : SDNode<"X86ISD::VUNPCKLPSY", SDTShuff2Op>;
+def X86Unpcklpdy : SDNode<"X86ISD::VUNPCKLPDY", SDTShuff2Op>;
def X86Unpckhps : SDNode<"X86ISD::UNPCKHPS", SDTShuff2Op>;
def X86Unpckhpd : SDNode<"X86ISD::UNPCKHPD", SDTShuff2Op>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 76a9b12..83f0260 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -232,7 +232,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable2Addr.count(RegOp) && "Duplicated entries?");
RegOp2MemOpTable2Addr[RegOp] = std::make_pair(MemOp, 0U);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl2Addr[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -335,7 +335,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable0.count(RegOp) && "Duplicated entries?");
RegOp2MemOpTable0[RegOp] = std::make_pair(MemOp, Align);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl0[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -460,7 +460,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable1.count(RegOp) && "Duplicate entries");
RegOp2MemOpTable1[RegOp] = std::make_pair(MemOp, Align);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl1[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -682,7 +682,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
assert(!RegOp2MemOpTable2.count(RegOp) && "Duplicate entry!");
RegOp2MemOpTable2[RegOp] = std::make_pair(MemOp, Align);
- // If this is not a reversable operation (because there is a many->one)
+ // If this is not a reversible operation (because there is a many->one)
// mapping, don't insert the reverse of the operation into MemOp2RegOpTable.
if (OpTbl2[i][1] & TB_NOT_REVERSABLE)
continue;
@@ -916,7 +916,6 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVSDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
- case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MMX_MOVD64rm:
@@ -2241,6 +2240,12 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
bool isTwoAddr = NumOps > 1 &&
MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOpcode() == X86::ADD32ri &&
+ MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return NULL;
+
MachineInstr *NewMI = NULL;
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
@@ -2535,6 +2540,12 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
case X86::TEST32rr:
case X86::TEST64rr:
return true;
+ case X86::ADD32ri:
+ // FIXME: AsmPrinter doesn't know how to handle
+ // X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
+ if (MI->getOperand(2).getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS)
+ return false;
+ break;
}
}
@@ -2845,11 +2856,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::FsMOVAPDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
- case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
- case X86::MOVDQUrm_Int:
break;
}
switch (Opc2) {
@@ -2869,11 +2878,9 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::FsMOVAPDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
- case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
- case X86::MOVDQUrm_Int:
break;
}
@@ -3085,12 +3092,8 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
-bool X86InstrInfo::
-hasHighOperandLatency(const InstrItineraryData *ItinData,
- const MachineRegisterInfo *MRI,
- const MachineInstr *DefMI, unsigned DefIdx,
- const MachineInstr *UseMI, unsigned UseIdx) const {
- switch (DefMI->getOpcode()) {
+bool X86InstrInfo::isHighLatencyDef(int opc) const {
+ switch (opc) {
default: return false;
case X86::DIVSDrm:
case X86::DIVSDrm_Int:
@@ -3120,6 +3123,14 @@ hasHighOperandLatency(const InstrItineraryData *ItinData,
}
}
+bool X86InstrInfo::
+hasHighOperandLatency(const InstrItineraryData *ItinData,
+ const MachineRegisterInfo *MRI,
+ const MachineInstr *DefMI, unsigned DefIdx,
+ const MachineInstr *UseMI, unsigned UseIdx) const {
+ return isHighLatencyDef(DefMI->getOpcode());
+}
+
namespace {
/// CGBR - Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
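
Hoisting the opcode switch into isHighLatencyDef is a small but deliberate refactor: the virtual hasHighOperandLatency hook needs a full MachineInstr, while other heuristics may only have an opcode in hand. A minimal sketch of the shape, not the real TargetInstrInfo interface:

    // Extract-predicate pattern: the opcode switch becomes a reusable,
    // opcode-only query, and the operand-latency hook forwards to it.
    struct InstrInfoSketch {
      bool isHighLatencyDef(int Opc) const {
        switch (Opc) {
        default: return false;
        case 1: // stand-in for DIVSDrm, SQRTPDm, ... above
          return true;
        }
      }
      bool hasHighOperandLatency(int DefOpcode) const {
        return isHighLatencyDef(DefOpcode); // thin forwarder
      }
    };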
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index fcb5a25..8da68b5 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -33,15 +33,15 @@ namespace X86 {
AddrScaleAmt = 1,
AddrIndexReg = 2,
AddrDisp = 3,
-
+
/// AddrSegmentReg - The operand # of the segment in the memory operand.
AddrSegmentReg = 4,
/// AddrNumOperands - Total number of operands in a memory reference.
AddrNumOperands = 5
};
-
-
+
+
// X86 specific condition code. These correspond to X86_*_COND in
// X86InstrInfo.td. They must be kept in synch.
enum CondCode {
@@ -72,16 +72,16 @@ namespace X86 {
COND_INVALID
};
-
+
// Turn condition code into conditional branch opcode.
unsigned GetCondBranchFromCond(CondCode CC);
-
+
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(X86::CondCode CC);
}
-
+
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
///
@@ -90,14 +90,14 @@ namespace X86II {
enum TOF {
//===------------------------------------------------------------------===//
// X86 Specific MachineOperand flags.
-
+
MO_NO_FLAG,
-
+
/// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
/// relocation of:
/// SYMBOL_LABEL + [. - PICBASELABEL]
MO_GOT_ABSOLUTE_ADDRESS,
-
+
/// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
/// immediate should get the value of the symbol minus the PIC base label:
/// SYMBOL_LABEL - PICBASELABEL
@@ -106,77 +106,77 @@ namespace X86II {
/// MO_GOT - On a symbol operand this indicates that the immediate is the
/// offset to the GOT entry for the symbol name from the base of the GOT.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOT
MO_GOT,
-
+
/// MO_GOTOFF - On a symbol operand this indicates that the immediate is
- /// the offset to the location of the symbol name from the base of the GOT.
+ /// the offset to the location of the symbol name from the base of the GOT.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOTOFF
MO_GOTOFF,
-
+
/// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
/// offset to the GOT entry for the symbol name from the current code
- /// location.
+ /// location.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @GOTPCREL
MO_GOTPCREL,
-
+
/// MO_PLT - On a symbol operand this indicates that the immediate is
- /// offset to the PLT entry of symbol name from the current code location.
+ /// offset to the PLT entry of symbol name from the current code location.
///
- /// See the X86-64 ELF ABI supplement for more details.
+ /// See the X86-64 ELF ABI supplement for more details.
/// SYMBOL_LABEL @PLT
MO_PLT,
-
+
/// MO_TLSGD - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSGD
MO_TLSGD,
-
+
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @GOTTPOFF
MO_GOTTPOFF,
-
+
/// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @INDNTPOFF
MO_INDNTPOFF,
-
+
/// MO_TPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TPOFF
MO_TPOFF,
-
+
/// MO_NTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
- /// See 'ELF Handling for Thread-Local Storage' for more details.
+ /// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @NTPOFF
MO_NTPOFF,
-
+
/// MO_DLLIMPORT - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "__imp_FOO" symbol. This is used for
/// dllimport linkage on windows.
MO_DLLIMPORT,
-
+
/// MO_DARWIN_STUB - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "FOO$stub" symbol. This is used for calls
/// and jumps to external functions on Tiger and earlier.
MO_DARWIN_STUB,
-
+
/// MO_DARWIN_NONLAZY - On a symbol operand "FOO", this indicates that the
/// reference is actually to the "FOO$non_lazy_ptr" symbol, which is a
/// non-PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
@@ -186,19 +186,19 @@ namespace X86II {
/// that the reference is actually to "FOO$non_lazy_ptr - PICBASE", which is
/// a PIC-base-relative reference to a non-hidden dyld lazy pointer stub.
MO_DARWIN_NONLAZY_PIC_BASE,
-
+
/// MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE - On a symbol operand "FOO", this
/// indicates that the reference is actually to "FOO$non_lazy_ptr -PICBASE",
/// which is a PIC-base-relative reference to a hidden dyld lazy pointer
/// stub.
MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE,
-
+
/// MO_TLVP - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
/// This is the TLS offset for the Darwin TLS mechanism.
MO_TLVP,
-
+
/// MO_TLVP_PIC_BASE - On a symbol operand this indicates that the immediate
/// is some TLS offset from the picbase.
///
@@ -239,7 +239,7 @@ inline static bool isGlobalRelativeToPICBase(unsigned char TargetFlag) {
return false;
}
}
-
+
/// X86II - This namespace holds all of the target specific flags that
/// instruction info tracks.
///
@@ -299,7 +299,7 @@ namespace X86II {
// MRMInitReg - This form is used for instructions whose source and
// destinations are the same register.
MRMInitReg = 32,
-
+
//// MRM_C1 - A mod/rm byte of exactly 0xC1.
MRM_C1 = 33,
MRM_C2 = 34,
@@ -318,7 +318,7 @@ namespace X86II {
/// immediates, the first of which is a 16-bit immediate (specified by
/// the imm encoding) and the second is a 8-bit fixed value.
RawFrmImm8 = 43,
-
+
/// RawFrmImm16 - This is used for CALL FAR instructions, which have two
/// immediates, the first of which is a 16 or 32-bit immediate (specified by
/// the imm encoding) and the second is a 16-bit fixed value. In the AMD
@@ -347,7 +347,7 @@ namespace X86II {
// set, there is no prefix byte for obtaining a multibyte opcode.
//
Op0Shift = 8,
- Op0Mask = 0xF << Op0Shift,
+ Op0Mask = 0x1F << Op0Shift,
// TB - TwoByte - Set if this instruction has a two byte opcode, which
// starts with a 0x0F byte before the real opcode.
@@ -368,11 +368,12 @@ namespace X86II {
// floating point operations performed in the SSE registers.
XD = 11 << Op0Shift, XS = 12 << Op0Shift,
- // T8, TA - Prefix after the 0x0F prefix.
+ // T8, TA, A6, A7 - Prefix after the 0x0F prefix.
T8 = 13 << Op0Shift, TA = 14 << Op0Shift,
-
+ A6 = 15 << Op0Shift, A7 = 16 << Op0Shift,
+
// TF - Prefix before and after 0x0F
- TF = 15 << Op0Shift,
+ TF = 17 << Op0Shift,
//===------------------------------------------------------------------===//
// REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
@@ -380,13 +381,13 @@ namespace X86II {
// etc. We only care about REX.W and REX.R bits and only the former is
// statically determined.
//
- REXShift = 12,
+ REXShift = Op0Shift + 5,
REX_W = 1 << REXShift,
//===------------------------------------------------------------------===//
// This three-bit field describes the size of an immediate operand. Zero is
// unused so that we can tell if we forgot to set a value.
- ImmShift = 13,
+ ImmShift = REXShift + 1,
ImmMask = 7 << ImmShift,
Imm8 = 1 << ImmShift,
Imm8PCRel = 2 << ImmShift,
@@ -400,7 +401,7 @@ namespace X86II {
// FP Instruction Classification... Zero is non-fp instruction.
// FPTypeMask - Mask for all of the FP types...
- FPTypeShift = 16,
+ FPTypeShift = ImmShift + 3,
FPTypeMask = 7 << FPTypeShift,
// NotFP - The default, set for instructions that do not use FP registers.
@@ -433,25 +434,26 @@ namespace X86II {
SpecialFP = 7 << FPTypeShift,
// Lock prefix
- LOCKShift = 19,
+ LOCKShift = FPTypeShift + 3,
LOCK = 1 << LOCKShift,
// Segment override prefixes. Currently we just need ability to address
// stuff in gs and fs segments.
- SegOvrShift = 20,
+ SegOvrShift = LOCKShift + 1,
SegOvrMask = 3 << SegOvrShift,
FS = 1 << SegOvrShift,
GS = 2 << SegOvrShift,
- // Execution domain for SSE instructions in bits 22, 23.
- // 0 in bits 22-23 means normal, non-SSE instruction.
- SSEDomainShift = 22,
+ // Execution domain for SSE instructions in bits 23, 24.
+ // 0 in bits 23-24 means normal, non-SSE instruction.
+ SSEDomainShift = SegOvrShift + 2,
- OpcodeShift = 24,
- OpcodeMask = 0xFF << OpcodeShift,
+ OpcodeShift = SSEDomainShift + 2,
+ OpcodeMask = 0xFFULL << OpcodeShift,
//===------------------------------------------------------------------===//
/// VEX - The opcode prefix used by AVX instructions
+ VEXShift = OpcodeShift + 8,
VEX = 1U << 0,
/// VEX_W - Has opcode-specific functionality, but is used in the same
@@ -473,7 +475,7 @@ namespace X86II {
/// if a VR256 register is used, but some AVX instructions also have this
/// field marked when using a f256 memory references.
VEX_L = 1U << 4,
-
+
/// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
/// wacky 0x0F 0x0F prefix for 3DNow! instructions. The manual documents
/// this as having a 0x0F prefix with a 0x0F opcode, and each instruction
@@ -482,18 +484,18 @@ namespace X86II {
/// this flag to indicate that the encoder should do the wacky 3DNow! thing.
Has3DNow0F0FOpcode = 1U << 5
};
-
+
// getBaseOpcodeFor - This function returns the "base" X86 opcode for the
// specified machine instruction.
//
static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
return TSFlags >> X86II::OpcodeShift;
}
-
+
static inline bool hasImm(uint64_t TSFlags) {
return (TSFlags & X86II::ImmMask) != 0;
}
-
+
/// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
/// of the specified instruction.
static inline unsigned getSizeOfImm(uint64_t TSFlags) {
@@ -508,7 +510,7 @@ namespace X86II {
case X86II::Imm64: return 8;
}
}
-
+
/// isImmPCRel - Return true if the immediate of the specified instruction's
/// TSFlags indicates that it is pc relative.
static inline unsigned isImmPCRel(uint64_t TSFlags) {
@@ -525,7 +527,7 @@ namespace X86II {
return false;
}
}
-
+
/// getMemoryOperandNo - The function returns the MCInst operand # for the
/// first field of the memory operand. If the instruction doesn't have a
/// memory operand, this returns -1.
@@ -549,11 +551,11 @@ namespace X86II {
case X86II::MRMDestMem:
return 0;
case X86II::MRMSrcMem: {
- bool HasVEX_4V = (TSFlags >> 32) & X86II::VEX_4V;
+ bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
unsigned FirstMemOp = 1;
if (HasVEX_4V)
++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
-
+
// FIXME: Maybe lea should have its own form? This is a horrible hack.
//if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
// Opcode == X86::LEA16r || Opcode == X86::LEA32r)
@@ -613,7 +615,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) {
class X86InstrInfo : public TargetInstrInfoImpl {
X86TargetMachine &TM;
const X86RegisterInfo RI;
-
+
/// RegOp2MemOpTable2Addr, RegOp2MemOpTable0, RegOp2MemOpTable1,
/// RegOp2MemOpTable2 - Load / store folding opcode maps.
///
@@ -621,7 +623,7 @@ class X86InstrInfo : public TargetInstrInfoImpl {
DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable0;
DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable1;
DenseMap<unsigned, std::pair<unsigned,unsigned> > RegOp2MemOpTable2;
-
+
/// MemOp2RegOpTable - Load / store unfolding opcode map.
///
DenseMap<unsigned, std::pair<unsigned, unsigned> > MemOp2RegOpTable;
@@ -795,7 +797,7 @@ public:
virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex = 0) const;
-
+
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler
/// to determine if two loads are loading from the same base address. It
/// should only return true if the base pointers are the same and the
@@ -805,7 +807,7 @@ public:
int64_t &Offset1, int64_t &Offset2) const;
/// shouldScheduleLoadsNear - This is a used by the pre-regalloc scheduler to
- /// determine (in conjuction with areLoadsFromSameBasePtr) if two loads should
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets, if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
@@ -829,7 +831,7 @@ public:
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
-
+
static bool isX86_64ExtendedReg(const MachineOperand &MO) {
if (!MO.isReg()) return false;
return isX86_64ExtendedReg(MO.getReg());
@@ -858,11 +860,13 @@ public:
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Alignment) const;
+ bool isHighLatencyDef(int opc) const;
+
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
const MachineRegisterInfo *MRI,
const MachineInstr *DefMI, unsigned DefIdx,
const MachineInstr *UseMI, unsigned UseIdx) const;
-
+
private:
MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
MachineFunction::iterator &MFI,
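
With the opcode byte at bits 32-25, everything VEX-related lives above bit 32, and callers test those flags by shifting the whole TSFlags word down by VEXShift first, exactly as getMemoryOperandNo does above. A compact C++ restatement of that idiom (constants as in the X86II enum above, where VEX_4V is 1 << 2; a sketch, not the header itself):

    #include <cstdint>

    enum : uint64_t {
      OpcodeShift = 25,              // SSEDomainShift + 2
      VEXShift    = OpcodeShift + 8, // first bit past the opcode byte
      VEX_4V      = 1u << 2,         // relative to VEXShift
    };

    static inline bool hasVEX_4V(uint64_t TSFlags) {
      return (TSFlags >> VEXShift) & VEX_4V;
    }

    static inline uint8_t baseOpcode(uint64_t TSFlags) {
      return uint8_t(TSFlags >> OpcodeShift); // truncation keeps bits 32-25
    }

The hard-coded `TSFlags >> 32` that this hunk replaces is precisely the kind of literal the relative-shift style is meant to eliminate.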
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index f832a7c..03a0b0c 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -459,7 +459,7 @@ def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
include "X86InstrFormats.td"
//===----------------------------------------------------------------------===//
-// Pattern fragments...
+// Pattern fragments.
//
// X86 specific condition code. These correspond to CondCode in
@@ -481,21 +481,21 @@ def X86_COND_O : PatLeaf<(i8 13)>;
def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;
-def immSext8 : PatLeaf<(imm), [{ return immSext8(N); }]>;
+let FastIselShouldIgnore = 1 in { // FastIsel should ignore all simm8 instrs.
+ def i16immSExt8 : ImmLeaf<i16, [{ return Imm == (int8_t)Imm; }]>;
+ def i32immSExt8 : ImmLeaf<i32, [{ return Imm == (int8_t)Imm; }]>;
+ def i64immSExt8 : ImmLeaf<i64, [{ return Imm == (int8_t)Imm; }]>;
+}
-def i16immSExt8 : PatLeaf<(i16 immSext8)>;
-def i32immSExt8 : PatLeaf<(i32 immSext8)>;
-def i64immSExt8 : PatLeaf<(i64 immSext8)>;
-def i64immSExt32 : PatLeaf<(i64 imm), [{ return i64immSExt32(N); }]>;
-def i64immZExt32 : PatLeaf<(i64 imm), [{
- // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
- // unsignedsign extended field.
- return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue();
-}]>;
+def i64immSExt32 : ImmLeaf<i64, [{ return Imm == (int32_t)Imm; }]>;
+
+
+// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
+// unsigned field.
+def i64immZExt32 : ImmLeaf<i64, [{ return (uint64_t)Imm == (uint32_t)Imm; }]>;
-def i64immZExt32SExt8 : PatLeaf<(i64 imm), [{
- uint64_t v = N->getZExtValue();
- return v == (uint32_t)v && (int32_t)v == (int8_t)v;
+def i64immZExt32SExt8 : ImmLeaf<i64, [{
+ return (uint64_t)Imm == (uint32_t)Imm && (int32_t)Imm == (int8_t)Imm;
}]>;
// Helper fragments for loads.
@@ -1437,7 +1437,7 @@ def : InstAlias<"idivq $src, %rax", (IDIV64m i64mem:$src)>;
// Various unary fpstack operations default to operating on ST1.
// For example, "fxch" -> "fxch %st(1)"
-def : InstAlias<"faddp", (ADD_FPrST0 ST1)>;
+def : InstAlias<"faddp", (ADD_FPrST0 ST1), 0>;
def : InstAlias<"fsubp", (SUBR_FPrST0 ST1)>;
def : InstAlias<"fsubrp", (SUB_FPrST0 ST1)>;
def : InstAlias<"fmulp", (MUL_FPrST0 ST1)>;
@@ -1455,13 +1455,15 @@ def : InstAlias<"fucompi", (UCOM_FIPr ST1)>;
// For example, "fadd %st(4), %st(0)" -> "fadd %st(4)". We also disambiguate
// instructions like "fadd %st(0), %st(0)" as "fadd %st(0)" for consistency with
// gas.
-multiclass FpUnaryAlias<string Mnemonic, Instruction Inst> {
- def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"), (Inst RST:$op)>;
- def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"), (Inst ST0)>;
+multiclass FpUnaryAlias<string Mnemonic, Instruction Inst, bit EmitAlias = 1> {
+ def : InstAlias<!strconcat(Mnemonic, " $op, %st(0)"),
+ (Inst RST:$op), EmitAlias>;
+ def : InstAlias<!strconcat(Mnemonic, " %st(0), %st(0)"),
+ (Inst ST0), EmitAlias>;
}
defm : FpUnaryAlias<"fadd", ADD_FST0r>;
-defm : FpUnaryAlias<"faddp", ADD_FPrST0>;
+defm : FpUnaryAlias<"faddp", ADD_FPrST0, 0>;
defm : FpUnaryAlias<"fsub", SUB_FST0r>;
defm : FpUnaryAlias<"fsubp", SUBR_FPrST0>;
defm : FpUnaryAlias<"fsubr", SUBR_FST0r>;
@@ -1472,8 +1474,8 @@ defm : FpUnaryAlias<"fdiv", DIV_FST0r>;
defm : FpUnaryAlias<"fdivp", DIVR_FPrST0>;
defm : FpUnaryAlias<"fdivr", DIVR_FST0r>;
defm : FpUnaryAlias<"fdivrp", DIV_FPrST0>;
-defm : FpUnaryAlias<"fcomi", COM_FIr>;
-defm : FpUnaryAlias<"fucomi", UCOM_FIr>;
+defm : FpUnaryAlias<"fcomi", COM_FIr, 0>;
+defm : FpUnaryAlias<"fucomi", UCOM_FIr, 0>;
defm : FpUnaryAlias<"fcompi", COM_FIPr>;
defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
@@ -1481,7 +1483,7 @@ defm : FpUnaryAlias<"fucompi", UCOM_FIPr>;
// Handle "f{mulp,addp} st(0), $op" the same as "f{mulp,addp} $op", since they
// commute. We also allow fdiv[r]p/fsubrp even though they don't commute,
// solely because gas supports it.
-def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op)>;
+def : InstAlias<"faddp %st(0), $op", (ADD_FPrST0 RST:$op), 0>;
def : InstAlias<"fmulp %st(0), $op", (MUL_FPrST0 RST:$op)>;
def : InstAlias<"fsubrp %st(0), $op", (SUB_FPrST0 RST:$op)>;
def : InstAlias<"fdivp %st(0), $op", (DIVR_FPrST0 RST:$op)>;
@@ -1534,29 +1536,31 @@ def : InstAlias<"mov $seg, $mem", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg)>;
def : InstAlias<"movq $imm, $reg", (MOV64ri GR64:$reg, i64imm:$imm)>;
// Match 'movq GR64, MMX' as an alias for movd.
-def : InstAlias<"movq $src, $dst", (MMX_MOVD64to64rr VR64:$dst, GR64:$src)>;
-def : InstAlias<"movq $src, $dst", (MMX_MOVD64from64rr GR64:$dst, VR64:$src)>;
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq $src, $dst",
+ (MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
// movsd with no operands (as opposed to the SSE scalar move of a double) is an
// alias for movsl. (as in rep; movsd)
def : InstAlias<"movsd", (MOVSD)>;
// movsx aliases
-def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src)>;
-def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src)>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rr8W GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX16rm8W GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx $src, $dst", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
// movzx aliases
-def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src)>;
-def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src)>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rr8W GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX16rm8W GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx $src, $dst", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
// Note: No GR32->GR64 movzx form.
// outb %dx -> outb %al, %dx
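
The new ImmLeaf predicates above all reduce to one question -- does the immediate survive a round trip through a narrower type? -- phrased directly on an int64_t value instead of through an SDNode, which is what lets FastISel evaluate them (or, per FastIselShouldIgnore, deliberately skip them). The same checks in plain C++, with a few spot checks:

    #include <cassert>
    #include <cstdint>

    static bool isSExt8(int64_t Imm)  { return Imm == (int8_t)Imm;  }
    static bool isSExt32(int64_t Imm) { return Imm == (int32_t)Imm; }
    static bool isZExt32(int64_t Imm) {
      return (uint64_t)Imm == (uint32_t)Imm;
    }

    int main() {
      assert(isSExt8(-128) && !isSExt8(128));
      assert(isSExt32(INT32_MIN) && !isSExt32(1LL << 31));
      assert(isZExt32(0xFFFFFFFFLL) && !isZExt32(-1));
      return 0;
    }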
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index b912949..cde3f6b 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -135,18 +135,16 @@ class sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
// is used instead. Register-to-register movss/movsd is not modeled as an
// INSERT_SUBREG because INSERT_SUBREG requires that the insert be implementable
// in terms of a copy, and as just mentioned, we don't use movss/movsd for copies.
-let isAsmParserOnly = 1 in {
- def VMOVSSrr : sse12_move_rr<FR32, v4f32,
- "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
- def VMOVSDrr : sse12_move_rr<FR64, v2f64,
- "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
+def VMOVSSrr : sse12_move_rr<FR32, v4f32,
+ "movss\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XS, VEX_4V;
+def VMOVSDrr : sse12_move_rr<FR64, v2f64,
+ "movsd\t{$src2, $src1, $dst|$dst, $src1, $src2}">, XD, VEX_4V;
- let canFoldAsLoad = 1, isReMaterializable = 1 in {
- def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def VMOVSSrm : sse12_move_rm<FR32, f32mem, loadf32, "movss">, XS, VEX;
- let AddedComplexity = 20 in
- def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
- }
+ let AddedComplexity = 20 in
+ def VMOVSDrm : sse12_move_rm<FR64, f64mem, loadf64, "movsd">, XD, VEX;
}
let Constraints = "$src1 = $dst" in {
@@ -218,14 +216,12 @@ def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>;
-let isAsmParserOnly = 1 in {
def VMOVSSmr : SI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store FR32:$src, addr:$dst)]>, XS, VEX;
def VMOVSDmr : SI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>, XD, VEX;
-}
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
@@ -251,7 +247,6 @@ let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
[(set RC:$dst, (ld_frag addr:$src))], d>;
}
-let isAsmParserOnly = 1 in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle>, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
@@ -269,7 +264,6 @@ defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
"movups", SSEPackedSingle>, VEX;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
"movupd", SSEPackedDouble, 0>, OpSize, VEX;
-}
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
"movaps", SSEPackedSingle>, TB;
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
@@ -279,7 +273,6 @@ defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
"movupd", SSEPackedDouble, 0>, TB, OpSize;
-let isAsmParserOnly = 1 in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(alignedstore (v4f32 VR128:$src), addr:$dst)]>, VEX;
@@ -304,7 +297,6 @@ def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
"movupd\t{$src, $dst|$dst, $src}",
[(store (v4f64 VR256:$src), addr:$dst)]>, VEX;
-}
def : Pat<(int_x86_avx_loadu_ps_256 addr:$src), (VMOVUPSYrm addr:$src)>;
def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
@@ -328,32 +320,14 @@ def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(store (v2f64 VR128:$src), addr:$dst)]>;
// Intrinsic forms of MOVUPS/D load and store
-let isAsmParserOnly = 1 in {
- let canFoldAsLoad = 1, isReMaterializable = 1 in
- def VMOVUPSrm_Int : VPSI<0x10, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>, VEX;
- def VMOVUPDrm_Int : VPDI<0x10, MRMSrcMem, (outs VR128:$dst),
- (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>, VEX;
- def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
- def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
-}
-let canFoldAsLoad = 1, isReMaterializable = 1 in
-def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movups\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
-def MOVUPDrm_Int : PDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "movupd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_pd addr:$src))]>;
+def VMOVUPSmr_Int : VPSI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movups\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_storeu_ps addr:$dst, VR128:$src)]>, VEX;
+def VMOVUPDmr_Int : VPDI<0x11, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movupd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_storeu_pd addr:$dst, VR128:$src)]>, VEX;
def MOVUPSmr_Int : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
"movups\t{$src, $dst|$dst, $src}",
@@ -382,7 +356,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, RegisterClass RC,
SSEPackedDouble>, TB, OpSize;
}
-let isAsmParserOnly = 1, AddedComplexity = 20 in {
+let AddedComplexity = 20 in {
defm VMOVL : sse12_mov_hilo_packed<0x12, VR128, movlp, "movlp",
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">, VEX_4V;
defm VMOVH : sse12_mov_hilo_packed<0x16, VR128, movlhps, "movhp",
@@ -395,7 +369,6 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
"\t{$src2, $dst|$dst, $src2}">;
}
-let isAsmParserOnly = 1 in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -404,7 +377,6 @@ def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlpd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)]>, VEX;
-}
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movlps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
@@ -416,7 +388,6 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
-let isAsmParserOnly = 1 in {
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
@@ -429,7 +400,6 @@ def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
(v2f64 (unpckh VR128:$src, (undef))),
(iPTR 0))), addr:$dst)]>,
VEX;
-}
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movhps\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract
@@ -441,7 +411,7 @@ def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
(v2f64 (unpckh VR128:$src, (undef))),
(iPTR 0))), addr:$dst)]>;
-let isAsmParserOnly = 1, AddedComplexity = 20 in {
+let AddedComplexity = 20 in {
def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -516,7 +486,6 @@ multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
!strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>;
}
-let isAsmParserOnly = 1 in {
defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS, VEX;
defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
@@ -542,7 +511,6 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
VEX_4V;
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
VEX_4V, VEX_W;
-}
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS;
@@ -591,27 +559,25 @@ multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC,
[(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))]>;
}
-let isAsmParserOnly = 1 in {
- defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
- f32mem, load, "cvtss2si">, XS, VEX;
- defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
- XS, VEX, VEX_W;
- defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
- f128mem, load, "cvtsd2si">, XD, VEX;
- defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
- int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
- XD, VEX, VEX_W;
-
- // FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
- // Get rid of this hack or rename the intrinsics, there are several
- // intructions that only match with the intrinsic form, why create duplicates
- // to let them be recognized by the assembler?
- defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
- defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
- "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
-}
+defm Int_VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
+ f32mem, load, "cvtss2si">, XS, VEX;
+defm Int_VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse_cvtss2si64, f32mem, load, "cvtss2si">,
+ XS, VEX, VEX_W;
+defm Int_VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si,
+ f128mem, load, "cvtsd2si">, XD, VEX;
+defm Int_VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64,
+ int_x86_sse2_cvtsd2si64, f128mem, load, "cvtsd2si">,
+ XD, VEX, VEX_W;
+
+// FIXME: The asm matcher has a hack to ignore instructions with _Int and Int_
+// Get rid of this hack or rename the intrinsics, there are several
+// intructions that only match with the intrinsic form, why create duplicates
+// to let them be recognized by the assembler?
+defm VCVTSD2SI_alt : sse12_cvt_s_np<0x2D, FR64, GR32, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX;
+defm VCVTSD2SI64 : sse12_cvt_s_np<0x2D, FR64, GR64, f64mem,
+ "cvtsd2si\t{$src, $dst|$dst, $src}">, XD, VEX, VEX_W;
defm Int_CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si,
f32mem, load, "cvtss2si">, XS;
defm Int_CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
@@ -622,18 +588,16 @@ defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64,
f128mem, load, "cvtsd2si{q}">, XD, REX_W;
-let isAsmParserOnly = 1 in {
- defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
- defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
- VEX_W;
- defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
- int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
- defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
- int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
- VEX_4V, VEX_W;
-}
+defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss", 0>, XS, VEX_4V;
+defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss", 0>, XS, VEX_4V,
+ VEX_W;
+defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
+ int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd", 0>, XD, VEX_4V;
+defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128,
+ int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd", 0>, XD,
+ VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in {
defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128,
@@ -653,7 +617,6 @@ let Constraints = "$src1 = $dst" in {
/// SSE 1 Only
// Aliases for intrinsics
-let isAsmParserOnly = 1 in {
defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
f32mem, load, "cvttss2si">, XS, VEX;
defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
@@ -664,7 +627,6 @@ defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si,
defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
"cvttsd2si">, XD, VEX, VEX_W;
-}
defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si,
f32mem, load, "cvttss2si">, XS;
defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
@@ -676,7 +638,7 @@ defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64,
int_x86_sse2_cvttsd2si64, f128mem, load,
"cvttsd2si{q}">, XD, REX_W;
-let isAsmParserOnly = 1, Pattern = []<dag> in {
+let Pattern = []<dag> in {
defm VCVTSS2SI : sse12_cvt_s<0x2D, FR32, GR32, undef, f32mem, load,
"cvtss2si{l}\t{$src, $dst|$dst, $src}">, XS, VEX;
defm VCVTSS2SI64 : sse12_cvt_s<0x2D, FR32, GR64, undef, f32mem, load,
@@ -702,7 +664,6 @@ defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, undef, i128mem, load /*dummy*/,
/// SSE 2 Only
// Convert scalar double to scalar single
-let isAsmParserOnly = 1 in {
def VCVTSD2SSrr : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
(ins FR64:$src1, FR64:$src2),
"cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
@@ -711,7 +672,6 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
(ins FR64:$src1, f64mem:$src2),
"vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XD, Requires<[HasAVX, OptForSize]>, VEX_4V;
-}
def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
Requires<[HasAVX]>;
@@ -723,7 +683,6 @@ def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
[(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
Requires<[HasSSE2, OptForSize]>;
-let isAsmParserOnly = 1 in
defm Int_VCVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss", 0>,
XS, VEX_4V;
@@ -732,7 +691,7 @@ defm Int_CVTSD2SS: sse12_cvt_sint_3addr<0x5A, VR128, VR128,
int_x86_sse2_cvtsd2ss, f64mem, load, "cvtsd2ss">, XS;
// Convert scalar single to scalar double
-let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+// SSE2 instructions with XS prefix
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
(ins FR32:$src1, FR32:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -741,7 +700,6 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
(ins FR32:$src1, f32mem:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[]>, XS, VEX_4V, Requires<[HasAVX, OptForSize]>;
-}
def : Pat<(f64 (fextend FR32:$src)), (VCVTSS2SDrr FR32:$src, FR32:$src)>,
Requires<[HasAVX]>;
@@ -754,7 +712,6 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
[(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
Requires<[HasSSE2, OptForSize]>;
-let isAsmParserOnly = 1 in {
def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -767,7 +724,6 @@ def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem,
[(set VR128:$dst, (int_x86_sse2_cvtss2sd VR128:$src1,
(load addr:$src2)))]>, XS, VEX_4V,
Requires<[HasAVX]>;
-}
let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix
def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
@@ -788,7 +744,7 @@ def : Pat<(extloadf32 addr:$src),
Requires<[HasSSE2, OptForSpeed]>;
// Convert doubleword to packed single/double fp
-let isAsmParserOnly = 1 in { // SSE2 instructions without OpSize prefix
+// SSE2 instructions without OpSize prefix
def Int_VCVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -798,7 +754,6 @@ def Int_VCVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps
(bitconvert (memopv2i64 addr:$src))))]>,
TB, VEX, Requires<[HasAVX]>;
-}
def Int_CVTDQ2PSrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2ps VR128:$src))]>,
@@ -810,7 +765,7 @@ def Int_CVTDQ2PSrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
TB, Requires<[HasSSE2]>;
// FIXME: why is the non-intrinsic version described as SSE3?
-let isAsmParserOnly = 1 in { // SSE2 instructions with XS prefix
+// SSE2 instructions with XS prefix
def Int_VCVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -820,7 +775,6 @@ def Int_VCVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd
(bitconvert (memopv2i64 addr:$src))))]>,
XS, VEX, Requires<[HasAVX]>;
-}
def Int_CVTDQ2PDrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtdq2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))]>,
@@ -833,7 +787,6 @@ def Int_CVTDQ2PDrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
// Convert packed single/double fp to doubleword
-let isAsmParserOnly = 1 in {
def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -842,13 +795,11 @@ def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-}
def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>;
def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}", []>;
-let isAsmParserOnly = 1 in {
def Int_VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>,
@@ -858,7 +809,6 @@ def Int_VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
(memop addr:$src)))]>, VEX;
-}
def Int_CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))]>;
@@ -867,7 +817,7 @@ def Int_CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtps2dq
(memop addr:$src)))]>;
-let isAsmParserOnly = 1 in { // SSE2 packed instructions with XD prefix
+// SSE2 packed instructions with XD prefix
def Int_VCVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
@@ -877,7 +827,6 @@ def Int_VCVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq
(memop addr:$src)))]>,
XD, VEX, Requires<[HasAVX]>;
-}
def Int_CVTPD2DQrr : I<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
@@ -890,7 +839,7 @@ def Int_CVTPD2DQrm : I<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// Convert with truncation packed single/double fp to doubleword
-let isAsmParserOnly = 1 in { // SSE2 packed instructions with XS prefix
+// SSE2 packed instructions with XS prefix
def VCVTTPS2DQrr : VSSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPS2DQrm : VSSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
@@ -899,7 +848,6 @@ def VCVTTPS2DQYrr : VSSI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPS2DQYrm : VSSI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}", []>, VEX;
-}
def CVTTPS2DQrr : SSI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -910,7 +858,6 @@ def CVTTPS2DQrm : SSI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
(int_x86_sse2_cvttps2dq (memop addr:$src)))]>;
-let isAsmParserOnly = 1 in {
def Int_VCVTTPS2DQrr : I<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -921,9 +868,7 @@ def Int_VCVTTPS2DQrm : I<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvttps2dq
(memop addr:$src)))]>,
XS, VEX, Requires<[HasAVX]>;
-}
-let isAsmParserOnly = 1 in {
def Int_VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
@@ -934,7 +879,6 @@ def Int_VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>, VEX;
-}
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))]>;
@@ -943,7 +887,6 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvttpd2dq
(memop addr:$src)))]>;
-let isAsmParserOnly = 1 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
@@ -963,10 +906,9 @@ def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dqy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
// Convert packed single to packed double
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}", []>, VEX;
@@ -982,7 +924,6 @@ def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}", []>, TB;
-let isAsmParserOnly = 1 in {
def Int_VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -992,7 +933,6 @@ def Int_VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
[(set VR128:$dst, (int_x86_sse2_cvtps2pd
(load addr:$src)))]>,
VEX, Requires<[HasAVX]>;
-}
def Int_CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))]>,
@@ -1004,7 +944,6 @@ def Int_CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
TB, Requires<[HasSSE2]>;
// Convert packed double to packed single
-let isAsmParserOnly = 1 in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
@@ -1024,14 +963,12 @@ def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
"cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2psy\t{$src, $dst|$dst, $src}", []>, VEX, VEX_L;
-}
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}", []>;
-let isAsmParserOnly = 1 in {
def Int_VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1040,7 +977,6 @@ def Int_VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps
(memop addr:$src)))]>;
-}
def Int_CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))]>;
@@ -1089,26 +1025,27 @@ def : Pat<(int_x86_avx_cvtt_ps2dq_256 (memopv8f32 addr:$src)),
// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions
multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
string asm, string asm_alt> {
- def rr : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
- asm, []>;
- let mayLoad = 1 in
- def rm : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
- asm, []>;
- // Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1 in {
- def rr_alt : SIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
- asm_alt, []>;
+ def rr : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc),
+ asm, []>;
let mayLoad = 1 in
- def rm_alt : SIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
- asm_alt, []>;
+ def rm : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, SSECC:$cc),
+ asm, []>;
}
+
+ // Accept explicit immediate argument form instead of comparison code.
+ def rr_alt : SIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, []>;
+ let mayLoad = 1 in
+ def rm_alt : SIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, x86memop:$src, i8imm:$src2),
+ asm_alt, []>;
}
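
Both spellings in this multiclass assemble to the same 0xC2 opcode: the cc-mnemonic form (cmp${cc}ss) and the raw-immediate form (cmpss $imm). A minimal C sketch of the compare these defs cover, assuming the standard <immintrin.h> intrinsics; which spelling gets printed is an assembler-level detail:

#include <immintrin.h>

/* Typically assembles as "cmpeqss" (or "vcmpeqss" under AVX), i.e. the
   cmp${cc}ss spelling with cc = eq; "cmpss $0, ..." is the equivalent
   explicit-immediate spelling matched by the rr_alt/rm_alt defs. */
__m128 cmp_scalar_eq(__m128 a, __m128 b) { return _mm_cmpeq_ss(a, b); }
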
-let neverHasSideEffects = 1, isAsmParserOnly = 1 in {
+let neverHasSideEffects = 1 in {
defm VCMPSS : sse12_cmp_scalar<FR32, f32mem,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
"cmpss\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}">,
@@ -1141,14 +1078,12 @@ multiclass sse12_cmp_scalar_int<RegisterClass RC, X86MemOperand x86memop,
}
// Aliases to match intrinsics which expect XMM operand(s).
-let isAsmParserOnly = 1 in {
- defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
- "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
- XS, VEX_4V;
- defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
- "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
- XD, VEX_4V;
-}
+defm Int_VCMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
+ "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XS, VEX_4V;
+defm Int_VCMPSD : sse12_cmp_scalar_int<VR128, f64mem, int_x86_sse2_cmp_sd,
+ "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}">,
+ XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm Int_CMPSS : sse12_cmp_scalar_int<VR128, f32mem, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}">, XS;
@@ -1171,28 +1106,26 @@ multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode,
}
let Defs = [EFLAGS] in {
- let isAsmParserOnly = 1 in {
- defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
- "ucomiss", SSEPackedSingle>, VEX;
- defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
- "ucomisd", SSEPackedDouble>, OpSize, VEX;
- let Pattern = []<dag> in {
- defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
- "comiss", SSEPackedSingle>, VEX;
- defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
- "comisd", SSEPackedDouble>, OpSize, VEX;
- }
-
- defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
- load, "ucomiss", SSEPackedSingle>, VEX;
- defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
- load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
-
- defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
- load, "comiss", SSEPackedSingle>, VEX;
- defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
- load, "comisd", SSEPackedDouble>, OpSize, VEX;
+ defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
+ "ucomiss", SSEPackedSingle>, VEX;
+ defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
+ "ucomisd", SSEPackedDouble>, OpSize, VEX;
+ let Pattern = []<dag> in {
+ defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load,
+ "comiss", SSEPackedSingle>, VEX;
+ defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load,
+ "comisd", SSEPackedDouble>, OpSize, VEX;
}
+
+ defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem,
+ load, "ucomiss", SSEPackedSingle>, VEX;
+ defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem,
+ load, "ucomisd", SSEPackedDouble>, OpSize, VEX;
+
+ defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem,
+ load, "comiss", SSEPackedSingle>, VEX;
+ defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem,
+ load, "comisd", SSEPackedDouble>, OpSize, VEX;
defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32,
"ucomiss", SSEPackedSingle>, TB;
defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64,
@@ -1220,41 +1153,40 @@ let Defs = [EFLAGS] in {
multiclass sse12_cmp_packed<RegisterClass RC, X86MemOperand x86memop,
Intrinsic Int, string asm, string asm_alt,
Domain d> {
- def rri : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
- def rmi : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
- [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
- // Accept explicit immediate argument form instead of comparison code.
let isAsmParserOnly = 1 in {
- def rri_alt : PIi8<0xC2, MRMSrcReg,
- (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
- asm_alt, [], d>;
- def rmi_alt : PIi8<0xC2, MRMSrcMem,
- (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
- asm_alt, [], d>;
+ def rri : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, RC:$src, imm:$cc))], d>;
+ def rmi : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, SSECC:$cc), asm,
+ [(set RC:$dst, (Int RC:$src1, (memop addr:$src), imm:$cc))], d>;
}
-}
-let isAsmParserOnly = 1 in {
- defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
- "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
- "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
- "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
-}
+ // Accept explicit immediate argument form instead of comparison code.
+ def rri_alt : PIi8<0xC2, MRMSrcReg,
+ (outs RC:$dst), (ins RC:$src1, RC:$src, i8imm:$src2),
+ asm_alt, [], d>;
+ def rmi_alt : PIi8<0xC2, MRMSrcMem,
+ (outs RC:$dst), (ins RC:$src1, f128mem:$src, i8imm:$src2),
+ asm_alt, [], d>;
+}
+
+defm VCMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+defm VCMPPD : sse12_cmp_packed<VR128, f128mem, int_x86_sse2_cmp_pd,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_ps_256,
+ "cmp${cc}ps\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmpps\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedSingle>, VEX_4V;
+defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, int_x86_avx_cmp_pd_256,
+ "cmp${cc}pd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ "cmppd\t{$src2, $src, $src1, $dst|$dst, $src1, $src, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
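
The 256-bit AVX compare intrinsic takes the predicate as an immediate, which corresponds directly to the i8imm alt form above. A short sketch, assuming AVX and <immintrin.h>:

#include <immintrin.h>

/* VCMPPSY with the predicate as an 8-bit immediate; _CMP_LT_OS selects
   an ordered, signaling less-than comparison. */
__m256 lt_mask(__m256 a, __m256 b) { return _mm256_cmp_ps(a, b, _CMP_LT_OS); }
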
let Constraints = "$src1 = $dst" in {
defm CMPPS : sse12_cmp_packed<VR128, f128mem, int_x86_sse_cmp_ps,
"cmp${cc}ps\t{$src, $dst|$dst, $src}",
@@ -1294,20 +1226,18 @@ multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
(vt (shufp:$src3 RC:$src1, RC:$src2)))], d>;
}
-let isAsmParserOnly = 1 in {
- defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
- "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv4f32, SSEPackedSingle>, VEX_4V;
- defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
- "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv8f32, SSEPackedSingle>, VEX_4V;
- defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
-                 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv2f64, SSEPackedDouble>, OpSize, VEX_4V;
- defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
-                 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
- memopv4f64, SSEPackedDouble>, OpSize, VEX_4V;
-}
+defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32,
+ "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv8f32, SSEPackedSingle>, TB, VEX_4V;
+defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
+                 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv2f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
+defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64,
+                 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
+ memopv4f64, SSEPackedDouble>, TB, OpSize, VEX_4V;
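
A short C illustration of the shuffle these defs select; _MM_SHUFFLE packs four two-bit lane selectors into the $src3 immediate (standard <immintrin.h> names):

#include <immintrin.h>

/* shufps/vshufps: the low two result lanes pick from a, the high two
   from b; _MM_SHUFFLE(3,2,1,0) keeps each source's lane order. */
__m128 shuffle_example(__m128 a, __m128 b) {
  return _mm_shuffle_ps(a, b, _MM_SHUFFLE(3, 2, 1, 0));
}
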
let Constraints = "$src1 = $dst" in {
defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32,
@@ -1340,33 +1270,31 @@ multiclass sse12_unpack_interleave<bits<8> opc, PatFrag OpNode, ValueType vt,
}
let AddedComplexity = 10 in {
- let isAsmParserOnly = 1 in {
- defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
- VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
- VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
- VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
- VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
-
- defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
- VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
- VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
- VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedSingle>, VEX_4V;
- defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
- VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- SSEPackedDouble>, OpSize, VEX_4V;
- }
+ defm VUNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
+ VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPD: sse12_unpack_interleave<0x15, unpckh, v2f64, memopv2f64,
+ VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPS: sse12_unpack_interleave<0x14, unpckl, v4f32, memopv4f32,
+ VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPD: sse12_unpack_interleave<0x14, unpckl, v2f64, memopv2f64,
+ VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+
+ defm VUNPCKHPSY: sse12_unpack_interleave<0x15, unpckh, v8f32, memopv8f32,
+ VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKHPDY: sse12_unpack_interleave<0x15, unpckh, v4f64, memopv4f64,
+ VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
+ defm VUNPCKLPSY: sse12_unpack_interleave<0x14, unpckl, v8f32, memopv8f32,
+ VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedSingle>, VEX_4V;
+ defm VUNPCKLPDY: sse12_unpack_interleave<0x14, unpckl, v4f64, memopv4f64,
+ VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+ SSEPackedDouble>, OpSize, VEX_4V;
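
The unpckl/unpckh PatFrags above are the familiar interleave intrinsics; a minimal sketch with the standard <immintrin.h> names:

#include <immintrin.h>

/* unpcklps interleaves the low halves of a and b; unpckhps interleaves
   the high halves. */
__m128 interleave_lo(__m128 a, __m128 b) { return _mm_unpacklo_ps(a, b); }
__m128 interleave_hi(__m128 a, __m128 b) { return _mm_unpackhi_ps(a, b); }
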
let Constraints = "$src1 = $dst" in {
defm UNPCKHPS: sse12_unpack_interleave<0x15, unpckh, v4f32, memopv4f32,
@@ -1404,30 +1332,28 @@ defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps",
defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd",
SSEPackedDouble>, TB, OpSize;
-let isAsmParserOnly = 1 in {
- defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
- "movmskps", SSEPackedSingle>, VEX;
- defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
- "movmskpd", SSEPackedDouble>, OpSize,
- VEX;
- defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
- "movmskps", SSEPackedSingle>, VEX;
- defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
- "movmskpd", SSEPackedDouble>, OpSize,
- VEX;
+defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
+ "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
+defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
+ "movmskps", SSEPackedSingle>, VEX;
+defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
+ "movmskpd", SSEPackedDouble>, OpSize,
+ VEX;
- // Assembler Only
- def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
- def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
- def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
- def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
- "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
- VEX;
-}
+// Assembler Only
+def VMOVMSKPSr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
+def VMOVMSKPSYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskps\t{$src, $dst|$dst, $src}", [], SSEPackedSingle>, VEX;
+def VMOVMSKPDYr64r : PI<0x50, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src),
+ "movmskpd\t{$src, $dst|$dst, $src}", [], SSEPackedDouble>, OpSize,
+ VEX;
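
movmskps/movmskpd collect the per-lane sign bits into a general-purpose register; a small sketch of the common use, assuming <immintrin.h>:

#include <immintrin.h>

/* movmskps extracts the four sign bits into the low bits of a GPR, so
   a "does any lane test negative" check is one instruction plus a
   scalar compare. */
int any_negative(__m128 v) { return _mm_movemask_ps(v) != 0; }
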
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Misc aliasing of packed SSE 1 & 2 instructions
@@ -1482,13 +1408,11 @@ def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
///
multiclass sse12_fp_alias_pack_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode> {
- let isAsmParserOnly = 1 in {
- defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
+ defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
+ FR32, f32, f128mem, memopfsf32, SSEPackedSingle, 0>, VEX_4V;
- defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
- }
+ defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
+ FR64, f64, f128mem, memopfsf64, SSEPackedDouble, 0>, OpSize, VEX_4V;
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
@@ -1514,7 +1438,7 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
SDNode OpNode, int HasPat = 0,
list<list<dag>> Pattern = []> {
- let isAsmParserOnly = 1, Pattern = []<dag> in {
+ let Pattern = []<dag> in {
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
!if(HasPat, Pattern[0], // rr
@@ -1561,7 +1485,6 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
///
-let isAsmParserOnly = 1 in {
multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V;
@@ -1569,7 +1492,6 @@ multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V;
}
-}
// AVX 256-bit packed logical ops forms
defm VAND : sse12_fp_packed_logical_y<0x54, "and">;
@@ -1667,38 +1589,36 @@ multiclass basic_sse12_fp_binop_p_y_int<bits<8> opc, string OpcodeStr> {
}
// Binary Arithmetic instructions
-let isAsmParserOnly = 1 in {
- defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_s_int<0x58, "add", 0>,
- basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
- basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
- defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
- basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
- basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
+defm VADD : basic_sse12_fp_binop_s<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_s_int<0x58, "add", 0>,
+ basic_sse12_fp_binop_p<0x58, "add", fadd, 0>,
+ basic_sse12_fp_binop_p_y<0x58, "add", fadd>, VEX_4V;
+defm VMUL : basic_sse12_fp_binop_s<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_s_int<0x59, "mul", 0>,
+ basic_sse12_fp_binop_p<0x59, "mul", fmul, 0>,
+ basic_sse12_fp_binop_p_y<0x59, "mul", fmul>, VEX_4V;
- let isCommutable = 0 in {
- defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
- basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
- basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
- defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
- basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
- basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
- defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
- basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
- basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
- basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
- basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
- defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
- basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
- basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
- basic_sse12_fp_binop_p_y_int<0x5D, "min">,
- basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
- }
+let isCommutable = 0 in {
+ defm VSUB : basic_sse12_fp_binop_s<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_s_int<0x5C, "sub", 0>,
+ basic_sse12_fp_binop_p<0x5C, "sub", fsub, 0>,
+ basic_sse12_fp_binop_p_y<0x5C, "sub", fsub>, VEX_4V;
+ defm VDIV : basic_sse12_fp_binop_s<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_s_int<0x5E, "div", 0>,
+ basic_sse12_fp_binop_p<0x5E, "div", fdiv, 0>,
+ basic_sse12_fp_binop_p_y<0x5E, "div", fdiv>, VEX_4V;
+ defm VMAX : basic_sse12_fp_binop_s<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_s_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p<0x5F, "max", X86fmax, 0>,
+ basic_sse12_fp_binop_p_int<0x5F, "max", 0>,
+ basic_sse12_fp_binop_p_y<0x5F, "max", X86fmax>,
+ basic_sse12_fp_binop_p_y_int<0x5F, "max">, VEX_4V;
+ defm VMIN : basic_sse12_fp_binop_s<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_s_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p<0x5D, "min", X86fmin, 0>,
+ basic_sse12_fp_binop_p_int<0x5D, "min", 0>,
+ basic_sse12_fp_binop_p_y_int<0x5D, "min">,
+ basic_sse12_fp_binop_p_y<0x5D, "min", X86fmin>, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
@@ -1899,7 +1819,7 @@ multiclass sse2_fp_unop_p_y_int<bits<8> opc, string OpcodeStr,
[(set VR256:$dst, (V2F64Int (memopv4f64 addr:$src)))]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// Square root.
defm VSQRT : sse1_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse_sqrt_ss>,
sse2_fp_unop_s_avx<0x51, "vsqrt", fsqrt, int_x86_sse2_sqrt_sd>,
@@ -1955,67 +1875,65 @@ defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, int_x86_sse_rcp_ss>,
// SSE 1 & 2 - Non-temporal stores
//===----------------------------------------------------------------------===//
-let isAsmParserOnly = 1 in {
- def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
- def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
+def VMOVNTPSmr_Int : VPSI<0x2B, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>, VEX;
+def VMOVNTPDmr_Int : VPDI<0x2B, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>, VEX;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr_Int : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
+
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+ def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>, VEX;
-
- let AddedComplexity = 400 in { // Prefer non-temporal versions
- def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
- addr:$dst)]>, VEX;
- def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX;
- def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v2f64 VR128:$src),
- addr:$dst)]>, VEX;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQ_64mr : VPDI<0xE7, MRMDestMem, (outs),
(ins f128mem:$dst, VR128:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f32 VR128:$src),
+ [(alignednontemporalstore (v2f64 VR128:$src),
addr:$dst)]>, VEX;
-
- def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntps\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
- addr:$dst)]>, VEX;
- def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX;
- def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
- (ins f256mem:$dst, VR256:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v4f64 VR256:$src),
- addr:$dst)]>, VEX;
- let ExeDomain = SSEPackedInt in
- def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src),
+ addr:$dst)]>, VEX;
+
+ def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f64 VR256:$src),
+ addr:$dst)]>, VEX;
+ def VMOVNTDQY_64mr : VPDI<0xE7, MRMDestMem, (outs),
(ins f256mem:$dst, VR256:$src),
"movntdq\t{$src, $dst|$dst, $src}",
- [(alignednontemporalstore (v8f32 VR256:$src),
+ [(alignednontemporalstore (v4f64 VR256:$src),
addr:$dst)]>, VEX;
- }
+ let ExeDomain = SSEPackedInt in
+ def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs),
+ (ins f256mem:$dst, VR256:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v8f32 VR256:$src),
+ addr:$dst)]>, VEX;
}
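
A sketch of the non-temporal store path these patterns select, assuming <immintrin.h>; the destination must be 16-byte aligned, and a store fence publishes the streaming stores:

#include <immintrin.h>

/* movntps: bypass the cache when filling a large write-once buffer.
   dst must be 16-byte aligned. */
void stream_fill(float *dst, __m128 v, int nvec) {
  for (int i = 0; i < nvec; ++i)
    _mm_stream_ps(dst + 4 * i, v);
  _mm_sfence(); /* order the streaming stores before later loads */
}
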
def : Pat<(int_x86_avx_movnt_dq_256 addr:$dst, VR256:$src),
@@ -2138,12 +2056,10 @@ def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
// SSE 1 & 2 - Load/Store XCSR register
//===----------------------------------------------------------------------===//
-let isAsmParserOnly = 1 in {
- def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
- "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
- def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
- "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
-}
+def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
+ "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>, VEX;
+def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
+ "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)]>, VEX;
def LDMXCSR : PSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
"ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)]>;
@@ -2156,45 +2072,43 @@ def STMXCSR : PSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-let isAsmParserOnly = 1 in {
- let neverHasSideEffects = 1 in {
- def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- }
- def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
- def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
- "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
-
- let canFoldAsLoad = 1, mayLoad = 1 in {
- def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- let Predicates = [HasAVX] in {
- def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- }
- }
+let neverHasSideEffects = 1 in {
+def VMOVDQArr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+}
+def VMOVDQUrr : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
+def VMOVDQUYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
+ "movdqu\t{$src, $dst|$dst, $src}", []>, XS, VEX;
- let mayStore = 1 in {
- def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
- (ins i128mem:$dst, VR128:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
- (ins i256mem:$dst, VR256:$src),
- "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
- let Predicates = [HasAVX] in {
- def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+let canFoldAsLoad = 1, mayLoad = 1 in {
+def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+ def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
- }
- }
+}
+}
+
+let mayStore = 1 in {
+def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i128mem:$dst, VR128:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs),
+ (ins i256mem:$dst, VR256:$src),
+ "movdqa\t{$src, $dst|$dst, $src}", []>, VEX;
+let Predicates = [HasAVX] in {
+def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
+ "vmovdqu\t{$src, $dst|$dst, $src}",[]>, XS, VEX;
+}
}
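
The movdqa/movdqu split above is the usual aligned/unaligned distinction; a minimal sketch with the standard load intrinsics:

#include <immintrin.h>

/* movdqa faults on a misaligned address; movdqu accepts any address. */
__m128i load_aligned(const __m128i *p) { return _mm_load_si128(p); }
__m128i load_unaligned(const void *p) {
  return _mm_loadu_si128((const __m128i *)p);
}
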
let neverHasSideEffects = 1 in
@@ -2226,23 +2140,11 @@ def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
}
// Intrinsic forms of MOVDQU load and store
-let isAsmParserOnly = 1 in {
-let canFoldAsLoad = 1 in
-def VMOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovdqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
- XS, VEX, Requires<[HasAVX]>;
def VMOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"vmovdqu\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
XS, VEX, Requires<[HasAVX]>;
-}
-let canFoldAsLoad = 1 in
-def MOVDQUrm_Int : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movdqu\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_loadu_dq addr:$src))]>,
- XS, Requires<[HasSSE2]>;
def MOVDQUmr_Int : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storeu_dq addr:$dst, VR128:$src)]>,
@@ -2347,7 +2249,7 @@ multiclass PDI_binop_rm_v2i64<bits<8> opc, string OpcodeStr, SDNode OpNode,
// 128-bit Integer Arithmetic
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V;
defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V;
defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V;
@@ -2437,7 +2339,7 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>;
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
VEX_4V;
@@ -2584,7 +2486,7 @@ let Predicates = [HasSSE2] in {
// SSE2 - Packed Integer Comparison Instructions
//===---------------------------------------------------------------------===//
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1,
0>, VEX_4V;
defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1,
@@ -2638,7 +2540,7 @@ def : Pat<(v4i32 (X86pcmpgtd VR128:$src1, (memop addr:$src2))),
// SSE2 - Packed Integer Pack Instructions
//===---------------------------------------------------------------------===//
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128,
0, 0>, VEX_4V;
defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128,
@@ -2676,7 +2578,7 @@ def mi : Ii8<0x70, MRMSrcMem,
}
} // ExeDomain = SSEPackedInt
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let AddedComplexity = 5 in
defm VPSHUFD : sse2_pshuffle<"vpshufd", v4i32, pshufd, bc_v4i32>, OpSize,
VEX;
@@ -2724,7 +2626,7 @@ multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
addr:$src2))))]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, unpckl, bc_v16i8,
0>, VEX_4V;
defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, unpckl, bc_v8i16,
@@ -2834,7 +2736,7 @@ multiclass sse2_pinsrw<bit Is2Addr = 1> {
}
// Extract
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
def VPEXTRWri : Ii8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR128:$src1, i32i8imm:$src2),
"vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -2847,7 +2749,7 @@ def PEXTRWri : PDIi8<0xC5, MRMSrcReg,
imm:$src2))]>;
// Insert
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPINSRW : sse2_pinsrw<0>, OpSize, VEX_4V;
def VPINSRWrr64i : Ii8<0xC4, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, GR64:$src2, i32i8imm:$src3),
@@ -2866,13 +2768,11 @@ let Constraints = "$src1 = $dst" in
let ExeDomain = SSEPackedInt in {
-let isAsmParserOnly = 1 in {
def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX;
def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX;
-}
def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
"pmovmskb\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>;
@@ -2885,7 +2785,6 @@ def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
let ExeDomain = SSEPackedInt in {
-let isAsmParserOnly = 1 in {
let Uses = [EDI] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
@@ -2896,7 +2795,6 @@ def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
(ins VR128:$src, VR128:$mask),
"maskmovdqu\t{$mask, $src|$src, $mask}",
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>, VEX;
-}
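
maskmovdqu performs a byte-granular masked store to the address implicitly held in (E/R)DI, which is why these defs carry Uses = [EDI]/[RDI]; the intrinsic hides the implicit register. A sketch, assuming <immintrin.h>:

#include <immintrin.h>

/* For each of the 16 bytes, write data[i] to p[i] iff the top bit of
   mask[i] is set. */
void masked_store(__m128i data, __m128i mask, char *p) {
  _mm_maskmoveu_si128(data, mask, p);
}
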
let Uses = [EDI] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
@@ -2914,7 +2812,6 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Packed Double Int
-let isAsmParserOnly = 1 in {
def VMOVDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2924,7 +2821,6 @@ def VMOVDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
[(set VR128:$dst,
(v4i32 (scalar_to_vector (loadi32 addr:$src))))]>,
VEX;
-}
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2943,7 +2839,6 @@ def MOV64toSDrr : RPDI<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
// Move Int Doubleword to Single Scalar
-let isAsmParserOnly = 1 in {
def VMOVDI2SSrr : VPDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>, VEX;
@@ -2952,7 +2847,6 @@ def VMOVDI2SSrm : VPDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>,
VEX;
-}
def MOVDI2SSrr : PDI<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))]>;
@@ -2962,7 +2856,6 @@ def MOVDI2SSrm : PDI<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src),
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))]>;
// Move Packed Doubleword Int to Packed Double Int
-let isAsmParserOnly = 1 in {
def VMOVPDI2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2972,7 +2865,6 @@ def VMOVPDI2DImr : VPDI<0x7E, MRMDestMem, (outs),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (vector_extract (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)]>, VEX;
-}
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2998,14 +2890,12 @@ def MOVSDto64mr : RPDI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
[(store (i64 (bitconvert FR64:$src)), addr:$dst)]>;
// Move Scalar Single to Double Int
-let isAsmParserOnly = 1 in {
def VMOVSS2DIrr : VPDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>, VEX;
def VMOVSS2DImr : VPDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, VEX;
-}
def MOVSS2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))]>;
@@ -3014,7 +2904,7 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
// movd / movq to an XMM register zero-extends
-let AddedComplexity = 15, isAsmParserOnly = 1 in {
+let AddedComplexity = 15 in {
def VMOVZDI2PDIrr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4i32 (X86vzmovl
@@ -3038,7 +2928,6 @@ def MOVZQI2PQIrr : RPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
}
let AddedComplexity = 20 in {
-let isAsmParserOnly = 1 in
def VMOVZDI2PDIrm : VPDI<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3064,7 +2953,6 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
//===---------------------------------------------------------------------===//
// Move Quadword Int to Packed Quadword Int
-let isAsmParserOnly = 1 in
def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3077,7 +2965,6 @@ def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
Requires<[HasSSE2]>; // SSE2 instruction with XS Prefix
// Move Packed Quadword Int to Quadword Int
-let isAsmParserOnly = 1 in
def VMOVPQI2QImr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (vector_extract (v2i64 VR128:$src),
@@ -3091,7 +2978,6 @@ def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
(f64 (EXTRACT_SUBREG (v2f64 VR128:$src), sub_sd))>;
// Store / copy the lower 64 bits of an XMM register.
-let isAsmParserOnly = 1 in
def VMOVLQ128mr : VPDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>, VEX;
@@ -3099,7 +2985,7 @@ def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
-let AddedComplexity = 20, isAsmParserOnly = 1 in
+let AddedComplexity = 20 in
def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -3124,7 +3010,7 @@ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
// Moving from XMM to XMM, clearing the upper 64 bits. Note: there is a bug in
// the IA-32 documentation; movq xmm1, xmm2 does clear the high bits.
-let isAsmParserOnly = 1, AddedComplexity = 15 in
+let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
@@ -3135,7 +3021,7 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
XS, Requires<[HasSSE2]>;
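
A C-level view of the zero-extending register move matched by the X86vzmovl patterns above, assuming <immintrin.h>:

#include <immintrin.h>

/* movq xmm, xmm: keep the low 64 bits and zero the high 64 bits. */
__m128i keep_low64(__m128i v) { return _mm_move_epi64(v); }
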
-let AddedComplexity = 20, isAsmParserOnly = 1 in
+let AddedComplexity = 20 in
def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl
@@ -3153,7 +3039,6 @@ def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4i32 addr:$src)))),
}
// Instructions to match in the assembler
-let isAsmParserOnly = 1 in {
def VMOVQs64rr : VPDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
@@ -3161,13 +3046,12 @@ def VMOVQd64rr : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
// Recognize "movd" with GR64 destination, but encode as a "movq"
def VMOVQd64rr_alt : VPDI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}", []>, VEX, VEX_W;
-}
// Instructions for the disassembler
// xr = XMM register
// xm = mem64
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
def VMOVQxrxr: I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}", []>, VEX, XS;
def MOVQxrxr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3209,7 +3093,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
//===---------------------------------------------------------------------===//
// Convert Packed Double FP to Packed DW Integers
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
@@ -3237,7 +3121,7 @@ def CVTPD2DQrr : S3DI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}", []>;
// Convert Packed DW Integers to Packed Double FP
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
def VCVTDQ2PDrm : S3SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}", []>, VEX;
def VCVTDQ2PDrr : S3SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -3288,7 +3172,7 @@ def rm : S3SI<op, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), []>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// FIXME: Merge above classes when we have patterns for the ymm version
defm VMOVSHDUP : sse3_replicate_sfp<0x16, movshdup, "vmovshdup">, VEX;
defm VMOVSLDUP : sse3_replicate_sfp<0x12, movsldup, "vmovsldup">, VEX;
@@ -3319,7 +3203,7 @@ def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
[]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// FIXME: Merge above classes when we have patterns for the ymm version
defm VMOVDDUP : sse3_replicate_dfp<"vmovddup">, VEX;
defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX;
@@ -3327,7 +3211,7 @@ let isAsmParserOnly = 1, Predicates = [HasAVX] in {
defm MOVDDUP : sse3_replicate_dfp<"movddup">;
// Move Unaligned Integer
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vlddqu\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
@@ -3391,21 +3275,21 @@ multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
[(set RC:$dst, (Int RC:$src1, (memop addr:$src2)))]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX],
+let Predicates = [HasAVX],
ExeDomain = SSEPackedDouble in {
defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
- f128mem, 0>, XD, VEX_4V;
+ f128mem, 0>, TB, XD, VEX_4V;
defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
- f128mem, 0>, OpSize, VEX_4V;
+ f128mem, 0>, TB, OpSize, VEX_4V;
defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
- f256mem, 0>, XD, VEX_4V;
+ f256mem, 0>, TB, XD, VEX_4V;
defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
- f256mem, 0>, OpSize, VEX_4V;
+ f256mem, 0>, TB, OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst", Predicates = [HasSSE3],
ExeDomain = SSEPackedDouble in {
defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
- f128mem>, XD;
+ f128mem>, TB, XD;
defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
f128mem>, TB, OpSize;
}
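
addsubps/addsubpd alternate subtraction and addition across lanes, the building block of vectorized complex arithmetic; a minimal sketch:

#include <immintrin.h>

/* addsubps: result lane i is a[i] - b[i] for even i and a[i] + b[i]
   for odd i. */
__m128 addsub(__m128 a, __m128 b) { return _mm_addsub_ps(a, b); }
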
@@ -3444,7 +3328,7 @@ multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC,
[(set RC:$dst, (vt (IntId RC:$src1, (memop addr:$src2))))]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem,
int_x86_sse3_hadd_ps, 0>, VEX_4V;
defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem,
@@ -3496,7 +3380,7 @@ multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (mem_frag128 addr:$src))))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8,
int_x86_ssse3_pabs_b_128>, VEX;
defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", memopv8i16,
@@ -3538,7 +3422,7 @@ multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VPHADDW : SS3I_binop_rm_int<0x01, "vphaddw", memopv8i16,
int_x86_ssse3_phadd_w_128, 0>, VEX_4V;
@@ -3630,7 +3514,7 @@ multiclass ssse3_palign<string asm, bit Is2Addr = 1> {
[]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PALIGN : ssse3_palign<"palignr">;
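
palignr concatenates its two sources and extracts a byte-shifted 16-byte window, which is what makes it useful for misaligned streaming; a sketch with the standard intrinsic:

#include <immintrin.h>

/* Concatenate a:b (a in the high half), shift right by 4 bytes, and
   return the low 16 bytes. */
__m128i shifted_window(__m128i a, __m128i b) {
  return _mm_alignr_epi8(a, b, 4);
}
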
@@ -3985,7 +3869,7 @@ multiclass SS41I_binop_rm_int8<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>,
VEX;
defm VPMOVSXWD : SS41I_binop_rm_int8<0x23, "vpmovsxwd", int_x86_sse41_pmovsxwd>,
@@ -4051,7 +3935,7 @@ multiclass SS41I_binop_rm_int4<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>,
VEX;
defm VPMOVSXWQ : SS41I_binop_rm_int4<0x24, "vpmovsxwq", int_x86_sse41_pmovsxwq>,
@@ -4092,7 +3976,7 @@ multiclass SS41I_binop_rm_int2<bits<8> opc, string OpcodeStr, Intrinsic IntId> {
OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>,
VEX;
defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>,
@@ -4134,7 +4018,7 @@ multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
// (store (i8 (trunc (X86pextrb (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;
def VPEXTRBrr64 : SS4AIi8<0x14, MRMDestReg, (outs GR64:$dst),
(ins VR128:$src1, i32i8imm:$src2),
@@ -4156,7 +4040,7 @@ multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
// (store (i16 (trunc (X86pextrw (v16i8 VR128:$src1), imm:$src2))), addr:$dst)
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;
defm PEXTRW : SS41I_extract16<0x15, "pextrw">;
@@ -4178,7 +4062,7 @@ multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;
defm PEXTRD : SS41I_extract32<0x16, "pextrd">;
@@ -4199,7 +4083,7 @@ multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize, REX_W;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;
defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;
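
The extract family moves one vector element into a GPR or to memory, selected by an immediate lane index; a one-line sketch:

#include <immintrin.h>

/* pextrd $1, %xmm0, %eax: read the second 32-bit lane. */
int second_lane(__m128i v) { return _mm_extract_epi32(v, 1); }
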
@@ -4222,7 +4106,7 @@ multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr> {
addr:$dst)]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX;
def VEXTRACTPSrr64 : SS4AIi8<0x17, MRMDestReg, (outs GR64:$dst),
(ins VR128:$src1, i32i8imm:$src2),
@@ -4262,7 +4146,7 @@ multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PINSRB : SS41I_insert8<0x20, "pinsrb">;
@@ -4288,7 +4172,7 @@ multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3)))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PINSRD : SS41I_insert32<0x22, "pinsrd">;
@@ -4314,7 +4198,7 @@ multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> {
imm:$src3)))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;
@@ -4347,7 +4231,7 @@ multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1> {
let Constraints = "$src1 = $dst" in
defm INSERTPS : SS41I_insertf32<0x21, "insertps">;
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
def : Pat<(int_x86_sse41_insertps VR128:$src1, VR128:$src2, imm:$src3),
@@ -4517,7 +4401,7 @@ multiclass sse41_fp_binop_rm_avx_s<bits<8> opcss, bits<8> opcsd,
}
// FP round - roundss, roundps, roundsd, roundpd
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
// Intrinsic form
defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
memopv4f32, memopv2f64,
@@ -4552,7 +4436,7 @@ defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
// ptest instruction: we lower to this in X86ISelLowering, primarily from
// the Intel intrinsic that corresponds to it.
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
"vptest\t{$src2, $src1|$src1, $src2}",
[(set EFLAGS, (X86ptest VR128:$src1, (v4f32 VR128:$src2)))]>,
@@ -4595,7 +4479,7 @@ multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
OpSize, VEX;
}
-let Defs = [EFLAGS], isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Defs = [EFLAGS], Predicates = [HasAVX] in {
defm VTESTPS : avx_bittest<0x0E, "vtestps", VR128, f128mem, memopv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, memopv8f32, v8f32>;
defm VTESTPD : avx_bittest<0x0F, "vtestpd", VR128, f128mem, memopv2f64, v2f64>;
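
ptest sets EFLAGS directly from a vector AND, so an all-zero test needs no transfer out of the vector unit; a sketch assuming SSE4.1 and <immintrin.h>:

#include <immintrin.h>

/* ptest: ZF is set iff (v & v) == 0, i.e. v is all zeroes. */
int is_all_zero(__m128i v) { return _mm_testz_si128(v, v); }
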
@@ -4644,7 +4528,7 @@ multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
(bitconvert (memopv8i16 addr:$src))))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
int_x86_sse41_phminposuw>, VEX;
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
@@ -4670,7 +4554,7 @@ multiclass SS41I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let isCommutable = 0 in
defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
0>, VEX_4V;
@@ -4737,7 +4621,7 @@ multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V;
let Constraints = "$src1 = $dst" in
defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>;
@@ -4769,7 +4653,7 @@ multiclass SS41I_binop_rmi_int<bits<8> opc, string OpcodeStr,
OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
let isCommutable = 0 in {
defm VBLENDPS : SS41I_binop_rmi_int<0x0C, "vblendps", int_x86_sse41_blendps,
VR128, memopv16i8, i128mem, 0>, VEX_4V;
@@ -4810,7 +4694,7 @@ let Constraints = "$src1 = $dst" in {
}
/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, Intrinsic IntId> {
@@ -4870,7 +4754,7 @@ defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", int_x86_sse41_pblendvb>;
def : Pat<(X86pblendv VR128:$src1, VR128:$src2, XMM0),
(PBLENDVBrr0 VR128:$src1, VR128:$src2)>;
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
"vmovntdqa\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>,
@@ -4904,7 +4788,7 @@ multiclass SS42I_binop_rm_int<bits<8> opc, string OpcodeStr,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in
+let Predicates = [HasAVX] in
defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq,
0>, VEX_4V;
let Constraints = "$src1 = $dst" in
@@ -4936,8 +4820,7 @@ let Defs = [EFLAGS], usesCustomInserter = 1 in {
defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128">, Requires<[HasAVX]>;
}
-let Defs = [XMM0, EFLAGS], isAsmParserOnly = 1,
- Predicates = [HasAVX] in {
+let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in {
def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX;
@@ -4972,7 +4855,7 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128">, Requires<[HasAVX]>;
}
-let isAsmParserOnly = 1, Predicates = [HasAVX],
+let Predicates = [HasAVX],
Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in {
def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs),
(ins VR128:$src1, VR128:$src3, i8imm:$src5),
@@ -5007,7 +4890,7 @@ let Defs = [ECX, EFLAGS] in {
}
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPCMPISTRI : SS42AI_pcmpistri<int_x86_sse42_pcmpistri128, "vpcmpistri">,
VEX;
defm VPCMPISTRIA : SS42AI_pcmpistri<int_x86_sse42_pcmpistria128, "vpcmpistri">,
@@ -5046,7 +4929,7 @@ let Defs = [ECX, EFLAGS], Uses = [EAX, EDX] in {
}
}
-let isAsmParserOnly = 1, Predicates = [HasAVX] in {
+let Predicates = [HasAVX] in {
defm VPCMPESTRI : SS42AI_pcmpestri<int_x86_sse42_pcmpestri128, "vpcmpestri">,
VEX;
defm VPCMPESTRIA : SS42AI_pcmpestri<int_x86_sse42_pcmpestria128, "vpcmpestri">,
@@ -5165,7 +5048,7 @@ multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
}
// Perform One Round of an AES Encryption/Decryption Flow
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc",
int_x86_aesni_aesenc, 0>, VEX_4V;
defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast",
@@ -5205,7 +5088,7 @@ def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
(AESDECLASTrm VR128:$src1, addr:$src2)>;
// Perform the AES InvMixColumn Transformation
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1),
"vaesimc\t{$src1, $dst|$dst, $src1}",
@@ -5233,7 +5116,7 @@ def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst),
OpSize;
// AES Round Key Generation Assist
-let isAsmParserOnly = 1, Predicates = [HasAVX, HasAES] in {
+let Predicates = [HasAVX, HasAES] in {
def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, i8imm:$src2),
"vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -5269,7 +5152,6 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
// Only the AVX versions of the CLMUL instructions are described here.
// Carry-less Multiplication instructions
-let isAsmParserOnly = 1 in {
def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -5295,13 +5177,10 @@ defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">;
defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">;
defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">;
-} // isAsmParserOnly
-
//===----------------------------------------------------------------------===//
// AVX Instructions
//===----------------------------------------------------------------------===//
-let isAsmParserOnly = 1 in {
// Load from memory and broadcast to all elements of the destination operand
class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -5435,8 +5314,6 @@ def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
[(int_x86_avx_vzeroupper)]>, VEX, Requires<[HasAVX]>;
-} // isAsmParserOnly
-
def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3),
(VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>;
def : Pat<(int_x86_avx_vinsertf128_ps_256 VR256:$src1, VR128:$src2, imm:$src3),
@@ -5622,11 +5499,15 @@ def : Pat<(X86Movddup (bc_v2f64
// Shuffle with UNPCKLPS
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
(VUNPCKLPSrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, (memopv8f32 addr:$src2))),
+ (VUNPCKLPSYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, (memopv4f32 addr:$src2))),
(UNPCKLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
(VUNPCKLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v8f32 (X86Unpcklpsy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
def : Pat<(v4f32 (X86Unpcklps VR128:$src1, VR128:$src2)),
(UNPCKLPSrr VR128:$src1, VR128:$src2)>;
@@ -5644,11 +5525,15 @@ def : Pat<(v4f32 (X86Unpckhps VR128:$src1, VR128:$src2)),
// Shuffle with UNPCKLPD
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
(VUNPCKLPDrm VR128:$src1, addr:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, (memopv4f64 addr:$src2))),
+ (VUNPCKLPDYrm VR256:$src1, addr:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, (memopv2f64 addr:$src2))),
(UNPCKLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
(VUNPCKLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasAVX]>;
+def : Pat<(v4f64 (X86Unpcklpdy VR256:$src1, VR256:$src2)),
+ (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>, Requires<[HasAVX]>;
def : Pat<(v2f64 (X86Unpcklpd VR128:$src1, VR128:$src2)),
(UNPCKLPDrr VR128:$src1, VR128:$src2)>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
index 6a24d14..f73cff3 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -34,9 +34,16 @@ let Uses = [EFLAGS] in
def INTO : I<0xce, RawFrm, (outs), (ins), "into", []>;
def INT3 : I<0xcc, RawFrm, (outs), (ins), "int3",
[(int_x86_int (i8 3))]>;
+
+// The long form of "int $3" turns into int3 as a size optimization.
+// FIXME: This doesn't work because InstAlias can't match immediate constants.
+//def : InstAlias<"int\t$3", (INT3)>;
+
+
def INT : Ii8<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap",
[(int_x86_int imm:$trap)]>;
+
def SYSCALL : I<0x05, RawFrm, (outs), (ins), "syscall", []>, TB;
def SYSRETL : I<0x07, RawFrm, (outs), (ins), "sysretl", []>, TB;
def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB,
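The disabled alias above is worth making concrete: per the Intel SDM, "int3" has its own one-byte opcode (CC), while the generic "int $imm8" form is two bytes (CD ib), so rewriting "int $3" as int3 saves a byte. A minimal standalone C++ sketch of just the two encodings (byte values only, no LLVM APIs involved):

    #include <cstdint>

    // Encodings from the Intel SDM: INT3 is the dedicated one-byte
    // breakpoint opcode, INT imm8 is the generic two-byte form.
    static const uint8_t Int3[]    = {0xCC};        // "int3"
    static const uint8_t IntImm3[] = {0xCD, 0x03};  // "int $3"

    int main() {
      // The commented-out InstAlias would rewrite the two-byte form
      // into the one-byte one, saving a byte per breakpoint.
      return sizeof(Int3) < sizeof(IntImm3) ? 0 : 1;
    }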
@@ -207,10 +214,15 @@ def LSL64rr : RI<0x03, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
def INVLPG : I<0x01, MRM7m, (outs), (ins i8mem:$addr), "invlpg\t$addr", []>, TB;
-def STRr : I<0x00, MRM1r, (outs GR16:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
-def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
- "str{w}\t{$dst}", []>, TB;
+def STR16r : I<0x00, MRM1r, (outs GR16:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB, OpSize;
+def STR32r : I<0x00, MRM1r, (outs GR32:$dst), (ins),
+ "str{l}\t{$dst}", []>, TB;
+def STR64r : RI<0x00, MRM1r, (outs GR64:$dst), (ins),
+ "str{q}\t{$dst}", []>, TB;
+def STRm : I<0x00, MRM1m, (outs i16mem:$dst), (ins),
+ "str{w}\t{$dst}", []>, TB;
+
def LTRr : I<0x00, MRM3r, (outs), (ins GR16:$src),
"ltr{w}\t{$src}", []>, TB;
def LTRm : I<0x00, MRM3m, (outs), (ins i16mem:$src),
@@ -393,3 +405,23 @@ let Defs = [RDX, RAX], Uses = [RCX] in
let Uses = [RDX, RAX, RCX] in
def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
+
+//===----------------------------------------------------------------------===//
+// VIA PadLock crypto instructions
+let Defs = [RAX, RDI], Uses = [RDX, RDI] in
+ def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7;
+
+let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
+ def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7;
+ def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7;
+ def XCRYPTCTR : I<0xd8, RawFrm, (outs), (ins), "xcryptctr", []>, A7;
+ def XCRYPTCFB : I<0xe0, RawFrm, (outs), (ins), "xcryptcfb", []>, A7;
+ def XCRYPTOFB : I<0xe8, RawFrm, (outs), (ins), "xcryptofb", []>, A7;
+}
+
+let Defs = [RAX, RSI, RDI], Uses = [RAX, RSI, RDI] in {
+ def XSHA1 : I<0xc8, RawFrm, (outs), (ins), "xsha1", []>, A6;
+ def XSHA256 : I<0xd0, RawFrm, (outs), (ins), "xsha256", []>, A6;
+}
+let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in
+ def MONTMUL : I<0xc0, RawFrm, (outs), (ins), "montmul", []>, A6;
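The A6/A7 classes used by these PadLock definitions correspond to the 0F A6 and 0F A7 two-byte opcode escapes that the X86MCCodeEmitter change further down teaches the encoder to emit. A rough standalone sketch of that prefix emission (the enum names are placeholders mirroring X86II; only the emitted bytes match the patch):

    #include <cstdint>
    #include <vector>

    enum OpPrefix { TB, A6, A7 };  // placeholder subset of X86II prefixes

    static void emitOpcodePrefix(OpPrefix P, std::vector<uint8_t> &Out) {
      Out.push_back(0x0F);               // all three share the 0F escape
      if (P == A6) Out.push_back(0xA6);  // XSHA1, XSHA256, MONTMUL
      if (P == A7) Out.push_back(0xA7);  // XSTORE, XCRYPT*
    }

    int main() {
      std::vector<uint8_t> Out;
      emitOpcodePrefix(A7, Out);         // an XSTORE encoding starts 0F A7
      return Out.size() == 2 ? 0 : 1;
    }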
diff --git a/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp
index 6686214..83bba52 100644
--- a/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCAsmInfo.cpp
@@ -15,7 +15,9 @@
#include "X86TargetMachine.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ELF.h"
using namespace llvm;
@@ -69,7 +71,22 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
DwarfUsesInlineInfoSection = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::DwarfTable;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+}
+
+const MCExpr *
+X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+ const MCExpr *Four = MCConstantExpr::Create(4, Context);
+ return MCBinaryExpr::CreateAdd(Res, Four, Context);
+}
+
+X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
+ : X86MCAsmInfoDarwin(Triple) {
}
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
@@ -89,7 +106,9 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
SupportsDebugInformation = true;
// Exceptions handling
- ExceptionsType = ExceptionHandling::DwarfTable;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+
+ DwarfRequiresFrameSection = false;
// OpenBSD has buggy support for .quad in 32-bit mode, just split into two
// .words.
diff --git a/contrib/llvm/lib/Target/X86/X86MCAsmInfo.h b/contrib/llvm/lib/Target/X86/X86MCAsmInfo.h
index 5815225..2cd4c8e 100644
--- a/contrib/llvm/lib/Target/X86/X86MCAsmInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86MCAsmInfo.h
@@ -25,6 +25,14 @@ namespace llvm {
explicit X86MCAsmInfoDarwin(const Triple &Triple);
};
+ struct X86_64MCAsmInfoDarwin : public X86MCAsmInfoDarwin {
+ explicit X86_64MCAsmInfoDarwin(const Triple &Triple);
+ virtual const MCExpr *
+ getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const;
+ };
+
struct X86ELFMCAsmInfo : public MCAsmInfo {
explicit X86ELFMCAsmInfo(const Triple &Triple);
virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const;
diff --git a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
index 0e3b571..f195a67 100644
--- a/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -382,7 +382,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
const TargetInstrDesc &Desc,
raw_ostream &OS) const {
bool HasVEX_4V = false;
- if ((TSFlags >> 32) & X86II::VEX_4V)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
HasVEX_4V = true;
// VEX_R: opcode extension equivalent to REX.R in
@@ -446,10 +446,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (TSFlags & X86II::OpSize)
VEX_PP = 0x01;
- if ((TSFlags >> 32) & X86II::VEX_W)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W)
VEX_W = 1;
- if ((TSFlags >> 32) & X86II::VEX_L)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L)
VEX_L = 1;
switch (TSFlags & X86II::Op0Mask) {
@@ -470,6 +470,8 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::XD: // F2 0F
VEX_PP = 0x3;
break;
+ case X86II::A6: // Bypass: Not used by VEX
+ case X86II::A7: // Bypass: Not used by VEX
case X86II::TB: // Bypass: Not used by VEX
case 0:
break; // No prefix!
@@ -512,13 +514,13 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
}
// To only check operands before the memory address ones, start
- // the search from the begining
+ // the search from the beginning
if (IsDestMem)
CurOp = 0;
// If the last register should be encoded in the immediate field
// do not use any bit from VEX prefix to this register, ignore it
- if ((TSFlags >> 32) & X86II::VEX_I8IMM)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM)
NumOps--;
for (; CurOp != NumOps; ++CurOp) {
@@ -742,6 +744,8 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::TB: // Two-byte opcode prefix
case X86II::T8: // 0F 38
case X86II::TA: // 0F 3A
+ case X86II::A6: // 0F A6
+ case X86II::A7: // 0F A7
Need0FPrefix = true;
break;
case X86II::TF: // F2 0F 38
@@ -786,6 +790,12 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
case X86II::TA: // 0F 3A
EmitByte(0x3A, CurByte, OS);
break;
+ case X86II::A6: // 0F A6
+ EmitByte(0xA6, CurByte, OS);
+ break;
+ case X86II::A7: // 0F A7
+ EmitByte(0xA7, CurByte, OS);
+ break;
}
}
@@ -819,9 +829,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// Does the instruction use the VEX.VVVV field?
bool HasVEX_4V = false;
- if ((TSFlags >> 32) & X86II::VEX)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX)
HasVEXPrefix = true;
- if ((TSFlags >> 32) & X86II::VEX_4V)
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
HasVEX_4V = true;
@@ -837,7 +847,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
- if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
BaseOpcode = 0x0F; // Weird 3DNow! encoding.
unsigned SrcRegNum = 0;
@@ -994,7 +1004,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
if (CurOp != NumOps) {
// The last source register of a 4 operand instruction in AVX is encoded
// in bits[7:4] of an immediate byte, and bits[3:0] are ignored.
- if ((TSFlags >> 32) & X86II::VEX_I8IMM) {
+ if ((TSFlags >> X86II::VEXShift) & X86II::VEX_I8IMM) {
const MCOperand &MO = MI.getOperand(CurOp++);
bool IsExtReg =
X86InstrInfo::isX86_64ExtendedReg(MO.getReg());
@@ -1017,7 +1027,7 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
}
}
- if ((TSFlags >> 32) & X86II::Has3DNow0F0FOpcode)
+ if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
EmitByte(X86II::getBaseOpcodeFor(TSFlags), CurByte, OS);
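The mechanical change in this file replaces every magic `(TSFlags >> 32)` with the named `X86II::VEXShift`, so the position of the VEX flag block is stated exactly once. A minimal standalone sketch of the pattern, with hypothetical flag values (the real bit assignments live in X86InstrInfo.h):

    #include <cassert>
    #include <cstdint>

    namespace X86II {
      enum { VEXShift = 32 };  // hypothetical: where the VEX block starts
      enum { VEX = 1 << 0, VEX_4V = 1 << 1, VEX_W = 1 << 2 };  // bits relative to VEXShift
    }

    static bool hasVEX_4V(uint64_t TSFlags) {
      // Same test the patch uses: shift the block down, then mask.
      return (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
    }

    int main() {
      uint64_t Flags = (uint64_t)X86II::VEX_4V << X86II::VEXShift;
      assert(hasVEX_4V(Flags));
      return 0;
    }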
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 2f6bd88..37fb0fe 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -308,6 +308,33 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return 0;
}
+const TargetRegisterClass*
+X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{
+ const TargetRegisterClass *Super = RC;
+ TargetRegisterClass::sc_iterator I = RC->superclasses_begin();
+ do {
+ switch (Super->getID()) {
+ case X86::GR8RegClassID:
+ case X86::GR16RegClassID:
+ case X86::GR32RegClassID:
+ case X86::GR64RegClassID:
+ case X86::FR32RegClassID:
+ case X86::FR64RegClassID:
+ case X86::RFP32RegClassID:
+ case X86::RFP64RegClassID:
+ case X86::RFP80RegClassID:
+ case X86::VR128RegClassID:
+ case X86::VR256RegClassID:
+ // Don't return a super-class that would shrink the spill size.
+ // That can happen with the vector and float classes.
+ if (Super->getSize() == RC->getSize())
+ return Super;
+ }
+ Super = *I++;
+ } while (Super);
+ return RC;
+}
+
const TargetRegisterClass *
X86RegisterInfo::getPointerRegClass(unsigned Kind) const {
switch (Kind) {
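The new getLargestLegalSuperClass above walks the superclass chain and returns the first class from the listed IDs whose spill size matches the original's, so widening the constraint never grows the spill slot. A toy standalone sketch of that walk (the struct and class names are illustrative; the real code iterates TargetRegisterClass::sc_iterator):

    struct RC {
      unsigned SpillSize;      // spill size in bytes
      bool Interesting;        // stands in for the ID switch in the patch
      const RC *const *Super;  // null-terminated superclass chain
    };

    const RC *largestLegalSuperClass(const RC *C) {
      const RC *Cand = C;
      const RC *const *I = C->Super;
      do {
        // Accept only classes that spill at the same size as C.
        if (Cand->Interesting && Cand->SpillSize == C->SpillSize)
          return Cand;
        Cand = *I++;
      } while (Cand);
      return C;  // nothing acceptable; keep the original class
    }

    int main() {
      static const RC GR32 = {4, true, nullptr};      // broad class
      static const RC *Chain[] = {&GR32, nullptr};
      static const RC GR32_ABCD = {4, false, Chain};  // constrained sub-class
      return largestLegalSuperClass(&GR32_ABCD) == &GR32 ? 0 : 1;
    }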
@@ -337,7 +364,27 @@ X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
else
return &X86::GR32RegClass;
}
- return NULL;
+ return RC;
+}
+
+unsigned
+X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const {
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+
+ unsigned FPDiff = TFI->hasFP(MF) ? 1 : 0;
+ switch (RC->getID()) {
+ default:
+ return 0;
+ case X86::GR32RegClassID:
+ return 4 - FPDiff;
+ case X86::GR64RegClassID:
+ return 12 - FPDiff;
+ case X86::VR128RegClassID:
+ return TM.getSubtarget<X86Subtarget>().is64Bit() ? 10 : 4;
+ case X86::VR64RegClassID:
+ return 4;
+ }
}
const unsigned *
@@ -450,7 +497,7 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
// FIXME: It's more complicated than this...
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
report_fatal_error(
- "Stack realignment in presense of dynamic allocas is not supported");
+ "Stack realignment in presence of dynamic allocas is not supported");
// If we've requested that we force align the stack do so now.
if (ForceStackAlign)
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
index 064be64..9970c52 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -91,6 +91,9 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
+ const TargetRegisterClass*
+ getLargestLegalSuperClass(const TargetRegisterClass *RC) const;
+
/// getPointerRegClass - Returns a TargetRegisterClass used for pointer
/// values.
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
@@ -101,6 +104,9 @@ public:
const TargetRegisterClass *
getCrossCopyRegClass(const TargetRegisterClass *RC) const;
+ unsigned getRegPressureLimit(const TargetRegisterClass *RC,
+ MachineFunction &MF) const;
+
/// getCalleeSavedRegs - Return a null-terminated list of all of the
/// callee-save registers on this target.
const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index 612fac2..fd7a247 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -46,7 +46,8 @@ let Namespace = "X86" in {
def CL : Register<"cl">, DwarfRegNum<[2, 1, 1]>;
def BL : Register<"bl">, DwarfRegNum<[3, 3, 3]>;
- // X86-64 only
+ // X86-64 only, requires REX.
+ let CostPerUse = 1 in {
def SIL : Register<"sil">, DwarfRegNum<[4, 6, 6]>;
def DIL : Register<"dil">, DwarfRegNum<[5, 7, 7]>;
def BPL : Register<"bpl">, DwarfRegNum<[6, 4, 5]>;
@@ -59,6 +60,7 @@ let Namespace = "X86" in {
def R13B : Register<"r13b">, DwarfRegNum<[13, -2, -2]>;
def R14B : Register<"r14b">, DwarfRegNum<[14, -2, -2]>;
def R15B : Register<"r15b">, DwarfRegNum<[15, -2, -2]>;
+ }
// High registers. On x86-64, these cannot be used in any instruction
// with a REX prefix.
@@ -82,8 +84,8 @@ let Namespace = "X86" in {
}
def IP : Register<"ip">, DwarfRegNum<[16]>;
- // X86-64 only
- let SubRegIndices = [sub_8bit] in {
+ // X86-64 only, requires REX.
+ let SubRegIndices = [sub_8bit], CostPerUse = 1 in {
def R8W : RegisterWithSubRegs<"r8w", [R8B]>, DwarfRegNum<[8, -2, -2]>;
def R9W : RegisterWithSubRegs<"r9w", [R9B]>, DwarfRegNum<[9, -2, -2]>;
def R10W : RegisterWithSubRegs<"r10w", [R10B]>, DwarfRegNum<[10, -2, -2]>;
@@ -105,7 +107,8 @@ let Namespace = "X86" in {
def ESP : RegisterWithSubRegs<"esp", [SP]>, DwarfRegNum<[7, 5, 4]>;
def EIP : RegisterWithSubRegs<"eip", [IP]>, DwarfRegNum<[16, 8, 8]>;
- // X86-64 only
+  // X86-64 only, requires REX.
+ let CostPerUse = 1 in {
def R8D : RegisterWithSubRegs<"r8d", [R8W]>, DwarfRegNum<[8, -2, -2]>;
def R9D : RegisterWithSubRegs<"r9d", [R9W]>, DwarfRegNum<[9, -2, -2]>;
def R10D : RegisterWithSubRegs<"r10d", [R10W]>, DwarfRegNum<[10, -2, -2]>;
@@ -114,7 +117,7 @@ let Namespace = "X86" in {
def R13D : RegisterWithSubRegs<"r13d", [R13W]>, DwarfRegNum<[13, -2, -2]>;
def R14D : RegisterWithSubRegs<"r14d", [R14W]>, DwarfRegNum<[14, -2, -2]>;
def R15D : RegisterWithSubRegs<"r15d", [R15W]>, DwarfRegNum<[15, -2, -2]>;
- }
+ }}
// 64-bit registers, X86-64 only
let SubRegIndices = [sub_32bit] in {
@@ -127,6 +130,8 @@ let Namespace = "X86" in {
def RBP : RegisterWithSubRegs<"rbp", [EBP]>, DwarfRegNum<[6, -2, -2]>;
def RSP : RegisterWithSubRegs<"rsp", [ESP]>, DwarfRegNum<[7, -2, -2]>;
+ // These also require REX.
+ let CostPerUse = 1 in {
def R8 : RegisterWithSubRegs<"r8", [R8D]>, DwarfRegNum<[8, -2, -2]>;
def R9 : RegisterWithSubRegs<"r9", [R9D]>, DwarfRegNum<[9, -2, -2]>;
def R10 : RegisterWithSubRegs<"r10", [R10D]>, DwarfRegNum<[10, -2, -2]>;
@@ -136,7 +141,7 @@ let Namespace = "X86" in {
def R14 : RegisterWithSubRegs<"r14", [R14D]>, DwarfRegNum<[14, -2, -2]>;
def R15 : RegisterWithSubRegs<"r15", [R15D]>, DwarfRegNum<[15, -2, -2]>;
def RIP : RegisterWithSubRegs<"rip", [EIP]>, DwarfRegNum<[16, -2, -2]>;
- }
+ }}
// MMX Registers. These are actually aliased to ST0 .. ST7
def MM0 : Register<"mm0">, DwarfRegNum<[41, 29, 29]>;
@@ -170,6 +175,7 @@ let Namespace = "X86" in {
def XMM7: Register<"xmm7">, DwarfRegNum<[24, 28, 28]>;
// X86-64 only
+ let CostPerUse = 1 in {
def XMM8: Register<"xmm8">, DwarfRegNum<[25, -2, -2]>;
def XMM9: Register<"xmm9">, DwarfRegNum<[26, -2, -2]>;
def XMM10: Register<"xmm10">, DwarfRegNum<[27, -2, -2]>;
@@ -178,7 +184,7 @@ let Namespace = "X86" in {
def XMM13: Register<"xmm13">, DwarfRegNum<[30, -2, -2]>;
def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
- }
+ }}
// YMM Registers, used by AVX instructions
let SubRegIndices = [sub_xmm] in {
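Setting CostPerUse = 1 on every register that needs a REX prefix lets the allocator break ties toward the classic registers, whose encodings are a byte shorter. A toy sketch of that tie-break, under the assumption that the candidates are otherwise interchangeable (names illustrative):

    #include <vector>

    struct Reg { const char *Name; unsigned CostPerUse; bool Free; };

    // Among free candidates, prefer the lowest CostPerUse, so "r8d"
    // (REX required, cost 1) loses a tie against "ecx" (cost 0).
    const Reg *pickCheapest(const std::vector<Reg> &Regs) {
      const Reg *Best = nullptr;
      for (const Reg &R : Regs)
        if (R.Free && (!Best || R.CostPerUse < Best->CostPerUse))
          Best = &R;
      return Best;
    }

    int main() {
      std::vector<Reg> Regs = {{"r8d", 1, true}, {"ecx", 0, true}};
      return pickCheapest(Regs)->CostPerUse;  // 0: ecx wins the tie
    }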
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index 42e8193..02754f9 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -178,7 +178,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) const {
- // This requires the copy size to be a constant, preferrably
+ // This requires the copy size to be a constant, preferably
// within a subtarget-specific limit.
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
if (!ConstantSize)
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index 1ee7312..ba5864e 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -144,7 +144,8 @@ ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const {
/// passed as the second argument. Otherwise it returns null.
const char *X86Subtarget::getBZeroEntry() const {
// Darwin 10 has a __bzero entry point for this purpose.
- if (getDarwinVers() >= 10)
+ if (getTargetTriple().isMacOSX() &&
+ !getTargetTriple().isMacOSXVersionLT(10, 6))
return "__bzero";
return 0;
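The rewritten check is equivalent to the old one because Darwin 10 is the kernel of Mac OS X 10.6, the first release to ship __bzero; querying the triple directly avoids the Darwin-to-OSX numbering translation. A standalone sketch of the (major, minor) comparison, assuming that pair is all the triple encodes here:

    struct Version { unsigned Major, Minor; };

    // Lexicographic test matching what Triple::isMacOSXVersionLT expresses.
    static bool isVersionLT(Version V, unsigned Major, unsigned Minor) {
      return V.Major < Major || (V.Major == Major && V.Minor < Minor);
    }

    int main() {
      Version SnowLeopard = {10, 6};
      // __bzero is usable from 10.6 onward.
      return !isVersionLT(SnowLeopard, 10, 6) ? 0 : 1;  // 0: usable
    }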
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index 0a62a02..286a798 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -165,9 +165,15 @@ public:
bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
bool hasVectorUAMem() const { return HasVectorUAMem; }
- bool isTargetDarwin() const { return TargetTriple.getOS() == Triple::Darwin; }
- bool isTargetFreeBSD() const { return TargetTriple.getOS() == Triple::FreeBSD; }
- bool isTargetSolaris() const { return TargetTriple.getOS() == Triple::Solaris; }
+ const Triple &getTargetTriple() const { return TargetTriple; }
+
+ bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
+ bool isTargetFreeBSD() const {
+ return TargetTriple.getOS() == Triple::FreeBSD;
+ }
+ bool isTargetSolaris() const {
+ return TargetTriple.getOS() == Triple::Solaris;
+ }
// ELF is a reasonably sane default and the only other X86 targets we
// support are Darwin and Windows. Just use "not those".
@@ -215,13 +221,6 @@ public:
return PICStyle == PICStyles::StubDynamicNoPIC ||
PICStyle == PICStyles::StubPIC; }
- /// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
- /// 10 = Snow Leopard, etc.
- unsigned getDarwinVers() const {
- if (isTargetDarwin()) return TargetTriple.getDarwinMajorNumber();
- return 0;
- }
-
/// ClassifyGlobalReference - Classify a global variable reference for the
/// current subtarget according to how we should reference it in a non-pcrel
/// context.
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index 889c824..7483329 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -26,19 +26,18 @@ using namespace llvm;
static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
- switch (TheTriple.getOS()) {
- case Triple::Darwin:
- return new X86MCAsmInfoDarwin(TheTriple);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (TheTriple.getEnvironment() == Triple::MachO)
- return new X86MCAsmInfoDarwin(TheTriple);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+ if (TheTriple.getArch() == Triple::x86_64)
+ return new X86_64MCAsmInfoDarwin(TheTriple);
else
- return new X86MCAsmInfoCOFF(TheTriple);
- default:
- return new X86ELFMCAsmInfo(TheTriple);
+ return new X86MCAsmInfoDarwin(TheTriple);
}
+
+ if (TheTriple.isOSWindows())
+ return new X86MCAsmInfoCOFF(TheTriple);
+
+ return new X86ELFMCAsmInfo(TheTriple);
}
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
@@ -48,19 +47,14 @@ static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
bool RelaxAll,
bool NoExecStack) {
Triple TheTriple(TT);
- switch (TheTriple.getOS()) {
- case Triple::Darwin:
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO)
return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
- case Triple::MinGW32:
- case Triple::Cygwin:
- case Triple::Win32:
- if (TheTriple.getEnvironment() == Triple::MachO)
- return createMachOStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll);
- else
- return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
- default:
- return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
- }
+
+ if (TheTriple.isOSWindows())
+ return createWinCOFFStreamer(Ctx, TAB, *_Emitter, _OS, RelaxAll);
+
+ return createELFStreamer(Ctx, TAB, _OS, _Emitter, RelaxAll, NoExecStack);
}
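Both factories now share one shape: Darwin or a MachO environment first, then Windows, then the ELF default, replacing the old per-OS switch. A toy sketch of that dispatch (the enum stands in for Triple's OS query; FreeBSD lands in the ELF default):

    #include <cstdio>

    enum OSType { Darwin, Win32, Linux, FreeBSD };  // illustrative subset

    static const char *pickFormat(OSType OS, bool MachOEnv) {
      if (OS == Darwin || MachOEnv)
        return "MachO";
      if (OS == Win32)  // isOSWindows also covers MinGW and Cygwin
        return "COFF";
      return "ELF";     // everything else
    }

    int main() { puts(pickFormat(FreeBSD, false)); return 0; }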
extern "C" void LLVMInitializeX86Target() {
@@ -96,11 +90,11 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: X86TargetMachine(T, TT, FS, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
- "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" :
(getSubtargetImpl()->isTargetCygMing() ||
getSubtargetImpl()->isTargetWindows()) ?
- "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-n8:16:32" :
- "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32"),
+ "e-p:32:32-f64:64:64-i64:64:64-f80:32:32-f128:128:128-n8:16:32" :
+ "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-f128:128:128-n8:16:32"),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
@@ -111,7 +105,7 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: X86TargetMachine(T, TT, FS, true),
- DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64"),
+ DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this),
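The only change to these strings is the new "f128:128:128" component; in the datalayout grammar that reads as type, ABI alignment, and preferred alignment, all in bits, so f128 becomes 16-byte aligned on every X86 variant. A small decoder sketch for just that component:

    #include <cstdio>

    int main() {
      const char *Spec = "f128:128:128";  // the component added by this patch
      unsigned Size, ABI, Pref;
      // "f<size>:<abi>:<pref>", all in bits.
      if (sscanf(Spec, "f%u:%u:%u", &Size, &ABI, &Pref) == 3)
        printf("f%u: abi align %u bytes, preferred %u bytes\n",
               Size, ABI / 8, Pref / 8);
      return 0;
    }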
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
index c15dfbb..1231798 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -38,6 +38,12 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer);
}
+MCSymbol *X8664_MachoTargetObjectFile::
+getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const {
+ return Mang->getSymbol(GV);
+}
+
unsigned X8632_ELFTargetObjectFile::getPersonalityEncoding() const {
if (TM.getRelocationModel() == Reloc::PIC_)
return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
@@ -52,7 +58,7 @@ unsigned X8632_ELFTargetObjectFile::getLSDAEncoding() const {
return DW_EH_PE_absptr;
}
-unsigned X8632_ELFTargetObjectFile::getFDEEncoding() const {
+unsigned X8632_ELFTargetObjectFile::getFDEEncoding(bool FDE) const {
if (TM.getRelocationModel() == Reloc::PIC_)
return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
else
@@ -91,17 +97,14 @@ unsigned X8664_ELFTargetObjectFile::getLSDAEncoding() const {
return DW_EH_PE_absptr;
}
-unsigned X8664_ELFTargetObjectFile::getFDEEncoding() const {
- CodeModel::Model Model = TM.getCodeModel();
- if (TM.getRelocationModel() == Reloc::PIC_)
- return DW_EH_PE_pcrel | (Model == CodeModel::Small ||
- Model == CodeModel::Medium ?
- DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
+unsigned X8664_ELFTargetObjectFile::getFDEEncoding(bool CFI) const {
+ if (CFI)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- if (Model == CodeModel::Small || Model == CodeModel::Medium)
- return DW_EH_PE_udata4;
+ if (TM.getRelocationModel() == Reloc::PIC_)
+ return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
- return DW_EH_PE_absptr;
+ return DW_EH_PE_udata4;
}
unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
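The values behind these names come from the standard DWARF exception-header encoding: pcrel is 0x10 and sdata4 is 0x0b, so the CFI branch of getFDEEncoding always yields the byte 0x1b. A minimal sketch with the standard constants:

    #include <cstdio>

    // Standard DWARF EH encoding values.
    enum {
      DW_EH_PE_absptr = 0x00,
      DW_EH_PE_udata4 = 0x03,
      DW_EH_PE_sdata4 = 0x0b,
      DW_EH_PE_pcrel  = 0x10
    };

    int main() {
      // What the CFI branch of getFDEEncoding now returns:
      unsigned Enc = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
      printf("FDE encoding: 0x%x\n", Enc);  // prints 0x1b
      return 0;
    }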
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
index f2fd49c..e21b5bf 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -25,6 +25,12 @@ namespace llvm {
getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding,
MCStreamer &Streamer) const;
+
+ // getCFIPersonalitySymbol - The symbol that gets passed to
+ // .cfi_personality.
+ virtual MCSymbol *
+ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
+ MachineModuleInfo *MMI) const;
};
class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
@@ -34,7 +40,7 @@ namespace llvm {
:TM(tm) { }
virtual unsigned getPersonalityEncoding() const;
virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding() const;
+ virtual unsigned getFDEEncoding(bool CFI) const;
virtual unsigned getTTypeEncoding() const;
};
@@ -45,7 +51,7 @@ namespace llvm {
:TM(tm) { }
virtual unsigned getPersonalityEncoding() const;
virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding() const;
+ virtual unsigned getFDEEncoding(bool CFI) const;
virtual unsigned getTTypeEncoding() const;
};