diff options
Diffstat (limited to 'contrib/llvm/lib/Target/X86/Disassembler')
4 files changed, 173 insertions, 103 deletions
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp index 4a0d2ec..3aacb20 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp @@ -21,13 +21,16 @@ #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" -#include "llvm/Target/TargetRegistry.h" +#include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #define GET_REGINFO_ENUM #include "X86GenRegisterInfo.inc" +#define GET_INSTRINFO_ENUM +#include "X86GenInstrInfo.inc" #include "X86GenEDInfo.inc" using namespace llvm; @@ -64,8 +67,8 @@ extern Target TheX86_32Target, TheX86_64Target; static bool translateInstruction(MCInst &target, InternalInstruction &source); -X86GenericDisassembler::X86GenericDisassembler(DisassemblerMode mode) : - MCDisassembler(), +X86GenericDisassembler::X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode) : + MCDisassembler(STI), fMode(mode) { } @@ -106,28 +109,34 @@ static void logger(void* arg, const char* log) { // Public interface for the disassembler // -bool X86GenericDisassembler::getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject &region, - uint64_t address, - raw_ostream &vStream) const { +MCDisassembler::DecodeStatus +X86GenericDisassembler::getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const { InternalInstruction internalInstr; + + dlog_t loggerFn = logger; + if (&vStream == &nulls()) + loggerFn = 0; // Disable logging completely if it's going to nulls(). 
int ret = decodeInstruction(&internalInstr, regionReader, (void*)&region, - logger, + loggerFn, (void*)&vStream, address, fMode); if (ret) { size = internalInstr.readerCursor - address; - return false; + return Fail; } else { size = internalInstr.length; - return !translateInstruction(instr, internalInstr); + return (!translateInstruction(instr, internalInstr)) ? Success : Fail; } } @@ -183,8 +192,46 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate, break; } } + // By default sign-extend all X86 immediates based on their encoding. + else if (type == TYPE_IMM8 || type == TYPE_IMM16 || type == TYPE_IMM32 || + type == TYPE_IMM64) { + uint32_t Opcode = mcInst.getOpcode(); + switch (operand.encoding) { + default: + break; + case ENCODING_IB: + // Special case those X86 instructions that use the imm8 as a set of + // bits, bit count, etc. and are not sign-extend. + if (Opcode != X86::BLENDPSrri && Opcode != X86::BLENDPDrri && + Opcode != X86::PBLENDWrri && Opcode != X86::MPSADBWrri && + Opcode != X86::DPPSrri && Opcode != X86::DPPDrri && + Opcode != X86::INSERTPSrr && Opcode != X86::VBLENDPSYrri && + Opcode != X86::VBLENDPSYrmi && Opcode != X86::VBLENDPDYrri && + Opcode != X86::VBLENDPDYrmi && Opcode != X86::VPBLENDWrri && + Opcode != X86::VMPSADBWrri && Opcode != X86::VDPPSYrri && + Opcode != X86::VDPPSYrmi && Opcode != X86::VDPPDrri && + Opcode != X86::VINSERTPSrr) + type = TYPE_MOFFS8; + break; + case ENCODING_IW: + type = TYPE_MOFFS16; + break; + case ENCODING_ID: + type = TYPE_MOFFS32; + break; + case ENCODING_IO: + type = TYPE_MOFFS64; + break; + } + } switch (type) { + case TYPE_XMM128: + mcInst.addOperand(MCOperand::CreateReg(X86::XMM0 + (immediate >> 4))); + return; + case TYPE_XMM256: + mcInst.addOperand(MCOperand::CreateReg(X86::YMM0 + (immediate >> 4))); + return; case TYPE_MOFFS8: case TYPE_REL8: if(immediate & 0x80) @@ -543,12 +590,12 @@ static bool translateInstruction(MCInst &mcInst, return false; } -static MCDisassembler 
*createX86_32Disassembler(const Target &T) { - return new X86Disassembler::X86_32Disassembler; +static MCDisassembler *createX86_32Disassembler(const Target &T, const MCSubtargetInfo &STI) { + return new X86Disassembler::X86_32Disassembler(STI); } -static MCDisassembler *createX86_64Disassembler(const Target &T) { - return new X86Disassembler::X86_64Disassembler; +static MCDisassembler *createX86_64Disassembler(const Target &T, const MCSubtargetInfo &STI) { + return new X86Disassembler::X86_64Disassembler(STI); } extern "C" void LLVMInitializeX86Disassembler() { diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h index 550cf9d..6ac9a0f 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.h @@ -92,6 +92,7 @@ struct InternalInstruction; namespace llvm { class MCInst; +class MCSubtargetInfo; class MemoryObject; class raw_ostream; @@ -107,16 +108,17 @@ protected: /// Constructor - Initializes the disassembler. /// /// @param mode - The X86 architecture mode to decode for. - X86GenericDisassembler(DisassemblerMode mode); + X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode); public: ~X86GenericDisassembler(); /// getInstruction - See MCDisassembler. - bool getInstruction(MCInst &instr, - uint64_t &size, - const MemoryObject &region, - uint64_t address, - raw_ostream &vStream) const; + DecodeStatus getInstruction(MCInst &instr, + uint64_t &size, + const MemoryObject &region, + uint64_t address, + raw_ostream &vStream, + raw_ostream &cStream) const; /// getEDInfo - See MCDisassembler. EDInstInfo *getEDInfo() const; @@ -127,24 +129,24 @@ private: /// X86_16Disassembler - 16-bit X86 disassembler. 
class X86_16Disassembler : public X86GenericDisassembler { public: - X86_16Disassembler() : - X86GenericDisassembler(MODE_16BIT) { + X86_16Disassembler(const MCSubtargetInfo &STI) : + X86GenericDisassembler(STI, MODE_16BIT) { } }; /// X86_16Disassembler - 32-bit X86 disassembler. class X86_32Disassembler : public X86GenericDisassembler { public: - X86_32Disassembler() : - X86GenericDisassembler(MODE_32BIT) { + X86_32Disassembler(const MCSubtargetInfo &STI) : + X86GenericDisassembler(STI, MODE_32BIT) { } }; /// X86_16Disassembler - 64-bit X86 disassembler. class X86_64Disassembler : public X86GenericDisassembler { public: - X86_64Disassembler() : - X86GenericDisassembler(MODE_64BIT) { + X86_64Disassembler(const MCSubtargetInfo &STI) : + X86GenericDisassembler(STI, MODE_64BIT) { } }; diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index de1610b..f9b0fe5 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -58,8 +58,8 @@ static InstructionContext contextForAttrs(uint8_t attrMask) { * @return - TRUE if the ModR/M byte is required, FALSE otherwise. 
*/ static int modRMRequired(OpcodeType type, - InstructionContext insnContext, - uint8_t opcode) { + InstructionContext insnContext, + uint8_t opcode) { const struct ContextDecision* decision = 0; switch (type) { @@ -391,7 +391,7 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if (insn->mode == MODE_64BIT || byte1 & 0x8) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 3; insn->necessaryPrefixLocation = insn->readerCursor - 1; } @@ -406,12 +406,14 @@ static int readPrefixes(struct InternalInstruction* insn) { consumeByte(insn, &insn->vexPrefix[2]); /* We simulate the REX prefix for simplicity's sake */ - - insn->rexPrefix = 0x40 - | (wFromVEX3of3(insn->vexPrefix[2]) << 3) - | (rFromVEX2of3(insn->vexPrefix[1]) << 2) - | (xFromVEX2of3(insn->vexPrefix[1]) << 1) - | (bFromVEX2of3(insn->vexPrefix[1]) << 0); + + if (insn->mode == MODE_64BIT) { + insn->rexPrefix = 0x40 + | (wFromVEX3of3(insn->vexPrefix[2]) << 3) + | (rFromVEX2of3(insn->vexPrefix[1]) << 2) + | (xFromVEX2of3(insn->vexPrefix[1]) << 1) + | (bFromVEX2of3(insn->vexPrefix[1]) << 0); + } switch (ppFromVEX3of3(insn->vexPrefix[2])) { @@ -433,7 +435,7 @@ static int readPrefixes(struct InternalInstruction* insn) { return -1; } - if (insn->mode == MODE_64BIT || byte1 & 0x8) { + if (insn->mode == MODE_64BIT || (byte1 & 0xc0) == 0xc0) { insn->vexSize = 2; } else { @@ -444,8 +446,10 @@ static int readPrefixes(struct InternalInstruction* insn) { insn->vexPrefix[0] = byte; consumeByte(insn, &insn->vexPrefix[1]); - insn->rexPrefix = 0x40 - | (rFromVEX2of2(insn->vexPrefix[1]) << 2); + if (insn->mode == MODE_64BIT) { + insn->rexPrefix = 0x40 + | (rFromVEX2of2(insn->vexPrefix[1]) << 2); + } switch (ppFromVEX2of2(insn->vexPrefix[1])) { @@ -700,34 +704,6 @@ static BOOL is16BitEquvalent(const char* orig, const char* equiv) { } /* - * is64BitEquivalent - Determines whether two instruction names refer to - * equivalent instructions but one is 64-bit whereas the other is 
not. - * - * @param orig - The instruction that is not 64-bit - * @param equiv - The instruction that is 64-bit - */ -static BOOL is64BitEquivalent(const char* orig, const char* equiv) { - off_t i; - - for (i = 0;; i++) { - if (orig[i] == '\0' && equiv[i] == '\0') - return TRUE; - if (orig[i] == '\0' || equiv[i] == '\0') - return FALSE; - if (orig[i] != equiv[i]) { - if ((orig[i] == 'W' || orig[i] == 'L') && equiv[i] == 'Q') - continue; - if ((orig[i] == '1' || orig[i] == '3') && equiv[i] == '6') - continue; - if ((orig[i] == '6' || orig[i] == '2') && equiv[i] == '4') - continue; - return FALSE; - } - } -} - - -/* * getID - Determines the ID of an instruction, consuming the ModR/M byte as * appropriate for extended and escape opcodes. Determines the attributes and * context for the instruction before doing so. @@ -763,8 +739,6 @@ static int getID(struct InternalInstruction* insn) { break; } - if (wFromVEX3of3(insn->vexPrefix[2])) - attrMask |= ATTR_REXW; if (lFromVEX3of3(insn->vexPrefix[2])) attrMask |= ATTR_VEXL; } @@ -789,63 +763,55 @@ static int getID(struct InternalInstruction* insn) { } } else { - if (insn->rexPrefix & 0x08) - attrMask |= ATTR_REXW; - if (isPrefixAtLocation(insn, 0x66, insn->necessaryPrefixLocation)) attrMask |= ATTR_OPSIZE; else if (isPrefixAtLocation(insn, 0xf3, insn->necessaryPrefixLocation)) attrMask |= ATTR_XS; else if (isPrefixAtLocation(insn, 0xf2, insn->necessaryPrefixLocation)) attrMask |= ATTR_XD; - } + if (insn->rexPrefix & 0x08) + attrMask |= ATTR_REXW; + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; /* The following clauses compensate for limitations of the tables. 
*/ - if ((attrMask & ATTR_XD) && (attrMask & ATTR_REXW)) { + if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) { /* - * Although for SSE instructions it is usually necessary to treat REX.W+F2 - * as F2 for decode (in the absence of a 64BIT_REXW_XD category) there is - * an occasional instruction where F2 is incidental and REX.W is the more - * significant. If the decoded instruction is 32-bit and adding REX.W - * instead of F2 changes a 32 to a 64, we adopt the new encoding. + * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit + * has precedence since there are no L-bit with W-bit entries in the tables. + * So if the L-bit isn't significant we should use the W-bit instead. */ - + const struct InstructionSpecifier *spec; - uint16_t instructionIDWithREXw; - const struct InstructionSpecifier *specWithREXw; - + uint16_t instructionIDWithWBit; + const struct InstructionSpecifier *specWithWBit; + spec = specifierForUID(instructionID); - - if (getIDWithAttrMask(&instructionIDWithREXw, + + if (getIDWithAttrMask(&instructionIDWithWBit, insn, - attrMask & (~ATTR_XD))) { - /* - * Decoding with REX.w would yield nothing; give up and return original - * decode. 
- */ - + (attrMask & (~ATTR_VEXL)) | ATTR_REXW)) { insn->instructionID = instructionID; insn->spec = spec; return 0; } - - specWithREXw = specifierForUID(instructionIDWithREXw); - - if (is64BitEquivalent(spec->name, specWithREXw->name)) { - insn->instructionID = instructionIDWithREXw; - insn->spec = specWithREXw; + + specWithWBit = specifierForUID(instructionIDWithWBit); + + if (instructionID != instructionIDWithWBit) { + insn->instructionID = instructionIDWithWBit; + insn->spec = specWithWBit; } else { insn->instructionID = instructionID; insn->spec = spec; } return 0; } - + if (insn->prefixPresent[0x66] && !(attrMask & ATTR_OPSIZE)) { /* * The instruction tables make no distinction between instructions that @@ -885,6 +851,43 @@ static int getID(struct InternalInstruction* insn) { } return 0; } + + if (insn->opcodeType == ONEBYTE && insn->opcode == 0x90 && + insn->rexPrefix & 0x01) { + /* + * NOOP shouldn't decode as NOOP if REX.b is set. Instead + * it should decode as XCHG %r8, %eax. + */ + + const struct InstructionSpecifier *spec; + uint16_t instructionIDWithNewOpcode; + const struct InstructionSpecifier *specWithNewOpcode; + + spec = specifierForUID(instructionID); + + /* Borrow opcode from one of the other XCHGar opcodes */ + insn->opcode = 0x91; + + if (getIDWithAttrMask(&instructionIDWithNewOpcode, + insn, + attrMask)) { + insn->opcode = 0x90; + + insn->instructionID = instructionID; + insn->spec = spec; + return 0; + } + + specWithNewOpcode = specifierForUID(instructionIDWithNewOpcode); + + /* Change back */ + insn->opcode = 0x90; + + insn->instructionID = instructionIDWithNewOpcode; + insn->spec = specWithNewOpcode; + + return 0; + } insn->instructionID = instructionID; insn->spec = specifierForUID(insn->instructionID); @@ -1434,11 +1437,10 @@ static int readImmediate(struct InternalInstruction* insn, uint8_t size) { } /* - * readVVVV - Consumes an immediate operand from an instruction, given the - * desired operand size. 
+ * readVVVV - Consumes vvvv from an instruction if it has a VEX prefix. * * @param insn - The instruction whose operand is to be read. - * @return - 0 if the immediate was successfully consumed; nonzero + * @return - 0 if the vvvv was successfully consumed; nonzero * otherwise. */ static int readVVVV(struct InternalInstruction* insn) { @@ -1451,6 +1453,9 @@ static int readVVVV(struct InternalInstruction* insn) { else return -1; + if (insn->mode != MODE_64BIT) + insn->vvvv &= 0x7; + return 0; } @@ -1463,8 +1468,14 @@ static int readVVVV(struct InternalInstruction* insn) { */ static int readOperands(struct InternalInstruction* insn) { int index; + int hasVVVV, needVVVV; dbgprintf(insn, "readOperands()"); + + /* If non-zero vvvv specified, need to make sure one of the operands + uses it. */ + hasVVVV = !readVVVV(insn); + needVVVV = hasVVVV && (insn->vvvv != 0); for (index = 0; index < X86_MAX_OPERANDS; ++index) { switch (insn->spec->operands[index].encoding) { @@ -1537,7 +1548,8 @@ static int readOperands(struct InternalInstruction* insn) { return -1; break; case ENCODING_VVVV: - if (readVVVV(insn)) + needVVVV = 0; /* Mark that we have found a VVVV operand. 
*/ + if (!hasVVVV) return -1; if (fixupReg(insn, &insn->spec->operands[index])) return -1; @@ -1549,6 +1561,9 @@ static int readOperands(struct InternalInstruction* insn) { return -1; } } + + /* If we didn't find ENCODING_VVVV operand, but non-zero vvvv present, fail */ + if (needVVVV) return -1; return 0; } diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 70315ed..8b79335 100644 --- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -81,12 +81,18 @@ enum attributeBits { "but not the operands") \ ENUM_ENTRY(IC_XS, 2, "may say something about the opcode " \ "but not the operands") \ + ENUM_ENTRY(IC_XD_OPSIZE, 3, "requires an OPSIZE prefix, so " \ + "operands change width") \ + ENUM_ENTRY(IC_XS_OPSIZE, 3, "requires an OPSIZE prefix, so " \ + "operands change width") \ ENUM_ENTRY(IC_64BIT_REXW, 4, "requires a REX.W prefix, so operands "\ "change width; overrides IC_OPSIZE") \ ENUM_ENTRY(IC_64BIT_OPSIZE, 3, "Just as meaningful as IC_OPSIZE") \ ENUM_ENTRY(IC_64BIT_XD, 5, "XD instructions are SSE; REX.W is " \ "secondary") \ ENUM_ENTRY(IC_64BIT_XS, 5, "Just as meaningful as IC_64BIT_XD") \ + ENUM_ENTRY(IC_64BIT_XD_OPSIZE, 3, "Just as meaningful as IC_XD_OPSIZE") \ + ENUM_ENTRY(IC_64BIT_XS_OPSIZE, 3, "Just as meaningful as IC_XS_OPSIZE") \ ENUM_ENTRY(IC_64BIT_REXW_XS, 6, "OPSIZE could mean a different " \ "opcode") \ ENUM_ENTRY(IC_64BIT_REXW_XD, 6, "Just as meaningful as " \ @@ -104,7 +110,7 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_W_OPSIZE, 4, "requires VEX, W, and OpSize") \ ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ - ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XS prefix")\ + ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\ 
ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") |