diff options
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp | 10 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h | 11 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 7 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp | 5 | ||||
-rw-r--r-- | lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h | 13 | ||||
-rw-r--r-- | lib/Target/X86/CMakeLists.txt | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86.h | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86.td | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86ELFWriterInfo.cpp | 12 | ||||
-rw-r--r-- | lib/Target/X86/X86ELFWriterInfo.h | 4 | ||||
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 29 | ||||
-rw-r--r-- | lib/Target/X86/X86FloatingPoint.cpp | 15 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.cpp | 200 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelLowering.h | 7 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrBuilder.h | 6 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.cpp | 3 | ||||
-rw-r--r-- | lib/Target/X86/X86InstrInfo.td | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86RegisterInfo.td | 27 | ||||
-rw-r--r-- | lib/Target/X86/X86Subtarget.h | 2 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.cpp | 10 | ||||
-rw-r--r-- | lib/Target/X86/X86TargetMachine.h | 1 |
21 files changed, 295 insertions, 79 deletions
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp index e75cfc5..127f228 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp @@ -154,21 +154,13 @@ void X86ATTAsmPrinter::decorateName(std::string &Name, } } - - void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) { + unsigned FnAlign = MF.getAlignment(); const Function *F = MF.getFunction(); decorateName(CurrentFnName, F); SwitchToSection(TAI->SectionForGlobal(F)); - - // FIXME: A function's alignment should be part of MachineFunction. There - // shouldn't be a policy decision here. - unsigned FnAlign = 4; - if (F->hasFnAttr(Attribute::OptimizeForSize)) - FnAlign = 1; - switch (F->getLinkage()) { default: assert(0 && "Unknown linkage type!"); case Function::InternalLinkage: // Symbols default to internal. diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h index bd96115..f47daf1 100644 --- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h @@ -38,9 +38,8 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { MCStreamer *Streamer; public: explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V) { + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V) { Subtarget = &TM.getSubtarget<X86Subtarget>(); Context = 0; Streamer = 0; @@ -140,6 +139,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { void printi128mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } + void printi256mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } void printf32mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } @@ -152,6 +154,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter { void printf128mem(const MachineInstr *MI, unsigned OpNo) { printMemReference(MI, OpNo); } + void printf256mem(const MachineInstr *MI, unsigned OpNo) { + printMemReference(MI, OpNo); + } void printlea32mem(const MachineInstr *MI, unsigned OpNo) { printLeaMemReference(MI, OpNo); } diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp index d1623d6..e5d80a4 100644 --- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp @@ -25,15 +25,12 @@ using namespace llvm; /// FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose) { const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>(); if (Subtarget->isFlavorIntel()) - return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); - return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), - OptLevel, verbose); + return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); + return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose); } namespace { diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp index ceae7be..9d4df93 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp +++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp @@ -132,6 +132,7 @@ bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) { // Print out labels for the function. const Function *F = MF.getFunction(); unsigned CC = F->getCallingConv(); + unsigned FnAlign = MF.getAlignment(); // Populate function information map. Actually, We don't want to populate // non-stdcall or non-fastcall functions' information right now. @@ -141,10 +142,6 @@ bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) { decorateName(CurrentFnName, F); SwitchToTextSection("_text", F); - - unsigned FnAlign = 4; - if (F->hasFnAttr(Attribute::OptimizeForSize)) - FnAlign = 1; switch (F->getLinkage()) { default: assert(0 && "Unsupported linkage type!"); case Function::PrivateLinkage: diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h index 04f2595..a724c3c 100644 --- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h +++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h @@ -26,9 +26,8 @@ namespace llvm { struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter { explicit X86IntelAsmPrinter(raw_ostream &O, X86TargetMachine &TM, - const TargetAsmInfo *T, CodeGenOpt::Level OL, - bool V) - : AsmPrinter(O, TM, T, OL, V) {} + const TargetAsmInfo *T, bool V) + : AsmPrinter(O, TM, T, V) {} virtual const char *getPassName() const { return "X86 Intel-Style Assembly Printer"; @@ -76,6 +75,10 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter { O << "XMMWORD PTR "; printMemReference(MI, OpNo); } + void printi256mem(const MachineInstr *MI, unsigned OpNo) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo); + } void printf32mem(const MachineInstr *MI, unsigned OpNo) { O << "DWORD PTR "; printMemReference(MI, OpNo); @@ -92,6 +95,10 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter { O << "XMMWORD PTR "; printMemReference(MI, OpNo); } + void printf256mem(const MachineInstr *MI, unsigned OpNo) { + O << "YMMWORD PTR "; + printMemReference(MI, OpNo); + } void printlea32mem(const MachineInstr *MI, unsigned OpNo) { O << "DWORD PTR "; printLeaMemReference(MI, OpNo); diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index d982990..7ea0e51 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -27,3 +27,5 @@ add_llvm_target(X86CodeGen X86TargetMachine.cpp X86FastISel.cpp ) + +target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG) diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index fd13b02..22de3f6 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -46,9 +46,7 @@ FunctionPass *createX87FPRegKillInserterPass(); /// assembly code for a MachineFunction to the given output stream, /// using the given target machine description. /// -FunctionPass *createX86CodePrinterPass(raw_ostream &o, - X86TargetMachine &tm, - CodeGenOpt::Level OptLevel, +FunctionPass *createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm, bool Verbose); /// createX86CodeEmitterPass - Return a pass that emits the collected X86 code diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 4d26364..47861d5 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -119,6 +119,10 @@ def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A, Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>; +def : Proc<"istanbul", [Feature3DNowA, Feature64Bit, FeatureSSE4A, + Feature3DNowA]>; +def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A, + Feature3DNowA]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>; diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp index 315118f..912ab0e 100644 --- a/lib/Target/X86/X86ELFWriterInfo.cpp +++ b/lib/Target/X86/X86ELFWriterInfo.cpp @@ -59,18 +59,6 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const { return 0; } -unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const { - unsigned FnAlign = 4; - - if (F->hasFnAttr(Attribute::OptimizeForSize)) - FnAlign = 1; - - if (F->getAlignment()) - FnAlign = Log2_32(F->getAlignment()); - - return (1 << FnAlign); -} - long int X86ELFWriterInfo::getAddendForRelTy(unsigned RelTy) const { if (is64Bit) { switch(RelTy) { diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h index 96485b8..2ba1a0b 100644 --- a/lib/Target/X86/X86ELFWriterInfo.h +++ b/lib/Target/X86/X86ELFWriterInfo.h @@ -41,10 +41,6 @@ namespace llvm { X86ELFWriterInfo(TargetMachine &TM); virtual ~X86ELFWriterInfo(); - /// getFunctionAlignment - Returns the alignment for function 'F', targets - /// with different alignment constraints should overload this method - virtual unsigned getFunctionAlignment(const Function *F) const; - /// getRelocationType - Returns the target specific ELF Relocation type. /// 'MachineRelTy' contains the object code independent relocation type virtual unsigned getRelocationType(unsigned MachineRelTy) const; diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 8a21b35..b336d78 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -452,25 +452,38 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) { if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) { // Check to see if we've already materialized this // value in a register in this block. - if (unsigned Reg = LocalValueMap[V]) { - AM.Base.Reg = Reg; + DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V); + if (I != LocalValueMap.end() && I->second != 0) { + AM.Base.Reg = I->second; AM.GV = 0; return true; } - // Issue load from stub if necessary. + + // Issue load from stub. unsigned Opc = 0; const TargetRegisterClass *RC = NULL; + X86AddressMode StubAM; + StubAM.Base.Reg = AM.Base.Reg; + StubAM.GV = AM.GV; + if (TLI.getPointerTy() == MVT::i32) { Opc = X86::MOV32rm; RC = X86::GR32RegisterClass; + + if (Subtarget->isPICStyleGOT() && + TM.getRelocationModel() == Reloc::PIC_) + StubAM.GVOpFlags = X86II::MO_GOT; + } else { Opc = X86::MOV64rm; RC = X86::GR64RegisterClass; + + if (TM.getRelocationModel() != Reloc::Static) { + StubAM.GVOpFlags = X86II::MO_GOTPCREL; + StubAM.Base.Reg = X86::RIP; + } } - X86AddressMode StubAM; - StubAM.Base.Reg = AM.Base.Reg; - StubAM.GV = AM.GV; unsigned ResultReg = createResultReg(RC); addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM); @@ -1503,7 +1516,9 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) { } else if (Subtarget->isPICStyleGOT()) { OpFlag = X86II::MO_GOTOFF; PICBase = getInstrInfo()->getGlobalBaseReg(&MF); - } + } else if (Subtarget->isPICStyleRIPRel() && + TM.getCodeModel() == CodeModel::Small) + PICBase = X86::RIP; } // Create the load from the constant pool. diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp index ed4eb44..37027ee 100644 --- a/lib/Target/X86/X86FloatingPoint.cpp +++ b/lib/Target/X86/X86FloatingPoint.cpp @@ -990,16 +990,21 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) { } case X86::FpSET_ST0_32: case X86::FpSET_ST0_64: - case X86::FpSET_ST0_80: + case X86::FpSET_ST0_80: { + unsigned Op0 = getFPReg(MI->getOperand(0)); + // FpSET_ST0_80 is generated by copyRegToReg for both function return // and inline assembly with the "st" constrain. In the latter case, - // it is possible for FP0 to be alive after this instruction. - if (!MI->killsRegister(X86::FP0)) { - // Duplicate ST0 - duplicateToTop(0, 0, I); + // it is possible for ST(0) to be alive after this instruction. + if (!MI->killsRegister(X86::FP0 + Op0)) { + // Duplicate Op0 + duplicateToTop(0, 7 /*temp register*/, I); + } else { + moveToTop(Op0, I); } --StackTop; // "Forget" we have something on the top of stack! break; + } case X86::FpSET_ST1_32: case X86::FpSET_ST1_64: case X86::FpSET_ST1_80: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 9614e69..5a6294a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -700,6 +700,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Do not attempt to custom lower non-power-of-2 vectors if (!isPowerOf2_32(VT.getVectorNumElements())) continue; + // Do not attempt to custom lower non-128-bit vectors + if (!VT.is128BitVector()) + continue; setOperationAction(ISD::BUILD_VECTOR, VT, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); @@ -718,17 +721,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64. - for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) { - setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v2i64); - setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote); - AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64); + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) { + MVT VT = (MVT::SimpleValueType)i; + + // Do not attempt to promote non-128-bit vectors + if (!VT.is128BitVector()) { + continue; + } + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v2i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v2i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v2i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v2i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v2i64); } setTruncStoreAction(MVT::f64, MVT::f32, Expand); @@ -775,6 +784,114 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::VSETCC, MVT::v2i64, Custom); } + if (!UseSoftFloat && Subtarget->hasAVX()) { + addRegisterClass(MVT::v8f32, X86::VR256RegisterClass); + addRegisterClass(MVT::v4f64, X86::VR256RegisterClass); + addRegisterClass(MVT::v8i32, X86::VR256RegisterClass); + addRegisterClass(MVT::v4i64, X86::VR256RegisterClass); + + setOperationAction(ISD::LOAD, MVT::v8f32, Legal); + setOperationAction(ISD::LOAD, MVT::v8i32, Legal); + setOperationAction(ISD::LOAD, MVT::v4f64, Legal); + setOperationAction(ISD::LOAD, MVT::v4i64, Legal); + setOperationAction(ISD::FADD, MVT::v8f32, Legal); + setOperationAction(ISD::FSUB, MVT::v8f32, Legal); + setOperationAction(ISD::FMUL, MVT::v8f32, Legal); + setOperationAction(ISD::FDIV, MVT::v8f32, Legal); + setOperationAction(ISD::FSQRT, MVT::v8f32, Legal); + setOperationAction(ISD::FNEG, MVT::v8f32, Custom); + //setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom); + //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom); + //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom); + //setOperationAction(ISD::SELECT, MVT::v8f32, Custom); + //setOperationAction(ISD::VSETCC, MVT::v8f32, Custom); + + // Operations to consider commented out -v16i16 v32i8 + //setOperationAction(ISD::ADD, MVT::v16i16, Legal); + setOperationAction(ISD::ADD, MVT::v8i32, Custom); + setOperationAction(ISD::ADD, MVT::v4i64, Custom); + //setOperationAction(ISD::SUB, MVT::v32i8, Legal); + //setOperationAction(ISD::SUB, MVT::v16i16, Legal); + setOperationAction(ISD::SUB, MVT::v8i32, Custom); + setOperationAction(ISD::SUB, MVT::v4i64, Custom); + //setOperationAction(ISD::MUL, MVT::v16i16, Legal); + setOperationAction(ISD::FADD, MVT::v4f64, Legal); + setOperationAction(ISD::FSUB, MVT::v4f64, Legal); + setOperationAction(ISD::FMUL, MVT::v4f64, Legal); + setOperationAction(ISD::FDIV, MVT::v4f64, Legal); + setOperationAction(ISD::FSQRT, MVT::v4f64, Legal); + setOperationAction(ISD::FNEG, MVT::v4f64, Custom); + + setOperationAction(ISD::VSETCC, MVT::v4f64, Custom); + // setOperationAction(ISD::VSETCC, MVT::v32i8, Custom); + // setOperationAction(ISD::VSETCC, MVT::v16i16, Custom); + setOperationAction(ISD::VSETCC, MVT::v8i32, Custom); + + // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i8, Custom); + // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i16, Custom); + // setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i16, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f32, Custom); + + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i64, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom); + +#if 0 + // Not sure we want to do this since there are no 256-bit integer + // operations in AVX + + // Custom lower build_vector, vector_shuffle, and extract_vector_elt. + // This includes 256-bit vectors + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) { + MVT VT = (MVT::SimpleValueType)i; + + // Do not attempt to custom lower non-power-of-2 vectors + if (!isPowerOf2_32(VT.getVectorNumElements())) + continue; + + setOperationAction(ISD::BUILD_VECTOR, VT, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); + } + + if (Subtarget->is64Bit()) { + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom); + } +#endif + +#if 0 + // Not sure we want to do this since there are no 256-bit integer + // operations in AVX + + // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64. + // Including 256-bit vectors + for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) { + MVT VT = (MVT::SimpleValueType)i; + + if (!VT.is256BitVector()) { + continue; + } + setOperationAction(ISD::AND, VT, Promote); + AddPromotedToType (ISD::AND, VT, MVT::v4i64); + setOperationAction(ISD::OR, VT, Promote); + AddPromotedToType (ISD::OR, VT, MVT::v4i64); + setOperationAction(ISD::XOR, VT, Promote); + AddPromotedToType (ISD::XOR, VT, MVT::v4i64); + setOperationAction(ISD::LOAD, VT, Promote); + AddPromotedToType (ISD::LOAD, VT, MVT::v4i64); + setOperationAction(ISD::SELECT, VT, Promote); + AddPromotedToType (ISD::SELECT, VT, MVT::v4i64); + } + + setTruncStoreAction(MVT::f64, MVT::f32, Expand); +#endif + } + // We want to custom lower some of our intrinsics. setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); @@ -805,6 +922,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SRA); setTargetDAGCombine(ISD::SRL); setTargetDAGCombine(ISD::STORE); + setTargetDAGCombine(ISD::MEMBARRIER); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); @@ -909,6 +1027,11 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, return Table; } +/// getFunctionAlignment - Return the Log2 alignment of this function. +unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const { + return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4; +} + //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -5690,7 +5813,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl, Args.push_back(Entry); std::pair<SDValue,SDValue> CallResult = LowerCallTo(Chain, Type::VoidTy, false, false, false, false, - CallingConv::C, false, + 0, CallingConv::C, false, DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl); return CallResult.second; } @@ -8454,6 +8577,58 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) { return SDValue(); } +// On X86 and X86-64, atomic operations are lowered to locked instructions. +// Locked instructions, in turn, have implicit fence semantics (all memory +// operations are flushed before issuing the locked instruction, and the +// are not buffered), so we can fold away the common pattern of +// fence-atomic-fence. +static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) { + SDValue atomic = N->getOperand(0); + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + break; + default: + return SDValue(); + } + + SDValue fence = atomic.getOperand(0); + if (fence.getOpcode() != ISD::MEMBARRIER) + return SDValue(); + + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2), + atomic.getOperand(3)); + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + return DAG.UpdateNodeOperands(atomic, fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2)); + default: + return SDValue(); + } +} + SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -8472,6 +8647,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FAND: return PerformFANDCombine(N, DAG); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); + case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG); } return SDValue(); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index fb4eb68..ffed46c 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -380,7 +380,7 @@ namespace llvm { MVT getOptimalMemOpType(uint64_t Size, unsigned Align, bool isSrcConst, bool isSrcStr, SelectionDAG &DAG) const; - + /// LowerOperation - Provide custom lowering hooks for some operations. /// virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG); @@ -533,7 +533,10 @@ namespace llvm { , SmallSet<Instruction*, 8> & #endif ); - + + /// getFunctionAlignment - Return the Log2 alignment of this function. + virtual unsigned getFunctionAlignment(const Function *F) const; + private: /// Subtarget - Keep a pointer to the X86Subtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h index b50dd65..6359542 100644 --- a/lib/Target/X86/X86InstrBuilder.h +++ b/lib/Target/X86/X86InstrBuilder.h @@ -49,8 +49,10 @@ struct X86AddressMode { unsigned IndexReg; unsigned Disp; GlobalValue *GV; + unsigned GVOpFlags; - X86AddressMode() : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0) { + X86AddressMode() + : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) { Base.Reg = 0; } }; @@ -113,7 +115,7 @@ inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB, assert (0); MIB.addImm(AM.Scale).addReg(AM.IndexReg); if (AM.GV) - return MIB.addGlobalAddress(AM.GV, AM.Disp); + return MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags); else return MIB.addImm(AM.Disp); } diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 21f71ec..e5d84c5 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2459,7 +2459,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, getDefRegState(MO.isDef()) | RegState::Implicit | getKillRegState(MO.isKill()) | - getDeadRegState(MO.isDead())); + getDeadRegState(MO.isDead()) | + getUndefRegState(MO.isUndef())); } // Change CMP32ri r, 0 back to TEST32rr r, r, etc. unsigned NewOpc = 0; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index a6b0880..03df10d 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -180,10 +180,12 @@ def i16mem : X86MemOperand<"printi16mem">; def i32mem : X86MemOperand<"printi32mem">; def i64mem : X86MemOperand<"printi64mem">; def i128mem : X86MemOperand<"printi128mem">; +def i256mem : X86MemOperand<"printi256mem">; def f32mem : X86MemOperand<"printf32mem">; def f64mem : X86MemOperand<"printf64mem">; def f80mem : X86MemOperand<"printf80mem">; def f128mem : X86MemOperand<"printf128mem">; +def f256mem : X86MemOperand<"printf256mem">; // A version of i8mem for use on x86-64 that uses GR64_NOREX instead of // plain GR64, so that it doesn't potentially require a REX prefix. diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td index 996baa0..2e6f017 100644 --- a/lib/Target/X86/X86RegisterInfo.td +++ b/lib/Target/X86/X86RegisterInfo.td @@ -157,6 +157,24 @@ let Namespace = "X86" in { def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>; def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>; + // YMM Registers, used by AVX instructions + def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>; + def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>; + def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>; + def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>; + def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>; + def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>; + def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>; + def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>; + def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>; + def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>; + def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>; + def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>; + def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>; + def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>; + def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>; + def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>; + // Floating point stack registers def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>; def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>; @@ -229,6 +247,11 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI, [EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI, R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>; +def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15], + [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, + XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>; + //===----------------------------------------------------------------------===// // Register Class Definitions... now that we have all of the pieces, define the // top-level register classes. The order specified in the register list is @@ -755,6 +778,10 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128, } }]; } +def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256, + [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, + YMM8, YMM9, YMM10, YMM11, + YMM12, YMM13, YMM14, YMM15]>; // Status flags registers. def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> { diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index f4f6cce..0d1434f 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -142,7 +142,7 @@ public: bool hasSSE4A() const { return HasSSE4A; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } - bool hasAVX() const { return hasAVX(); } + bool hasAVX() const { return HasAVX; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 67dcd01..b000914 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -226,7 +226,7 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM, assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose)); + PM.add(AsmPrinterCtor(Out, *this, Verbose)); return false; } @@ -254,7 +254,7 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -284,7 +284,7 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -298,7 +298,7 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; @@ -312,7 +312,7 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM, if (DumpAsm) { assert(AsmPrinterCtor && "AsmPrinter was not linked in"); if (AsmPrinterCtor) - PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true)); + PM.add(AsmPrinterCtor(errs(), *this, true)); } return false; diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index ba73ca8..90a5cc2 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -45,7 +45,6 @@ protected: // set this functions to ctor pointer at startup time if they are linked in. typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o, X86TargetMachine &tm, - CodeGenOpt::Level OptLevel, bool verbose); static AsmPrinterCtorFn AsmPrinterCtor; |