Diffstat (limited to 'lib/Target/X86')
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp   |  10
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h     |  11
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp      |   7
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp |   5
-rw-r--r--  lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h   |  13
-rw-r--r--  lib/Target/X86/CMakeLists.txt                    |   2
-rw-r--r--  lib/Target/X86/X86.h                             |   4
-rw-r--r--  lib/Target/X86/X86.td                            |   4
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.cpp              |  12
-rw-r--r--  lib/Target/X86/X86ELFWriterInfo.h                |   4
-rw-r--r--  lib/Target/X86/X86FastISel.cpp                   |  29
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp              |  15
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp               | 200
-rw-r--r--  lib/Target/X86/X86ISelLowering.h                 |   7
-rw-r--r--  lib/Target/X86/X86InstrBuilder.h                 |   6
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp                  |   3
-rw-r--r--  lib/Target/X86/X86InstrInfo.td                   |   2
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td                |  27
-rw-r--r--  lib/Target/X86/X86Subtarget.h                    |   2
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp              |  10
-rw-r--r--  lib/Target/X86/X86TargetMachine.h                |   1
21 files changed, 295 insertions, 79 deletions
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
index e75cfc5..127f228 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
@@ -154,21 +154,13 @@ void X86ATTAsmPrinter::decorateName(std::string &Name,
}
}
-
-
void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
+ unsigned FnAlign = MF.getAlignment();
const Function *F = MF.getFunction();
decorateName(CurrentFnName, F);
SwitchToSection(TAI->SectionForGlobal(F));
-
- // FIXME: A function's alignment should be part of MachineFunction. There
- // shouldn't be a policy decision here.
- unsigned FnAlign = 4;
- if (F->hasFnAttr(Attribute::OptimizeForSize))
- FnAlign = 1;
-
switch (F->getLinkage()) {
default: assert(0 && "Unknown linkage type!");
case Function::InternalLinkage: // Symbols default to internal.
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
index bd96115..f47daf1 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
@@ -38,9 +38,8 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
MCStreamer *Streamer;
public:
explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
- const TargetAsmInfo *T, CodeGenOpt::Level OL,
- bool V)
- : AsmPrinter(O, TM, T, OL, V) {
+ const TargetAsmInfo *T, bool V)
+ : AsmPrinter(O, TM, T, V) {
Subtarget = &TM.getSubtarget<X86Subtarget>();
Context = 0;
Streamer = 0;
@@ -140,6 +139,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
void printi128mem(const MachineInstr *MI, unsigned OpNo) {
printMemReference(MI, OpNo);
}
+ void printi256mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
void printf32mem(const MachineInstr *MI, unsigned OpNo) {
printMemReference(MI, OpNo);
}
@@ -152,6 +154,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
void printf128mem(const MachineInstr *MI, unsigned OpNo) {
printMemReference(MI, OpNo);
}
+ void printf256mem(const MachineInstr *MI, unsigned OpNo) {
+ printMemReference(MI, OpNo);
+ }
void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
printLeaMemReference(MI, OpNo);
}
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index d1623d6..e5d80a4 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -25,15 +25,12 @@ using namespace llvm;
///
FunctionPass *llvm::createX86CodePrinterPass(raw_ostream &o,
X86TargetMachine &tm,
- CodeGenOpt::Level OptLevel,
bool verbose) {
const X86Subtarget *Subtarget = &tm.getSubtarget<X86Subtarget>();
if (Subtarget->isFlavorIntel())
- return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(),
- OptLevel, verbose);
- return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(),
- OptLevel, verbose);
+ return new X86IntelAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
+ return new X86ATTAsmPrinter(o, tm, tm.getTargetAsmInfo(), verbose);
}
namespace {
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
index ceae7be..9d4df93 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.cpp
@@ -132,6 +132,7 @@ bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Print out labels for the function.
const Function *F = MF.getFunction();
unsigned CC = F->getCallingConv();
+ unsigned FnAlign = MF.getAlignment();
// Populate function information map. Actually, we don't want to populate
// non-stdcall or non-fastcall functions' information right now.
@@ -141,10 +142,6 @@ bool X86IntelAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
decorateName(CurrentFnName, F);
SwitchToTextSection("_text", F);
-
- unsigned FnAlign = 4;
- if (F->hasFnAttr(Attribute::OptimizeForSize))
- FnAlign = 1;
switch (F->getLinkage()) {
default: assert(0 && "Unsupported linkage type!");
case Function::PrivateLinkage:
diff --git a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
index 04f2595..a724c3c 100644
--- a/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86IntelAsmPrinter.h
@@ -26,9 +26,8 @@ namespace llvm {
struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
explicit X86IntelAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
- const TargetAsmInfo *T, CodeGenOpt::Level OL,
- bool V)
- : AsmPrinter(O, TM, T, OL, V) {}
+ const TargetAsmInfo *T, bool V)
+ : AsmPrinter(O, TM, T, V) {}
virtual const char *getPassName() const {
return "X86 Intel-Style Assembly Printer";
@@ -76,6 +75,10 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
O << "XMMWORD PTR ";
printMemReference(MI, OpNo);
}
+ void printi256mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
void printf32mem(const MachineInstr *MI, unsigned OpNo) {
O << "DWORD PTR ";
printMemReference(MI, OpNo);
@@ -92,6 +95,10 @@ struct VISIBILITY_HIDDEN X86IntelAsmPrinter : public AsmPrinter {
O << "XMMWORD PTR ";
printMemReference(MI, OpNo);
}
+ void printf256mem(const MachineInstr *MI, unsigned OpNo) {
+ O << "YMMWORD PTR ";
+ printMemReference(MI, OpNo);
+ }
void printlea32mem(const MachineInstr *MI, unsigned OpNo) {
O << "DWORD PTR ";
printLeaMemReference(MI, OpNo);
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index d982990..7ea0e51 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -27,3 +27,5 @@ add_llvm_target(X86CodeGen
X86TargetMachine.cpp
X86FastISel.cpp
)
+
+target_link_libraries (LLVMX86CodeGen LLVMSelectionDAG)
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index fd13b02..22de3f6 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -46,9 +46,7 @@ FunctionPass *createX87FPRegKillInserterPass();
/// assembly code for a MachineFunction to the given output stream,
/// using the given target machine description.
///
-FunctionPass *createX86CodePrinterPass(raw_ostream &o,
- X86TargetMachine &tm,
- CodeGenOpt::Level OptLevel,
+FunctionPass *createX86CodePrinterPass(raw_ostream &o, X86TargetMachine &tm,
bool Verbose);
/// createX86CodeEmitterPass - Return a pass that emits the collected X86 code
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 4d26364..47861d5 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -119,6 +119,10 @@ def : Proc<"amdfam10", [FeatureSSE3, FeatureSSE4A,
Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A,
Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"istanbul", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
+ Feature3DNowA]>;
+def : Proc<"shanghai", [Feature3DNowA, Feature64Bit, FeatureSSE4A,
+ Feature3DNowA]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
diff --git a/lib/Target/X86/X86ELFWriterInfo.cpp b/lib/Target/X86/X86ELFWriterInfo.cpp
index 315118f..912ab0e 100644
--- a/lib/Target/X86/X86ELFWriterInfo.cpp
+++ b/lib/Target/X86/X86ELFWriterInfo.cpp
@@ -59,18 +59,6 @@ unsigned X86ELFWriterInfo::getRelocationType(unsigned MachineRelTy) const {
return 0;
}
-unsigned X86ELFWriterInfo::getFunctionAlignment(const Function *F) const {
- unsigned FnAlign = 4;
-
- if (F->hasFnAttr(Attribute::OptimizeForSize))
- FnAlign = 1;
-
- if (F->getAlignment())
- FnAlign = Log2_32(F->getAlignment());
-
- return (1 << FnAlign);
-}
-
long int X86ELFWriterInfo::getAddendForRelTy(unsigned RelTy) const {
if (is64Bit) {
switch(RelTy) {
diff --git a/lib/Target/X86/X86ELFWriterInfo.h b/lib/Target/X86/X86ELFWriterInfo.h
index 96485b8..2ba1a0b 100644
--- a/lib/Target/X86/X86ELFWriterInfo.h
+++ b/lib/Target/X86/X86ELFWriterInfo.h
@@ -41,10 +41,6 @@ namespace llvm {
X86ELFWriterInfo(TargetMachine &TM);
virtual ~X86ELFWriterInfo();
- /// getFunctionAlignment - Returns the alignment for function 'F', targets
- /// with different alignment constraints should overload this method
- virtual unsigned getFunctionAlignment(const Function *F) const;
-
/// getRelocationType - Returns the target specific ELF Relocation type.
/// 'MachineRelTy' contains the object code independent relocation type
virtual unsigned getRelocationType(unsigned MachineRelTy) const;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 8a21b35..b336d78 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -452,25 +452,38 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) {
// Check to see if we've already materialized this
// value in a register in this block.
- if (unsigned Reg = LocalValueMap[V]) {
- AM.Base.Reg = Reg;
+ DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
+ if (I != LocalValueMap.end() && I->second != 0) {
+ AM.Base.Reg = I->second;
AM.GV = 0;
return true;
}
- // Issue load from stub if necessary.
+
+ // Issue load from stub.
unsigned Opc = 0;
const TargetRegisterClass *RC = NULL;
+ X86AddressMode StubAM;
+ StubAM.Base.Reg = AM.Base.Reg;
+ StubAM.GV = AM.GV;
+
if (TLI.getPointerTy() == MVT::i32) {
Opc = X86::MOV32rm;
RC = X86::GR32RegisterClass;
+
+ if (Subtarget->isPICStyleGOT() &&
+ TM.getRelocationModel() == Reloc::PIC_)
+ StubAM.GVOpFlags = X86II::MO_GOT;
+
} else {
Opc = X86::MOV64rm;
RC = X86::GR64RegisterClass;
+
+ if (TM.getRelocationModel() != Reloc::Static) {
+ StubAM.GVOpFlags = X86II::MO_GOTPCREL;
+ StubAM.Base.Reg = X86::RIP;
+ }
}
- X86AddressMode StubAM;
- StubAM.Base.Reg = AM.Base.Reg;
- StubAM.GV = AM.GV;
unsigned ResultReg = createResultReg(RC);
addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), StubAM);
@@ -1503,7 +1516,9 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
} else if (Subtarget->isPICStyleGOT()) {
OpFlag = X86II::MO_GOTOFF;
PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
- }
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ TM.getCodeModel() == CodeModel::Small)
+ PICBase = X86::RIP;
}
// Create the load from the constant pool.
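
A hedged aside on the LocalValueMap change above: DenseMap::operator[]
default-inserts a zero-valued entry for a missing key, which is presumably
why the code now uses find() instead. A minimal sketch of the difference,
using a hypothetical map (illustration only, not part of this patch):

    #include "llvm/ADT/DenseMap.h"

    void demo() {
      llvm::DenseMap<int, unsigned> LocalValueMap;
      // find() leaves the map untouched on a miss:
      llvm::DenseMap<int, unsigned>::iterator I = LocalValueMap.find(42);
      bool Materialized = (I != LocalValueMap.end() && I->second != 0);
      (void)Materialized;
      // operator[] would have inserted {42, 0} as a side effect:
      //   unsigned Reg = LocalValueMap[42];  // grows the map on a miss
    }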
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index ed4eb44..37027ee 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -990,16 +990,21 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
}
case X86::FpSET_ST0_32:
case X86::FpSET_ST0_64:
- case X86::FpSET_ST0_80:
+ case X86::FpSET_ST0_80: {
+ unsigned Op0 = getFPReg(MI->getOperand(0));
+
// FpSET_ST0_80 is generated by copyRegToReg for both function return
// and inline assembly with the "st" constraint. In the latter case,
- // it is possible for FP0 to be alive after this instruction.
- if (!MI->killsRegister(X86::FP0)) {
- // Duplicate ST0
- duplicateToTop(0, 0, I);
+ // it is possible for ST(0) to be alive after this instruction.
+ if (!MI->killsRegister(X86::FP0 + Op0)) {
+ // Duplicate Op0
+ duplicateToTop(0, 7 /*temp register*/, I);
+ } else {
+ moveToTop(Op0, I);
}
--StackTop; // "Forget" we have something on the top of stack!
break;
+ }
case X86::FpSET_ST1_32:
case X86::FpSET_ST1_64:
case X86::FpSET_ST1_80:
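
A hedged illustration of the inline-asm case the FpSET_ST0 comment above
describes: GCC-style inline assembly can pin a value to the x87 stack top
(GCC spells the constraint "t", LLVM "{st}"), and ST(0) may remain live
after the copy. A minimal sketch, not code from this patch:

    // "=t" writes ST(0); "0" ties the input to the same register.
    double x87_sqrt(double x) {
      double r;
      asm("fsqrt" : "=t"(r) : "0"(x));  // fsqrt operates in place on ST(0)
      return r;
    }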
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9614e69..5a6294a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -700,6 +700,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Do not attempt to custom lower non-power-of-2 vectors
if (!isPowerOf2_32(VT.getVectorNumElements()))
continue;
+ // Do not attempt to custom lower non-128-bit vectors
+ if (!VT.is128BitVector())
+ continue;
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -718,17 +721,23 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
}
// Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
- for (unsigned VT = (unsigned)MVT::v16i8; VT != (unsigned)MVT::v2i64; VT++) {
- setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v2i64);
- setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v2i64);
- setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v2i64);
- setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v2i64);
- setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v2i64);
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v2i64; i++) {
+ MVT VT = (MVT::SimpleValueType)i;
+
+ // Do not attempt to promote non-128-bit vectors
+ if (!VT.is128BitVector()) {
+ continue;
+ }
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v2i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v2i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v2i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v2i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v2i64);
}
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -775,6 +784,114 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::VSETCC, MVT::v2i64, Custom);
}
+ if (!UseSoftFloat && Subtarget->hasAVX()) {
+ addRegisterClass(MVT::v8f32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4f64, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v8i32, X86::VR256RegisterClass);
+ addRegisterClass(MVT::v4i64, X86::VR256RegisterClass);
+
+ setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
+ setOperationAction(ISD::LOAD, MVT::v8i32, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
+ setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
+ setOperationAction(ISD::FADD, MVT::v8f32, Legal);
+ setOperationAction(ISD::FSUB, MVT::v8f32, Legal);
+ setOperationAction(ISD::FMUL, MVT::v8f32, Legal);
+ setOperationAction(ISD::FDIV, MVT::v8f32, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v8f32, Legal);
+ setOperationAction(ISD::FNEG, MVT::v8f32, Custom);
+ //setOperationAction(ISD::BUILD_VECTOR, MVT::v8f32, Custom);
+ //setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Custom);
+ //setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8f32, Custom);
+ //setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
+ //setOperationAction(ISD::VSETCC, MVT::v8f32, Custom);
+
+ // Operations to consider commented out -v16i16 v32i8
+ //setOperationAction(ISD::ADD, MVT::v16i16, Legal);
+ setOperationAction(ISD::ADD, MVT::v8i32, Custom);
+ setOperationAction(ISD::ADD, MVT::v4i64, Custom);
+ //setOperationAction(ISD::SUB, MVT::v32i8, Legal);
+ //setOperationAction(ISD::SUB, MVT::v16i16, Legal);
+ setOperationAction(ISD::SUB, MVT::v8i32, Custom);
+ setOperationAction(ISD::SUB, MVT::v4i64, Custom);
+ //setOperationAction(ISD::MUL, MVT::v16i16, Legal);
+ setOperationAction(ISD::FADD, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
+ setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
+ setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
+ setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
+ setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
+
+ setOperationAction(ISD::VSETCC, MVT::v4f64, Custom);
+ // setOperationAction(ISD::VSETCC, MVT::v32i8, Custom);
+ // setOperationAction(ISD::VSETCC, MVT::v16i16, Custom);
+ setOperationAction(ISD::VSETCC, MVT::v8i32, Custom);
+
+ // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i8, Custom);
+ // setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i16, Custom);
+ // setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i16, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i32, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8f32, Custom);
+
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v4i64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i64, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Custom);
+
+#if 0
+ // Not sure we want to do this since there are no 256-bit integer
+ // operations in AVX
+
+ // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
+ // This includes 256-bit vectors
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; ++i) {
+ MVT VT = (MVT::SimpleValueType)i;
+
+ // Do not attempt to custom lower non-power-of-2 vectors
+ if (!isPowerOf2_32(VT.getVectorNumElements()))
+ continue;
+
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ }
+
+ if (Subtarget->is64Bit()) {
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i64, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i64, Custom);
+ }
+#endif
+
+#if 0
+ // Not sure we want to do this since there are no 256-bit integer
+ // operations in AVX
+
+ // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
+ // Including 256-bit vectors
+ for (unsigned i = (unsigned)MVT::v16i8; i != (unsigned)MVT::v4i64; i++) {
+ MVT VT = (MVT::SimpleValueType)i;
+
+ if (!VT.is256BitVector()) {
+ continue;
+ }
+ setOperationAction(ISD::AND, VT, Promote);
+ AddPromotedToType (ISD::AND, VT, MVT::v4i64);
+ setOperationAction(ISD::OR, VT, Promote);
+ AddPromotedToType (ISD::OR, VT, MVT::v4i64);
+ setOperationAction(ISD::XOR, VT, Promote);
+ AddPromotedToType (ISD::XOR, VT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, VT, Promote);
+ AddPromotedToType (ISD::LOAD, VT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType (ISD::SELECT, VT, MVT::v4i64);
+ }
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+#endif
+ }
+
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -805,6 +922,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::MEMBARRIER);
if (Subtarget->is64Bit())
setTargetDAGCombine(ISD::MUL);
@@ -909,6 +1027,11 @@ SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
return Table;
}
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
+ return F->hasFnAttr(Attribute::OptimizeForSize) ? 1 : 4;
+}
+
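As the doc comment says, the value returned above is a Log2 alignment; a
small illustration of what the two returns mean in bytes (editorial sketch,
not part of the patch):

    // Converting a Log2 alignment to a byte boundary.
    unsigned alignmentInBytes(unsigned AlignLog2) { return 1u << AlignLog2; }
    // alignmentInBytes(4) == 16  -> default 16-byte function alignment
    // alignmentInBytes(1) == 2   -> 2-byte alignment under OptimizeForSize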
//===----------------------------------------------------------------------===//
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -5690,7 +5813,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
Args.push_back(Entry);
std::pair<SDValue,SDValue> CallResult =
LowerCallTo(Chain, Type::VoidTy, false, false, false, false,
- CallingConv::C, false,
+ 0, CallingConv::C, false,
DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
return CallResult.second;
}
@@ -8454,6 +8577,58 @@ static SDValue PerformVZEXT_MOVLCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+// On X86 and X86-64, atomic operations are lowered to locked instructions.
+// Locked instructions, in turn, have implicit fence semantics (all memory
+// operations are flushed before issuing the locked instruction, and they
+// are not buffered), so we can fold away the common pattern of
+// fence-atomic-fence.
+static SDValue PerformMEMBARRIERCombine(SDNode* N, SelectionDAG &DAG) {
+ SDValue atomic = N->getOperand(0);
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ break;
+ default:
+ return SDValue();
+ }
+
+ SDValue fence = atomic.getOperand(0);
+ if (fence.getOpcode() != ISD::MEMBARRIER)
+ return SDValue();
+
+ switch (atomic.getOpcode()) {
+ case ISD::ATOMIC_CMP_SWAP:
+ return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2),
+ atomic.getOperand(3));
+ case ISD::ATOMIC_SWAP:
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ return DAG.UpdateNodeOperands(atomic, fence.getOperand(0),
+ atomic.getOperand(1), atomic.getOperand(2));
+ default:
+ return SDValue();
+ }
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -8472,6 +8647,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FAND: return PerformFANDCombine(N, DAG);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
+ case ISD::MEMBARRIER: return PerformMEMBARRIERCombine(N, DAG);
}
return SDValue();
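
A hedged sketch of the source-level pattern PerformMEMBARRIERCombine
targets, assuming a GCC-style atomic builtin (the commit names no
particular frontend): the atomic is bracketed by barriers in the DAG, and
since the locked x86 instruction already has fence semantics, the
surrounding fences fold away.

    // Hypothetical example. The builtin lowers to roughly
    //   MEMBARRIER; ATOMIC_LOAD_ADD; MEMBARRIER
    // in the SelectionDAG; "lock add" already fences, so the
    // fence-atomic-fence pattern collapses to the atomic alone.
    int Counter;
    void bump() { __sync_fetch_and_add(&Counter, 1); }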
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index fb4eb68..ffed46c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -380,7 +380,7 @@ namespace llvm {
MVT getOptimalMemOpType(uint64_t Size, unsigned Align,
bool isSrcConst, bool isSrcStr,
SelectionDAG &DAG) const;
-
+
/// LowerOperation - Provide custom lowering hooks for some operations.
///
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
@@ -533,7 +533,10 @@ namespace llvm {
, SmallSet<Instruction*, 8> &
#endif
);
-
+
+ /// getFunctionAlignment - Return the Log2 alignment of this function.
+ virtual unsigned getFunctionAlignment(const Function *F) const;
+
private:
/// Subtarget - Keep a pointer to the X86Subtarget around so that we can
/// make the right decision when generating code for different targets.
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index b50dd65..6359542 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -49,8 +49,10 @@ struct X86AddressMode {
unsigned IndexReg;
unsigned Disp;
GlobalValue *GV;
+ unsigned GVOpFlags;
- X86AddressMode() : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0) {
+ X86AddressMode()
+ : BaseType(RegBase), Scale(1), IndexReg(0), Disp(0), GV(0), GVOpFlags(0) {
Base.Reg = 0;
}
};
@@ -113,7 +115,7 @@ inline const MachineInstrBuilder &addLeaAddress(const MachineInstrBuilder &MIB,
assert (0);
MIB.addImm(AM.Scale).addReg(AM.IndexReg);
if (AM.GV)
- return MIB.addGlobalAddress(AM.GV, AM.Disp);
+ return MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags);
else
return MIB.addImm(AM.Disp);
}
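
A hedged sketch of how the new GVOpFlags field flows through, mirroring the
X86FastISel change earlier in this patch (SomeGlobal and the register
choices here are hypothetical):

    // The flag stored here reaches the emitted operand via
    // MIB.addGlobalAddress(AM.GV, AM.Disp, AM.GVOpFlags) above.
    X86AddressMode StubAM;
    StubAM.Base.Reg  = X86::RIP;            // RIP-relative base (x86-64 PIC)
    StubAM.GV        = SomeGlobal;          // hypothetical GlobalValue*
    StubAM.GVOpFlags = X86II::MO_GOTPCREL;  // load the address via the GOT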
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 21f71ec..e5d84c5 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -2459,7 +2459,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
getDefRegState(MO.isDef()) |
RegState::Implicit |
getKillRegState(MO.isKill()) |
- getDeadRegState(MO.isDead()));
+ getDeadRegState(MO.isDead()) |
+ getUndefRegState(MO.isUndef()));
}
// Change CMP32ri r, 0 back to TEST32rr r, r, etc.
unsigned NewOpc = 0;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index a6b0880..03df10d 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -180,10 +180,12 @@ def i16mem : X86MemOperand<"printi16mem">;
def i32mem : X86MemOperand<"printi32mem">;
def i64mem : X86MemOperand<"printi64mem">;
def i128mem : X86MemOperand<"printi128mem">;
+def i256mem : X86MemOperand<"printi256mem">;
def f32mem : X86MemOperand<"printf32mem">;
def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
def f128mem : X86MemOperand<"printf128mem">;
+def f256mem : X86MemOperand<"printf256mem">;
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
// plain GR64, so that it doesn't potentially require a REX prefix.
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 996baa0..2e6f017 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -157,6 +157,24 @@ let Namespace = "X86" in {
def XMM14: Register<"xmm14">, DwarfRegNum<[31, -2, -2]>;
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
+ // YMM Registers, used by AVX instructions
+ def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>;
+ def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>;
+ def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>;
+ def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>;
+ def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>;
+ def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>;
+ def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>;
+ def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>;
+ def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>;
+ def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>;
+ def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>;
+ def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>;
+ def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>;
+ def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>;
+ def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>;
+ def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>;
+
// Floating point stack registers
def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>;
@@ -229,6 +247,11 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
[EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
+def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
@@ -755,6 +778,10 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
}
}];
}
+def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
+ [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11,
+ YMM12, YMM13, YMM14, YMM15]>;
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index f4f6cce..0d1434f 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -142,7 +142,7 @@ public:
bool hasSSE4A() const { return HasSSE4A; }
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
- bool hasAVX() const { return hasAVX(); }
+ bool hasAVX() const { return HasAVX; }
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 67dcd01..b000914 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -226,7 +226,7 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
assert(AsmPrinterCtor && "AsmPrinter was not linked in");
if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(Out, *this, OptLevel, Verbose));
+ PM.add(AsmPrinterCtor(Out, *this, Verbose));
return false;
}
@@ -254,7 +254,7 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
if (DumpAsm) {
assert(AsmPrinterCtor && "AsmPrinter was not linked in");
if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ PM.add(AsmPrinterCtor(errs(), *this, true));
}
return false;
@@ -284,7 +284,7 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
if (DumpAsm) {
assert(AsmPrinterCtor && "AsmPrinter was not linked in");
if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ PM.add(AsmPrinterCtor(errs(), *this, true));
}
return false;
@@ -298,7 +298,7 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
if (DumpAsm) {
assert(AsmPrinterCtor && "AsmPrinter was not linked in");
if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ PM.add(AsmPrinterCtor(errs(), *this, true));
}
return false;
@@ -312,7 +312,7 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
if (DumpAsm) {
assert(AsmPrinterCtor && "AsmPrinter was not linked in");
if (AsmPrinterCtor)
- PM.add(AsmPrinterCtor(errs(), *this, OptLevel, true));
+ PM.add(AsmPrinterCtor(errs(), *this, true));
}
return false;
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index ba73ca8..90a5cc2 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -45,7 +45,6 @@ protected:
// set this functions to ctor pointer at startup time if they are linked in.
typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
X86TargetMachine &tm,
- CodeGenOpt::Level OptLevel,
bool verbose);
static AsmPrinterCtorFn AsmPrinterCtor;