diff options
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/ARM/ARMConstantIslandPass.cpp | 33 | ||||
-rw-r--r-- | lib/Target/ARM/ARMISelDAGToDAG.cpp | 283 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrInfo.td | 49 | ||||
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 6 |
4 files changed, 179 insertions, 192 deletions
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp index 309e3ba..b0bd04b 100644 --- a/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -74,16 +74,28 @@ namespace { /// CPUser - One user of a constant pool, keeping the machine instruction /// pointer, the constant pool being referenced, and the max displacement - /// allowed from the instruction to the CP. + /// allowed from the instruction to the CP. The HighWaterMark records the + /// highest basic block where a new CPEntry can be placed. To ensure this + /// pass terminates, the CP entries are initially placed at the end of the + /// function and then move monotonically to lower addresses. The + /// exception to this rule is when the current CP entry for a particular + /// CPUser is out of range, but there is another CP entry for the same + /// constant value in range. We want to use the existing in-range CP + /// entry, but if it later moves out of range, the search for new water + /// should resume where it left off. The HighWaterMark is used to record + /// that point. struct CPUser { MachineInstr *MI; MachineInstr *CPEMI; + MachineBasicBlock *HighWaterMark; unsigned MaxDisp; bool NegOk; bool IsSoImm; CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp, bool neg, bool soimm) - : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) {} + : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm) { + HighWaterMark = CPEMI->getParent(); + } }; /// CPUsers - Keep track of all of the machine instructions that use various @@ -962,8 +974,8 @@ bool ARMConstantIslands::LookForWater(CPUser &U, unsigned UserOffset, B = WaterList.begin();; --IP) { MachineBasicBlock* WaterBB = *IP; // Check if water is in range and at a lower address than the current one. - if (WaterIsInRange(UserOffset, WaterBB, U) && - WaterBB->getNumber() < U.CPEMI->getParent()->getNumber()) { + if (WaterBB->getNumber() < U.HighWaterMark->getNumber() && + WaterIsInRange(UserOffset, WaterBB, U)) { unsigned WBBId = WaterBB->getNumber(); if (isThumb && (BBOffsets[WBBId] + BBSizes[WBBId])%4 != 0) { @@ -1006,14 +1018,12 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex, BBSizes[UserMBB->getNumber()]; assert(OffsetOfNextBlock== BBOffsets[UserMBB->getNumber()+1]); - // If the use is at the end of the block, or the end of the block - // is within range, make new water there. (The addition below is - // for the unconditional branch we will be adding: 4 bytes on ARM + Thumb2, - // 2 on Thumb1. Possible Thumb1 alignment padding is allowed for + // If the block does not end in an unconditional branch already, and if the + // end of the block is within range, make new water there. (The addition + // below is for the unconditional branch we will be adding: 4 bytes on ARM + + // Thumb2, 2 on Thumb1. Possible Thumb1 alignment padding is allowed for // inside OffsetIsInRange. - // If the block ends in an unconditional branch already, it is water, - // and is known to be out of range, so we'll always be adding a branch.) - if (&UserMBB->back() == UserMI || + if (BBHasFallthrough(UserMBB) && OffsetIsInRange(UserOffset, OffsetOfNextBlock + (isThumb1 ? 2: 4), U.MaxDisp, U.NegOk, U.IsSoImm)) { DEBUG(errs() << "Split at end of block\n"); @@ -1131,6 +1141,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF, // Now that we have an island to add the CPE to, clone the original CPE and // add it to the island. + U.HighWaterMark = NewIsland; U.CPEMI = BuildMI(NewIsland, DebugLoc::getUnknownLoc(), TII->get(ARM::CONSTPOOL_ENTRY)) .addImm(ID).addConstantPoolIndex(CPI).addImm(Size); diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index bebf4e8..c39de0a 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -133,6 +133,13 @@ private: SDNode *SelectVLD(SDValue Op, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1); + /// SelectVST - Select NEON store intrinsics. NumVecs should + /// be 2, 3 or 4. The opcode arrays specify the instructions used for + /// stores of D registers and even subregs and odd subregs of Q registers. + /// For NumVecs == 2, QOpcodes1 is not used. + SDNode *SelectVST(SDValue Op, unsigned NumVecs, unsigned *DOpcodes, + unsigned *QOpcodes0, unsigned *QOpcodes1); + /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should /// be 2, 3 or 4. The opcode arrays specify the instructions used for /// load/store of D registers and even subregs and odd subregs of Q registers. @@ -1063,13 +1070,13 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, ResTys.push_back(MemAddr.getValueType()); ResTys.push_back(MVT::Other); - // Load the even subreg. + // Load the even subregs. unsigned Opc = QOpcodes0[OpcodeIndex]; const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain }; SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4); Chain = SDValue(VLdA, NumVecs+1); - // Load the odd subreg. + // Load the odd subregs. Opc = QOpcodes1[OpcodeIndex]; const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain }; SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4); @@ -1085,6 +1092,95 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs, return NULL; } +SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs, + unsigned *DOpcodes, unsigned *QOpcodes0, + unsigned *QOpcodes1) { + assert(NumVecs >=2 && NumVecs <= 4 && "VST NumVecs out-of-range"); + SDNode *N = Op.getNode(); + DebugLoc dl = N->getDebugLoc(); + + SDValue MemAddr, MemUpdate, MemOpc; + if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) + return NULL; + + SDValue Chain = N->getOperand(0); + EVT VT = N->getOperand(3).getValueType(); + bool is64BitVector = VT.is64BitVector(); + + unsigned OpcodeIndex; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("unhandled vst type"); + // Double-register operations: + case MVT::v8i8: OpcodeIndex = 0; break; + case MVT::v4i16: OpcodeIndex = 1; break; + case MVT::v2f32: + case MVT::v2i32: OpcodeIndex = 2; break; + case MVT::v1i64: OpcodeIndex = 3; break; + // Quad-register operations: + case MVT::v16i8: OpcodeIndex = 0; break; + case MVT::v8i16: OpcodeIndex = 1; break; + case MVT::v4f32: + case MVT::v4i32: OpcodeIndex = 2; break; + } + + SmallVector<SDValue, 8> Ops; + Ops.push_back(MemAddr); + Ops.push_back(MemUpdate); + Ops.push_back(MemOpc); + + if (is64BitVector) { + unsigned Opc = DOpcodes[OpcodeIndex]; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(N->getOperand(Vec+3)); + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4); + } + + EVT RegVT = GetNEONSubregVT(VT); + if (NumVecs == 2) { + // Quad registers are directly supported for VST2, + // storing 2 pairs of D regs. + unsigned Opc = QOpcodes0[OpcodeIndex]; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(Vec+3))); + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(Vec+3))); + } + Ops.push_back(Chain); + return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8); + } + + // Otherwise, quad registers are stored with two separate instructions, + // where one stores the even registers and the other stores the odd registers. + + // Enable writeback to the address register. + MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); + + // Store the even subregs. + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops.push_back(CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, + N->getOperand(Vec+3))); + Ops.push_back(Chain); + unsigned Opc = QOpcodes0[OpcodeIndex]; + SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+4); + Chain = SDValue(VStA, 1); + + // Store the odd subregs. + Ops[0] = SDValue(VStA, 0); // MemAddr + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, + N->getOperand(Vec+3)); + Ops[NumVecs+3] = Chain; + Opc = QOpcodes1[OpcodeIndex]; + SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), + MVT::Other, Ops.data(), NumVecs+4); + Chain = SDValue(VStB, 1); + ReplaceUses(SDValue(N, 0), Chain); + return NULL; +} + SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, @@ -1612,9 +1708,6 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: { unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); - EVT VT = N->getValueType(0); - unsigned Opc = 0; - switch (IntNo) { default: break; @@ -1664,178 +1757,26 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { } case Intrinsic::arm_neon_vst2: { - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) - return NULL; - SDValue Chain = N->getOperand(0); - VT = N->getOperand(3).getValueType(); - if (VT.is64BitVector()) { - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("unhandled vst2 type"); - case MVT::v8i8: Opc = ARM::VST2d8; break; - case MVT::v4i16: Opc = ARM::VST2d16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VST2d32; break; - case MVT::v1i64: Opc = ARM::VST2d64; break; - } - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, - N->getOperand(3), N->getOperand(4), Chain }; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6); - } - // Quad registers are stored as pairs of double registers. - EVT RegVT; - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("unhandled vst2 type"); - case MVT::v16i8: Opc = ARM::VST2q8; RegVT = MVT::v8i8; break; - case MVT::v8i16: Opc = ARM::VST2q16; RegVT = MVT::v4i16; break; - case MVT::v4f32: Opc = ARM::VST2q32; RegVT = MVT::v2f32; break; - case MVT::v4i32: Opc = ARM::VST2q32; RegVT = MVT::v2i32; break; - } - SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(3)); - SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(3)); - SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(4)); - SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(4)); - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, - D0, D1, D2, D3, Chain }; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8); + unsigned DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, + ARM::VST2d32, ARM::VST2d64 }; + unsigned QOpcodes[] = { ARM::VST2q8, ARM::VST2q16, ARM::VST2q32 }; + return SelectVST(Op, 2, DOpcodes, QOpcodes, 0); } case Intrinsic::arm_neon_vst3: { - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) - return NULL; - SDValue Chain = N->getOperand(0); - VT = N->getOperand(3).getValueType(); - if (VT.is64BitVector()) { - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("unhandled vst3 type"); - case MVT::v8i8: Opc = ARM::VST3d8; break; - case MVT::v4i16: Opc = ARM::VST3d16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VST3d32; break; - case MVT::v1i64: Opc = ARM::VST3d64; break; - } - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, - N->getOperand(3), N->getOperand(4), - N->getOperand(5), Chain }; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7); - } - // Quad registers are stored with two separate instructions, where one - // stores the even registers and the other stores the odd registers. - EVT RegVT; - unsigned Opc2 = 0; - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("unhandled vst3 type"); - case MVT::v16i8: - Opc = ARM::VST3q8a; Opc2 = ARM::VST3q8b; RegVT = MVT::v8i8; break; - case MVT::v8i16: - Opc = ARM::VST3q16a; Opc2 = ARM::VST3q16b; RegVT = MVT::v4i16; break; - case MVT::v4f32: - Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2f32; break; - case MVT::v4i32: - Opc = ARM::VST3q32a; Opc2 = ARM::VST3q32b; RegVT = MVT::v2i32; break; - } - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); - - SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(3)); - SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(4)); - SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(5)); - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, D0, D2, D4, Chain }; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, OpsA, 7); - Chain = SDValue(VStA, 1); - - SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(3)); - SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(4)); - SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(5)); - MemAddr = SDValue(VStA, 0); - const SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc, D1, D3, D5, Chain }; - SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(), - MVT::Other, OpsB, 7); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; + unsigned DOpcodes[] = { ARM::VST3d8, ARM::VST3d16, + ARM::VST3d32, ARM::VST3d64 }; + unsigned QOpcodes0[] = { ARM::VST3q8a, ARM::VST3q16a, ARM::VST3q32a }; + unsigned QOpcodes1[] = { ARM::VST3q8b, ARM::VST3q16b, ARM::VST3q32b }; + return SelectVST(Op, 3, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst4: { - SDValue MemAddr, MemUpdate, MemOpc; - if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc)) - return NULL; - SDValue Chain = N->getOperand(0); - VT = N->getOperand(3).getValueType(); - if (VT.is64BitVector()) { - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("unhandled vst4 type"); - case MVT::v8i8: Opc = ARM::VST4d8; break; - case MVT::v4i16: Opc = ARM::VST4d16; break; - case MVT::v2f32: - case MVT::v2i32: Opc = ARM::VST4d32; break; - case MVT::v1i64: Opc = ARM::VST4d64; break; - } - const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, - N->getOperand(3), N->getOperand(4), - N->getOperand(5), N->getOperand(6), Chain }; - return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 8); - } - // Quad registers are stored with two separate instructions, where one - // stores the even registers and the other stores the odd registers. - EVT RegVT; - unsigned Opc2 = 0; - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("unhandled vst4 type"); - case MVT::v16i8: - Opc = ARM::VST4q8a; Opc2 = ARM::VST4q8b; RegVT = MVT::v8i8; break; - case MVT::v8i16: - Opc = ARM::VST4q16a; Opc2 = ARM::VST4q16b; RegVT = MVT::v4i16; break; - case MVT::v4f32: - Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2f32; break; - case MVT::v4i32: - Opc = ARM::VST4q32a; Opc2 = ARM::VST4q32b; RegVT = MVT::v2i32; break; - } - // Enable writeback to the address register. - MemOpc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(true), MVT::i32); - - SDValue D0 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(3)); - SDValue D2 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(4)); - SDValue D4 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(5)); - SDValue D6 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_0, dl, RegVT, - N->getOperand(6)); - const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, - D0, D2, D4, D6, Chain }; - SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(), - MVT::Other, OpsA, 8); - Chain = SDValue(VStA, 1); - - SDValue D1 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(3)); - SDValue D3 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(4)); - SDValue D5 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(5)); - SDValue D7 = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT, - N->getOperand(6)); - MemAddr = SDValue(VStA, 0); - const SDValue OpsB[] = { MemAddr, MemUpdate, MemOpc, - D1, D3, D5, D7, Chain }; - SDNode *VStB = CurDAG->getMachineNode(Opc2, dl, MemAddr.getValueType(), - MVT::Other, OpsB, 8); - Chain = SDValue(VStB, 1); - ReplaceUses(SDValue(N, 0), Chain); - return NULL; + unsigned DOpcodes[] = { ARM::VST4d8, ARM::VST4d16, + ARM::VST4d32, ARM::VST4d64 }; + unsigned QOpcodes0[] = { ARM::VST4q8a, ARM::VST4q16a, ARM::VST4q32a }; + unsigned QOpcodes1[] = { ARM::VST4q8b, ARM::VST4q16b, ARM::VST4q32b }; + return SelectVST(Op, 4, DOpcodes, QOpcodes0, QOpcodes1); } case Intrinsic::arm_neon_vst2lane: { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 8adfac3..6ec78bc 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -377,12 +377,15 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode, def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr, opc, " $dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> { + let Inst{4} = 0; let Inst{25} = 0; let isCommutable = Commutable; } def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, opc, " $dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> { + let Inst{4} = 1; + let Inst{7} = 0; let Inst{25} = 0; } } @@ -401,11 +404,14 @@ multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode, IIC_iALUr, opc, "s $dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> { let isCommutable = Commutable; + let Inst{4} = 0; let Inst{25} = 0; } def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, opc, "s $dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> { + let Inst{4} = 1; + let Inst{7} = 0; let Inst{25} = 0; } } @@ -426,6 +432,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr, opc, " $a, $b", [(opnode GPR:$a, GPR:$b)]> { + let Inst{4} = 0; let Inst{20} = 1; let Inst{25} = 0; let isCommutable = Commutable; @@ -433,6 +440,8 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode, def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr, opc, " $a, $b", [(opnode GPR:$a, so_reg:$b)]> { + let Inst{4} = 1; + let Inst{7} = 0; let Inst{20} = 1; let Inst{25} = 0; } @@ -486,12 +495,15 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, Requires<[IsARM, CarryDefIsUnused]> { let isCommutable = Commutable; + let Inst{4} = 0; let Inst{25} = 0; } def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iALUsr, opc, " $dst, $a, $b", [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, Requires<[IsARM, CarryDefIsUnused]> { + let Inst{4} = 1; + let Inst{7} = 0; let Inst{25} = 0; } // Carry setting variants @@ -507,6 +519,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; + let Inst{4} = 0; let Inst{25} = 0; } def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), @@ -514,6 +527,8 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>, Requires<[IsARM, CarryDefIsUsed]> { let Defs = [CPSR]; + let Inst{4} = 1; + let Inst{7} = 0; let Inst{25} = 0; } } @@ -924,10 +939,18 @@ def STM : AXI4st<(outs), let neverHasSideEffects = 1 in def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, - "mov", " $dst, $src", []>, UnaryDP; + "mov", " $dst, $src", []>, UnaryDP { + let Inst{4} = 0; + let Inst{25} = 0; +} + def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, - "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP; + "mov", " $dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP { + let Inst{4} = 1; + let Inst{7} = 0; + let Inst{25} = 0; +} let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi, @@ -1146,10 +1169,15 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm), def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr, "mvn", " $dst, $src", - [(set GPR:$dst, (not GPR:$src))]>, UnaryDP; + [(set GPR:$dst, (not GPR:$src))]>, UnaryDP { + let Inst{4} = 0; +} def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm, IIC_iMOVsr, "mvn", " $dst, $src", - [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP; + [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP { + let Inst{4} = 1; + let Inst{7} = 0; +} let isReMaterializable = 1, isAsCheapAsAMove = 1 in def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm, IIC_iMOVi, "mvn", " $dst, $imm", @@ -1461,20 +1489,27 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm), def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm, IIC_iCMOVr, "mov", " $dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $dst">, UnaryDP; + RegConstraint<"$false = $dst">, UnaryDP { + let Inst{4} = 0; + let Inst{25} = 0; +} def MOVCCs : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, so_reg:$true), DPSoRegFrm, IIC_iCMOVsr, "mov", " $dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $dst">, UnaryDP; + RegConstraint<"$false = $dst">, UnaryDP { + let Inst{4} = 1; + let Inst{7} = 0; + let Inst{25} = 0; +} def MOVCCi : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, so_imm:$true), DPFrm, IIC_iCMOVi, "mov", " $dst, $true", [/*(set GPR:$dst, (ARMcmov GPR:$false, so_imm:$true, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $dst">, UnaryDP { - let Inst{25} = 1; + let Inst{25} = 1; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index cd370aa..b34aff7 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1951,9 +1951,9 @@ defm VPADDLu : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpaddl.u", int_arm_neon_vpaddlu>; // VPADAL : Vector Pairwise Add and Accumulate Long -defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpadal.s", +defm VPADALs : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01100, 0, "vpadal.s", int_arm_neon_vpadals>; -defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b00101, 0, "vpadal.u", +defm VPADALu : N2VPLInt2_QHS<0b11, 0b11, 0b00, 0b01101, 0, "vpadal.u", int_arm_neon_vpadalu>; // VPMAX : Vector Pairwise Maximum @@ -2038,7 +2038,7 @@ defm VSHLs : N3VInt_QHSD<0, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, defm VSHLu : N3VInt_QHSD<1, 0, 0b0100, 0, IIC_VSHLiD, IIC_VSHLiD, IIC_VSHLiQ, IIC_VSHLiQ, "vshl.u", int_arm_neon_vshiftu, 0>; // VSHL : Vector Shift Left (Immediate) -defm VSHLi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLiD, "vshl.i", NEONvshl>; +defm VSHLi : N2VSh_QHSD<0, 1, 0b0101, 1, IIC_VSHLiD, "vshl.i", NEONvshl>; // VSHR : Vector Shift Right (Immediate) defm VSHRs : N2VSh_QHSD<0, 1, 0b0000, 1, IIC_VSHLiD, "vshr.s", NEONvshrs>; defm VSHRu : N2VSh_QHSD<1, 1, 0b0000, 1, IIC_VSHLiD, "vshr.u", NEONvshru>; |