Diffstat (limited to 'contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff')
-rw-r--r-- | contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff | 419
1 file changed, 0 insertions, 419 deletions
diff --git a/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff b/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff
deleted file mode 100644
index 2896899..0000000
--- a/contrib/llvm/patches/patch-08-llvm-r230348-arm-fix-bad-ha.diff
+++ /dev/null
@@ -1,419 +0,0 @@
-Pull in r230348 from upstream llvm trunk (by Tim Northover):
-
-  ARM: treat [N x i32] and [N x i64] as AAPCS composite types
-
-  The logic is almost there already, with our special homogeneous
-  aggregate handling. Tweaking it like this allows front-ends to emit
-  AAPCS compliant code without ever having to count registers or add
-  discarded padding arguments.
-
-  Only arrays of i32 and i64 are needed to model AAPCS rules, but I
-  decided to apply the logic to all integer arrays for more consistency.
-
-This fixes a possible "Unexpected member type for HA" error when
-compiling lib/msun/bsdsrc/b_tgamma.c for armv6.
-
-Reported by: Jakub Palider <jpa@semihalf.com>
-
-Introduced here: https://svnweb.freebsd.org/changeset/base/280400
-
-Index: include/llvm/CodeGen/CallingConvLower.h
-===================================================================
---- include/llvm/CodeGen/CallingConvLower.h
-+++ include/llvm/CodeGen/CallingConvLower.h
-@@ -122,8 +122,8 @@ class CCValAssign {
-   // There is no need to differentiate between a pending CCValAssign and other
-   // kinds, as they are stored in a different list.
-   static CCValAssign getPending(unsigned ValNo, MVT ValVT, MVT LocVT,
--                                LocInfo HTP) {
--    return getReg(ValNo, ValVT, 0, LocVT, HTP);
-+                                LocInfo HTP, unsigned ExtraInfo = 0) {
-+    return getReg(ValNo, ValVT, ExtraInfo, LocVT, HTP);
-   }
- 
-   void convertToReg(unsigned RegNo) {
-@@ -146,6 +146,7 @@ class CCValAssign {
- 
-   unsigned getLocReg() const { assert(isRegLoc()); return Loc; }
-   unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; }
-+  unsigned getExtraInfo() const { return Loc; }
-   MVT getLocVT() const { return LocVT; }
- 
-   LocInfo getLocInfo() const { return HTP; }
-Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-===================================================================
---- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-@@ -7429,11 +7429,8 @@ TargetLowering::LowerCallTo(TargetLowering::CallLo
-     }
-     if (Args[i].isNest)
-       Flags.setNest();
--    if (NeedsRegBlock) {
-+    if (NeedsRegBlock)
-       Flags.setInConsecutiveRegs();
--      if (Value == NumValues - 1)
--        Flags.setInConsecutiveRegsLast();
--    }
-     Flags.setOrigAlign(OriginalAlignment);
- 
-     MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT);
-@@ -7482,6 +7479,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLo
-       CLI.Outs.push_back(MyFlags);
-       CLI.OutVals.push_back(Parts[j]);
-     }
-+
-+    if (NeedsRegBlock && Value == NumValues - 1)
-+      CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast();
-   }
- }
- 
-@@ -7697,11 +7697,8 @@ void SelectionDAGISel::LowerArguments(const Functi
-     }
-     if (F.getAttributes().hasAttribute(Idx, Attribute::Nest))
-       Flags.setNest();
--    if (NeedsRegBlock) {
-+    if (NeedsRegBlock)
-       Flags.setInConsecutiveRegs();
--      if (Value == NumValues - 1)
--        Flags.setInConsecutiveRegsLast();
--    }
-     Flags.setOrigAlign(OriginalAlignment);
- 
-     MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
-@@ -7716,6 +7713,8 @@ void SelectionDAGISel::LowerArguments(const Functi
-       MyFlags.Flags.setOrigAlign(1);
-       Ins.push_back(MyFlags);
-     }
-+    if (NeedsRegBlock && Value == NumValues - 1)
-+      Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast();
-     PartBase += VT.getStoreSize();
-   }
- }
-Index: lib/Target/ARM/ARMCallingConv.h
-===================================================================
---- lib/Target/ARM/ARMCallingConv.h
-+++ lib/Target/ARM/ARMCallingConv.h
-@@ -160,6 +160,8 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &V
-                                    State);
- }
- 
-+static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-+
- static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3,
-                                      ARM::S4, ARM::S5, ARM::S6, ARM::S7,
-                                      ARM::S8, ARM::S9, ARM::S10, ARM::S11,
-@@ -168,81 +170,114 @@ static const uint16_t DRegList[] = { ARM::D0, ARM:
-                                      ARM::D4, ARM::D5, ARM::D6, ARM::D7 };
- static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 };
- 
-+
- // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA
- // has InConsecutiveRegs set, and that the last member also has
- // InConsecutiveRegsLast set. We must process all members of the HA before
- // we can allocate it, as we need to know the total number of registers that
- // will be needed in order to (attempt to) allocate a contiguous block.
--static bool CC_ARM_AAPCS_Custom_HA(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
--                                   CCValAssign::LocInfo &LocInfo,
--                                   ISD::ArgFlagsTy &ArgFlags, CCState &State) {
--  SmallVectorImpl<CCValAssign> &PendingHAMembers = State.getPendingLocs();
-+static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT,
-+                                          MVT &LocVT,
-+                                          CCValAssign::LocInfo &LocInfo,
-+                                          ISD::ArgFlagsTy &ArgFlags,
-+                                          CCState &State) {
-+  SmallVectorImpl<CCValAssign> &PendingMembers = State.getPendingLocs();
- 
-   // AAPCS HFAs must have 1-4 elements, all of the same type
--  assert(PendingHAMembers.size() < 4);
--  if (PendingHAMembers.size() > 0)
--    assert(PendingHAMembers[0].getLocVT() == LocVT);
-+  if (PendingMembers.size() > 0)
-+    assert(PendingMembers[0].getLocVT() == LocVT);
- 
-   // Add the argument to the list to be allocated once we know the size of the
--  // HA
--  PendingHAMembers.push_back(
--      CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
-+  // aggregate. Store the type's required alignmnent as extra info for later: in
-+  // the [N x i64] case all trace has been removed by the time we actually get
-+  // to do allocation.
-+  PendingMembers.push_back(CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo,
-+                                                   ArgFlags.getOrigAlign()));
- 
--  if (ArgFlags.isInConsecutiveRegsLast()) {
--    assert(PendingHAMembers.size() > 0 && PendingHAMembers.size() <= 4 &&
--           "Homogeneous aggregates must have between 1 and 4 members");
-+  if (!ArgFlags.isInConsecutiveRegsLast())
-+    return true;
- 
--    // Try to allocate a contiguous block of registers, each of the correct
--    // size to hold one member.
--    ArrayRef<uint16_t> RegList;
--    switch (LocVT.SimpleTy) {
--    case MVT::f32:
--      RegList = SRegList;
--      break;
--    case MVT::f64:
--      RegList = DRegList;
--      break;
--    case MVT::v2f64:
--      RegList = QRegList;
--      break;
--    default:
--      llvm_unreachable("Unexpected member type for HA");
--      break;
--    }
-+  // Try to allocate a contiguous block of registers, each of the correct
-+  // size to hold one member.
-+  unsigned Align = std::min(PendingMembers[0].getExtraInfo(), 8U);
- 
--    unsigned RegResult =
--        State.AllocateRegBlock(RegList, PendingHAMembers.size());
-+  ArrayRef<uint16_t> RegList;
-+  switch (LocVT.SimpleTy) {
-+  case MVT::i32: {
-+    RegList = RRegList;
-+    unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size());
- 
--    if (RegResult) {
--      for (SmallVectorImpl<CCValAssign>::iterator It = PendingHAMembers.begin();
--           It != PendingHAMembers.end(); ++It) {
--        It->convertToReg(RegResult);
--        State.addLoc(*It);
--        ++RegResult;
--      }
--      PendingHAMembers.clear();
--      return true;
--    }
-+    // First consume all registers that would give an unaligned object. Whether
-+    // we go on stack or in regs, no-one will be using them in future.
-+    unsigned RegAlign = RoundUpToAlignment(Align, 4) / 4;
-+    while (RegIdx % RegAlign != 0 && RegIdx < RegList.size())
-+      State.AllocateReg(RegList[RegIdx++]);
- 
--    // Register allocation failed, fall back to the stack
-+    break;
-+  }
-+  case MVT::f32:
-+    RegList = SRegList;
-+    break;
-+  case MVT::f64:
-+    RegList = DRegList;
-+    break;
-+  case MVT::v2f64:
-+    RegList = QRegList;
-+    break;
-+  default:
-+    llvm_unreachable("Unexpected member type for block aggregate");
-+    break;
-+  }
- 
--    // Mark all VFP regs as unavailable (AAPCS rule C.2.vfp)
--    for (unsigned regNo = 0; regNo < 16; ++regNo)
--      State.AllocateReg(SRegList[regNo]);
-+  unsigned RegResult = State.AllocateRegBlock(RegList, PendingMembers.size());
-+  if (RegResult) {
-+    for (SmallVectorImpl<CCValAssign>::iterator It = PendingMembers.begin();
-+         It != PendingMembers.end(); ++It) {
-+      It->convertToReg(RegResult);
-+      State.addLoc(*It);
-+      ++RegResult;
-+    }
-+    PendingMembers.clear();
-+    return true;
-+  }
- 
--    unsigned Size = LocVT.getSizeInBits() / 8;
--    unsigned Align = std::min(Size, 8U);
-+  // Register allocation failed, we'll be needing the stack
-+  unsigned Size = LocVT.getSizeInBits() / 8;
-+  if (LocVT == MVT::i32 && State.getNextStackOffset() == 0) {
-+    // If nothing else has used the stack until this point, a non-HFA aggregate
-+    // can be split between regs and stack.
-+    unsigned RegIdx = State.getFirstUnallocated(RegList.data(), RegList.size());
-+    for (auto &It : PendingMembers) {
-+      if (RegIdx >= RegList.size())
-+        It.convertToMem(State.AllocateStack(Size, Size));
-+      else
-+        It.convertToReg(State.AllocateReg(RegList[RegIdx++]));
- 
--    for (auto It : PendingHAMembers) {
--      It.convertToMem(State.AllocateStack(Size, Align));
-       State.addLoc(It);
-     }
-+    PendingMembers.clear();
-+    return true;
-+  } else if (LocVT != MVT::i32)
-+    RegList = SRegList;
- 
--    // All pending members have now been allocated
--    PendingHAMembers.clear();
-+  // Mark all regs as unavailable (AAPCS rule C.2.vfp for VFP, C.6 for core)
-+  for (auto Reg : RegList)
-+    State.AllocateReg(Reg);
-+
-+  for (auto &It : PendingMembers) {
-+    It.convertToMem(State.AllocateStack(Size, Align));
-+    State.addLoc(It);
-+
-+    // After the first item has been allocated, the rest are packed as tightly
-+    // as possible. (E.g. an incoming i64 would have starting Align of 8, but
-+    // we'll be allocating a bunch of i32 slots).
-+    Align = Size;
-   }
- 
---  // This will be allocated by the last member of the HA
-+  // All pending members have now been allocated
-+  PendingMembers.clear();
-+
-+  // This will be allocated by the last member of the aggregate
-   return true;
- }
- 
-Index: lib/Target/ARM/ARMCallingConv.td
-===================================================================
---- lib/Target/ARM/ARMCallingConv.td
-+++ lib/Target/ARM/ARMCallingConv.td
-@@ -175,7 +175,7 @@ def CC_ARM_AAPCS_VFP : CallingConv<[
-   CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32], CCBitConvertToType<v2f64>>,
- 
-   // HFAs are passed in a contiguous block of registers, or on the stack
--  CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_HA">>,
-+  CCIfConsecutiveRegs<CCCustom<"CC_ARM_AAPCS_Custom_Aggregate">>,
- 
-   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
-   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
-Index: lib/Target/ARM/ARMISelLowering.cpp
-===================================================================
---- lib/Target/ARM/ARMISelLowering.cpp
-+++ lib/Target/ARM/ARMISelLowering.cpp
-@@ -11285,7 +11285,9 @@ static bool isHomogeneousAggregate(Type *Ty, HABas
-   return (Members > 0 && Members <= 4);
- }
- 
--/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate.
-+/// \brief Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
-+/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
-+/// passing according to AAPCS rules.
- bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
-     Type *Ty, CallingConv::ID CallConv, bool isVarArg) const {
-   if (getEffectiveCallingConv(CallConv, isVarArg) !=
-@@ -11294,7 +11296,9 @@ bool ARMTargetLowering::functionArgumentNeedsConse
- 
-   HABaseType Base = HA_UNKNOWN;
-   uint64_t Members = 0;
--  bool result = isHomogeneousAggregate(Ty, Base, Members);
--  DEBUG(dbgs() << "isHA: " << result << " "; Ty->dump());
--  return result;
-+  bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
-+  DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
-+
-+  bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
-+  return IsHA || IsIntArray;
- }
-Index: test/CodeGen/ARM/aggregate-padding.ll
-===================================================================
---- test/CodeGen/ARM/aggregate-padding.ll
-+++ test/CodeGen/ARM/aggregate-padding.ll
-@@ -0,0 +1,101 @@
-+; RUN: llc -mtriple=armv7-linux-gnueabihf %s -o - | FileCheck %s
-+
-+; [2 x i64] should be contiguous when split (e.g. we shouldn't try to align all
-+; i32 components to 64 bits). Also makes sure i64 based types are properly
-+; aligned on the stack.
-+define i64 @test_i64_contiguous_on_stack([8 x double], float, i32 %in, [2 x i64] %arg) nounwind {
-+; CHECK-LABEL: test_i64_contiguous_on_stack:
-+; CHECK-DAG: ldr [[LO0:r[0-9]+]], [sp, #8]
-+; CHECK-DAG: ldr [[HI0:r[0-9]+]], [sp, #12]
-+; CHECK-DAG: ldr [[LO1:r[0-9]+]], [sp, #16]
-+; CHECK-DAG: ldr [[HI1:r[0-9]+]], [sp, #20]
-+; CHECK: adds r0, [[LO0]], [[LO1]]
-+; CHECK: adc r1, [[HI0]], [[HI1]]
-+
-+  %val1 = extractvalue [2 x i64] %arg, 0
-+  %val2 = extractvalue [2 x i64] %arg, 1
-+  %sum = add i64 %val1, %val2
-+  ret i64 %sum
-+}
-+
-+; [2 x i64] should try to use looks for 4 regs, not 8 (which might happen if the
-+; i64 -> i32, i32 split wasn't handled correctly).
-+define i64 @test_2xi64_uses_4_regs([8 x double], float, [2 x i64] %arg) nounwind {
-+; CHECK-LABEL: test_2xi64_uses_4_regs:
-+; CHECK-DAG: mov r0, r2
-+; CHECK-DAG: mov r1, r3
-+
-+  %val = extractvalue [2 x i64] %arg, 1
-+  ret i64 %val
-+}
-+
-+; An aggregate should be able to split between registers and stack if there is
-+; nothing else on the stack.
-+define i32 @test_aggregates_split([8 x double], i32, [4 x i32] %arg) nounwind {
-+; CHECK-LABEL: test_aggregates_split:
-+; CHECK: ldr [[VAL3:r[0-9]+]], [sp]
-+; CHECK: add r0, r1, [[VAL3]]
-+
-+  %val0 = extractvalue [4 x i32] %arg, 0
-+  %val3 = extractvalue [4 x i32] %arg, 3
-+  %sum = add i32 %val0, %val3
-+  ret i32 %sum
-+}
-+
-+; If an aggregate has to be moved entirely onto the stack, nothing should be
-+; able to use r0-r3 any more. Also checks that [2 x i64] properly aligned when
-+; it uses regs.
-+define i32 @test_no_int_backfilling([8 x double], float, i32, [2 x i64], i32 %arg) nounwind {
-+; CHECK-LABEL: test_no_int_backfilling:
-+; CHECK: ldr r0, [sp, #24]
-+  ret i32 %arg
-+}
-+
-+; Even if the argument was successfully allocated as reg block, there should be
-+; no backfillig to r1.
-+define i32 @test_no_int_backfilling_regsonly(i32, [1 x i64], i32 %arg) {
-+; CHECK-LABEL: test_no_int_backfilling_regsonly:
-+; CHECK: ldr r0, [sp]
-+  ret i32 %arg
-+}
-+
-+; If an aggregate has to be moved entirely onto the stack, nothing should be
-+; able to use r0-r3 any more.
-+define float @test_no_float_backfilling([7 x double], [4 x i32], i32, [4 x double], float %arg) nounwind {
-+; CHECK-LABEL: test_no_float_backfilling:
-+; CHECK: vldr s0, [sp, #40]
-+  ret float %arg
-+}
-+
-+; They're a bit pointless, but types like [N x i8] should work as well.
-+define i8 @test_i8_in_regs(i32, [3 x i8] %arg) {
-+; CHECK-LABEL: test_i8_in_regs:
-+; CHECK: add r0, r1, r3
-+  %val0 = extractvalue [3 x i8] %arg, 0
-+  %val2 = extractvalue [3 x i8] %arg, 2
-+  %sum = add i8 %val0, %val2
-+  ret i8 %sum
-+}
-+
-+define i16 @test_i16_split(i32, i32, [3 x i16] %arg) {
-+; CHECK-LABEL: test_i16_split:
-+; CHECK: ldrh [[VAL2:r[0-9]+]], [sp]
-+; CHECK: add r0, r2, [[VAL2]]
-+  %val0 = extractvalue [3 x i16] %arg, 0
-+  %val2 = extractvalue [3 x i16] %arg, 2
-+  %sum = add i16 %val0, %val2
-+  ret i16 %sum
-+}
-+
-+; Beware: on the stack each i16 still gets a 32-bit slot, the array is not
-+; packed.
-+define i16 @test_i16_forced_stack([8 x double], double, i32, i32, [3 x i16] %arg) {
-+; CHECK-LABEL: test_i16_forced_stack:
-+; CHECK-DAG: ldrh [[VAL0:r[0-9]+]], [sp, #8]
-+; CHECK-DAG: ldrh [[VAL2:r[0-9]+]], [sp, #16]
-+; CHECK: add r0, [[VAL0]], [[VAL2]]
-+  %val0 = extractvalue [3 x i16] %arg, 0
-+  %val2 = extractvalue [3 x i16] %arg, 2
-+  %sum = add i16 %val0, %val2
-+  ret i16 %sum
-+}
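For readers skimming the removed patch, a hedged C sketch of the case it handled may help. The identifiers below are invented and are not taken from lib/msun/bsdsrc/b_tgamma.c; clang typically coerces an 8-byte-aligned by-value aggregate like this into a [2 x i64] argument on ARM, and before r230348 the homogeneous-aggregate handler only understood f32/f64/v2f64 members, so an integer array could reach the llvm_unreachable("Unexpected member type for HA") seen in the old code above.

    /*
     * Hypothetical reproducer sketch (invented names). An 8-byte-aligned
     * struct passed by value is normally coerced by clang to [2 x i64]
     * under the AAPCS, which is exactly the kind of argument the patch
     * teaches the backend to allocate as a single composite.
     */
    #include <stdint.h>

    struct pair64 {
        int64_t lo;
        int64_t hi;
    };

    int64_t sum_pair64(struct pair64 p)
    {
        /* With r230348 applied, the [2 x i64] coercion is placed either in a
         * contiguous block of core registers or in a suitably aligned stack
         * area; no padding arguments are needed to keep r0-r3 consistent. */
        return p.lo + p.hi;
    }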
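The register/stack split performed by CC_ARM_AAPCS_Custom_Aggregate (the branch guarded by State.getNextStackOffset() == 0) mirrors the AAPCS rule that a composite argument may straddle the core registers and the stack as long as nothing has been placed on the stack yet. A minimal sketch, again with invented names, of the case exercised by test_aggregates_split:

    /* Invented example: the int consumes r0, and the 16-byte aggregate is
     * split, its first three words going to r1-r3 and the last word to the
     * stack, so no core register is left unused. */
    struct quad {
        int a, b, c, d;
    };

    int first_plus_last(int n, struct quad q)
    {
        return n + q.a + q.d;   /* q.a arrives in r1, q.d comes from the stack */
    }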