diff options
22 files changed, 723 insertions, 275 deletions
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h index 82c30d3..df7c951 100644 --- a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h +++ b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h @@ -295,7 +295,7 @@ public: } /// Should we be emitting segmented stack stuff for the function - bool shouldSplitStack(); + bool shouldSplitStack() const; /// getNumBlockIDs - Return the number of MBB ID's allocated. /// diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 23816bd..536fc65 100644 --- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -369,6 +369,18 @@ public: (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) | (NoSignedZeros << 6) | (AllowReciprocal << 7); } + + /// Clear any flags in this flag set that aren't also set in Flags. + void intersectWith(const SDNodeFlags *Flags) { + NoUnsignedWrap &= Flags->NoUnsignedWrap; + NoSignedWrap &= Flags->NoSignedWrap; + Exact &= Flags->Exact; + UnsafeAlgebra &= Flags->UnsafeAlgebra; + NoNaNs &= Flags->NoNaNs; + NoInfs &= Flags->NoInfs; + NoSignedZeros &= Flags->NoSignedZeros; + AllowReciprocal &= Flags->AllowReciprocal; + } }; /// Represents one node in the SelectionDAG. @@ -682,6 +694,9 @@ public: /// and directly, but it is not to avoid creating a vtable for this class. const SDNodeFlags *getFlags() const; + /// Clear any flags in this node that aren't also set in Flags. + void intersectFlagsWith(const SDNodeFlags *Flags); + /// Return the number of values defined/returned by this operator. unsigned getNumValues() const { return NumValues; } diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Local.h b/contrib/llvm/include/llvm/Transforms/Utils/Local.h index 911c6f1..3ae0165 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/Local.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/Local.h @@ -331,6 +331,25 @@ unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT, /// during lowering by the GC infrastructure. bool callsGCLeafFunction(ImmutableCallSite CS); +//===----------------------------------------------------------------------===// +// Intrinsic pattern matching +// + +/// Try and match a bitreverse or bswap idiom. +/// +/// If an idiom is matched, an intrinsic call is inserted before \c I. Any added +/// instructions are returned in \c InsertedInsts. They will all have been added +/// to a basic block. +/// +/// A bitreverse idiom normally requires around 2*BW nodes to be searched (where +/// BW is the bitwidth of the integer type). A bswap idiom requires anywhere up +/// to BW / 4 nodes to be searched, so is significantly faster. +/// +/// This function returns true on a successful match or false otherwise. +bool recognizeBitReverseOrBSwapIdiom( + Instruction *I, bool MatchBSwaps, bool MatchBitReversals, + SmallVectorImpl<Instruction *> &InsertedInsts); + } // End llvm namespace #endif diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h index 410a075..fc34f49 100644 --- a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -125,8 +125,6 @@ private: Value *optimizeStringMemoryLibCall(CallInst *CI, IRBuilder<> &B); // Math Library Optimizations - Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, bool CheckRetType); - Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B); Value *optimizeCos(CallInst *CI, IRBuilder<> &B); Value *optimizePow(CallInst *CI, IRBuilder<> &B); Value *optimizeExp2(CallInst *CI, IRBuilder<> &B); diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp index fd4ee46..c8007a5 100644 --- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -5211,6 +5211,24 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) { return false; } +/// Given an OR instruction, check to see if this is a bitreverse +/// idiom. If so, insert the new intrinsic and return true. +static bool makeBitReverse(Instruction &I, const DataLayout &DL, + const TargetLowering &TLI) { + if (!I.getType()->isIntegerTy() || + !TLI.isOperationLegalOrCustom(ISD::BITREVERSE, + TLI.getValueType(DL, I.getType(), true))) + return false; + + SmallVector<Instruction*, 4> Insts; + if (!recognizeBitReverseOrBSwapIdiom(&I, false, true, Insts)) + return false; + Instruction *LastInst = Insts.back(); + I.replaceAllUsesWith(LastInst); + RecursivelyDeleteTriviallyDeadInstructions(&I); + return true; +} + // In this pass we look for GEP and cast instructions that are used // across basic blocks and rewrite them to improve basic-block-at-a-time // selection. @@ -5224,8 +5242,19 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool& ModifiedDT) { if (ModifiedDT) return true; } - MadeChange |= dupRetToEnableTailCallOpts(&BB); + bool MadeBitReverse = true; + while (TLI && MadeBitReverse) { + MadeBitReverse = false; + for (auto &I : reverse(BB)) { + if (makeBitReverse(I, *DL, *TLI)) { + MadeBitReverse = MadeChange = true; + break; + } + } + } + MadeChange |= dupRetToEnableTailCallOpts(&BB); + return MadeChange; } diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index ca4bb1c..f6604f3 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -163,7 +163,7 @@ getOrCreateJumpTableInfo(unsigned EntryKind) { } /// Should we be emitting segmented stack stuff for the function -bool MachineFunction::shouldSplitStack() { +bool MachineFunction::shouldSplitStack() const { return getFunction()->hasFnAttribute("split-stack"); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 96bf914..893871f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -377,22 +377,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, } } -/// Add logical or fast math flag values to FoldingSetNodeID value. -static void AddNodeIDFlags(FoldingSetNodeID &ID, unsigned Opcode, - const SDNodeFlags *Flags) { - if (!isBinOpWithFlags(Opcode)) - return; - - unsigned RawFlags = 0; - if (Flags) - RawFlags = Flags->getRawFlags(); - ID.AddInteger(RawFlags); -} - -static void AddNodeIDFlags(FoldingSetNodeID &ID, const SDNode *N) { - AddNodeIDFlags(ID, N->getOpcode(), N->getFlags()); -} - static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, SDVTList VTList, ArrayRef<SDValue> OpList) { AddNodeIDOpcode(ID, OpC); @@ -528,8 +512,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { } } // end switch (N->getOpcode()) - AddNodeIDFlags(ID, N); - // Target specific memory nodes could also have address spaces to check. if (N->isTargetMemoryOpcode()) ID.AddInteger(cast<MemSDNode>(N)->getPointerInfo().getAddrSpace()); @@ -851,6 +833,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op, AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + if (Node) + if (const SDNodeFlags *Flags = N->getFlags()) + Node->intersectFlagsWith(Flags); return Node; } @@ -869,6 +854,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + if (Node) + if (const SDNodeFlags *Flags = N->getFlags()) + Node->intersectFlagsWith(Flags); return Node; } @@ -886,6 +874,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops, AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops); AddNodeIDCustom(ID, N); SDNode *Node = FindNodeOrInsertPos(ID, N->getDebugLoc(), InsertPos); + if (Node) + if (const SDNodeFlags *Flags = N->getFlags()) + Node->intersectFlagsWith(Flags); return Node; } @@ -3892,10 +3883,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue Ops[] = {N1, N2}; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); - AddNodeIDFlags(ID, Opcode, Flags); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DL.getDebugLoc(), IP)) { + if (Flags) + E->intersectFlagsWith(Flags); return SDValue(E, 0); + } N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, Flags); @@ -6249,10 +6242,12 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); - AddNodeIDFlags(ID, Opcode, Flags); void *IP = nullptr; - if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) + if (SDNode *E = FindNodeOrInsertPos(ID, DebugLoc(), IP)) { + if (Flags) + E->intersectFlagsWith(Flags); return E; + } } return nullptr; } @@ -6948,6 +6943,11 @@ const SDNodeFlags *SDNode::getFlags() const { return nullptr; } +void SDNode::intersectFlagsWith(const SDNodeFlags *Flags) { + if (auto *FlagsNode = dyn_cast<BinaryWithFlagsSDNode>(this)) + FlagsNode->Flags.intersectWith(Flags); +} + SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { assert(N->getNumValues() == 1 && "Can't unroll a vector with multiple results!"); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 4ecfbe9..9b73c5e 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -10133,6 +10133,7 @@ void AArch64TargetLowering::insertCopiesSplitCSR( const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (AArch64::GPR64RegClass.contains(*I)) @@ -10152,13 +10153,13 @@ void AArch64TargetLowering::insertCopiesSplitCSR( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); - BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - NewVR) + BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); + // Insert the copy-back instructions right before the terminator. for (auto *Exit : Exits) - BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - *I) + BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index d26604f..685907a 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -112,9 +112,21 @@ public: MCELFStreamer::EmitInstruction(Inst, STI); } + /// Emit a 32-bit value as an instruction. This is only used for the .inst + /// directive, EmitInstruction should be used in other cases. void emitInst(uint32_t Inst) { + char Buffer[4]; + + // We can't just use EmitIntValue here, as that will emit a data mapping + // symbol, and swap the endianness on big-endian systems (instructions are + // always little-endian). + for (unsigned I = 0; I < 4; ++I) { + Buffer[I] = uint8_t(Inst); + Inst >>= 8; + } + EmitA64MappingSymbol(); - MCELFStreamer::EmitIntValue(Inst, 4); + MCELFStreamer::EmitBytes(StringRef(Buffer, 4)); } /// This is one of the functions used to emit data into an ELF section, so the diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index 37c0795..978e99c 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -12423,6 +12423,7 @@ void ARMTargetLowering::insertCopiesSplitCSR( const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (ARM::GPRRegClass.contains(*I)) @@ -12442,13 +12443,13 @@ void ARMTargetLowering::insertCopiesSplitCSR( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); - BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - NewVR) + BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); + // Insert the copy-back instructions right before the terminator. for (auto *Exit : Exits) - BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - *I) + BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td index e8b96e7..ed2e880 100644 --- a/contrib/llvm/lib/Target/X86/X86CallingConv.td +++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td @@ -832,10 +832,10 @@ def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI, R8, R9, R10, R11)>; // CSRs that are handled by prologue, epilogue. -def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>; +def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add RBP)>; // CSRs that are handled explicitly via copies. -def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>; +def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(sub CSR_64_TLS_Darwin, RBP)>; // All GPRs - except r11 def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI, diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp index 8632bb8..7f8ce47 100644 --- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -2031,6 +2031,10 @@ void X86FrameLowering::adjustForSegmentedStacks( unsigned TlsReg, TlsOffset; DebugLoc DL; + // To support shrink-wrapping we would need to insert the new blocks + // at the right place and update the branches to PrologueMBB. + assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet"); + unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true); assert(!MF.getRegInfo().isLiveIn(ScratchReg) && "Scratch register is live-in"); @@ -2271,6 +2275,11 @@ void X86FrameLowering::adjustForHiPEPrologue( MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { MachineFrameInfo *MFI = MF.getFrameInfo(); DebugLoc DL; + + // To support shrink-wrapping we would need to insert the new blocks + // at the right place and update the branches to PrologueMBB. + assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet"); + // HiPE-specific values const unsigned HipeLeafWords = 24; const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; @@ -2584,7 +2593,14 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { // If we may need to emit frameless compact unwind information, give // up as this is currently broken: PR25614. - return MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF); + return (MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF)) && + // The lowering of segmented stack and HiPE only support entry blocks + // as prologue blocks: PR26107. + // This limitation may be lifted if we fix: + // - adjustForSegmentedStacks + // - adjustForHiPEPrologue + MF.getFunction()->getCallingConv() != CallingConv::HiPE && + !MF.shouldSplitStack(); } MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp index b723059..6904714 100644 --- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28908,6 +28908,7 @@ void X86TargetLowering::insertCopiesSplitCSR( const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); + MachineBasicBlock::iterator MBBI = Entry->begin(); for (const MCPhysReg *I = IStart; *I; ++I) { const TargetRegisterClass *RC = nullptr; if (X86::GR64RegClass.contains(*I)) @@ -28925,13 +28926,13 @@ void X86TargetLowering::insertCopiesSplitCSR( Attribute::NoUnwind) && "Function should be nounwind in insertCopiesSplitCSR!"); Entry->addLiveIn(*I); - BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - NewVR) + BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) .addReg(*I); + // Insert the copy-back instructions right before the terminator. for (auto *Exit : Exits) - BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY), - *I) + BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), *I) .addReg(NewVR); } } diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 95c50d3..76cefd9 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -17,6 +17,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Transforms/Utils/CmpInstAnalysis.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; using namespace PatternMatch; @@ -1565,190 +1566,18 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) { return Changed ? &I : nullptr; } - -/// Analyze the specified subexpression and see if it is capable of providing -/// pieces of a bswap or bitreverse. The subexpression provides a potential -/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in -/// the output of the expression came from a corresponding bit in some other -/// value. This function is recursive, and the end result is a mapping of -/// (value, bitnumber) to bitnumber. It is the caller's responsibility to -/// validate that all `value`s are identical and that the bitnumber to bitnumber -/// mapping is correct for a bswap or bitreverse. -/// -/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know -/// that the expression deposits the low byte of %X into the high byte of the -/// result and that all other bits are zero. This expression is accepted, -/// BitValues[24-31] are set to %X and BitProvenance[24-31] are set to [0-7]. -/// -/// This function returns true if the match was unsuccessful and false if so. -/// On entry to the function the "OverallLeftShift" is a signed integer value -/// indicating the number of bits that the subexpression is later shifted. For -/// example, if the expression is later right shifted by 16 bits, the -/// OverallLeftShift value would be -16 on entry. This is used to specify which -/// bits of BitValues are actually being set. -/// -/// Similarly, BitMask is a bitmask where a bit is clear if its corresponding -/// bit is masked to zero by a user. For example, in (X & 255), X will be -/// processed with a bytemask of 255. BitMask is always in the local -/// (OverallLeftShift) coordinate space. -/// -static bool CollectBitParts(Value *V, int OverallLeftShift, APInt BitMask, - SmallVectorImpl<Value *> &BitValues, - SmallVectorImpl<int> &BitProvenance) { - if (Instruction *I = dyn_cast<Instruction>(V)) { - // If this is an or instruction, it may be an inner node of the bswap. - if (I->getOpcode() == Instruction::Or) - return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, - BitValues, BitProvenance) || - CollectBitParts(I->getOperand(1), OverallLeftShift, BitMask, - BitValues, BitProvenance); - - // If this is a logical shift by a constant, recurse with OverallLeftShift - // and BitMask adjusted. - if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { - unsigned ShAmt = - cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); - // Ensure the shift amount is defined. - if (ShAmt > BitValues.size()) - return true; - - unsigned BitShift = ShAmt; - if (I->getOpcode() == Instruction::Shl) { - // X << C -> collect(X, +C) - OverallLeftShift += BitShift; - BitMask = BitMask.lshr(BitShift); - } else { - // X >>u C -> collect(X, -C) - OverallLeftShift -= BitShift; - BitMask = BitMask.shl(BitShift); - } - - if (OverallLeftShift >= (int)BitValues.size()) - return true; - if (OverallLeftShift <= -(int)BitValues.size()) - return true; - - return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, - BitValues, BitProvenance); - } - - // If this is a logical 'and' with a mask that clears bits, clear the - // corresponding bits in BitMask. - if (I->getOpcode() == Instruction::And && - isa<ConstantInt>(I->getOperand(1))) { - unsigned NumBits = BitValues.size(); - APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); - const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); - - for (unsigned i = 0; i != NumBits; ++i, Bit <<= 1) { - // If this bit is masked out by a later operation, we don't care what - // the and mask is. - if (BitMask[i] == 0) - continue; - - // If the AndMask is zero for this bit, clear the bit. - APInt MaskB = AndMask & Bit; - if (MaskB == 0) { - BitMask.clearBit(i); - continue; - } - - // Otherwise, this bit is kept. - } - - return CollectBitParts(I->getOperand(0), OverallLeftShift, BitMask, - BitValues, BitProvenance); - } - } - - // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be - // the input value to the bswap/bitreverse. To be part of a bswap or - // bitreverse we must be demanding a contiguous range of bits from it. - unsigned InputBitLen = BitMask.countPopulation(); - unsigned InputBitNo = BitMask.countTrailingZeros(); - if (BitMask.getBitWidth() - BitMask.countLeadingZeros() - InputBitNo != - InputBitLen) - // Not a contiguous set range of bits! - return true; - - // We know we're moving a contiguous range of bits from the input to the - // output. Record which bits in the output came from which bits in the input. - unsigned DestBitNo = InputBitNo + OverallLeftShift; - for (unsigned I = 0; I < InputBitLen; ++I) - BitProvenance[DestBitNo + I] = InputBitNo + I; - - // If the destination bit value is already defined, the values are or'd - // together, which isn't a bswap/bitreverse (unless it's an or of the same - // bits). - if (BitValues[DestBitNo] && BitValues[DestBitNo] != V) - return true; - for (unsigned I = 0; I < InputBitLen; ++I) - BitValues[DestBitNo + I] = V; - - return false; -} - -static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, - unsigned BitWidth) { - if (From % 8 != To % 8) - return false; - // Convert from bit indices to byte indices and check for a byte reversal. - From >>= 3; - To >>= 3; - BitWidth >>= 3; - return From == BitWidth - To - 1; -} - -static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, - unsigned BitWidth) { - return From == BitWidth - To - 1; -} - /// Given an OR instruction, check to see if this is a bswap or bitreverse /// idiom. If so, insert the new intrinsic and return it. Instruction *InstCombiner::MatchBSwapOrBitReverse(BinaryOperator &I) { - IntegerType *ITy = dyn_cast<IntegerType>(I.getType()); - if (!ITy) - return nullptr; // Can't do vectors. - unsigned BW = ITy->getBitWidth(); - - /// We keep track of which bit (BitProvenance) inside which value (BitValues) - /// defines each bit in the result. - SmallVector<Value *, 8> BitValues(BW, nullptr); - SmallVector<int, 8> BitProvenance(BW, -1); - - // Try to find all the pieces corresponding to the bswap. - APInt BitMask = APInt::getAllOnesValue(BitValues.size()); - if (CollectBitParts(&I, 0, BitMask, BitValues, BitProvenance)) - return nullptr; - - // Check to see if all of the bits come from the same value. - Value *V = BitValues[0]; - if (!V) return nullptr; // Didn't find a bit? Must be zero. - - if (!std::all_of(BitValues.begin(), BitValues.end(), - [&](const Value *X) { return X == V; })) - return nullptr; - - // Now, is the bit permutation correct for a bswap or a bitreverse? We can - // only byteswap values with an even number of bytes. - bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true;; - for (unsigned i = 0, e = BitValues.size(); i != e; ++i) { - OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); - OKForBitReverse &= - bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); - } - - Intrinsic::ID Intrin; - if (OKForBSwap) - Intrin = Intrinsic::bswap; - else if (OKForBitReverse) - Intrin = Intrinsic::bitreverse; - else + SmallVector<Instruction*, 4> Insts; + if (!recognizeBitReverseOrBSwapIdiom(&I, true, false, Insts)) return nullptr; + Instruction *LastInst = Insts.pop_back_val(); + LastInst->removeFromParent(); - Function *F = Intrinsic::getDeclaration(I.getModule(), Intrin, ITy); - return CallInst::Create(F, V); + for (auto *Inst : Insts) + Worklist.Add(Inst); + return LastInst; } /// We have an expression of the form (A&C)|(B&D). Check if A is (cond?-1:0) diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp index 1457411..79282a2 100644 --- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -179,13 +179,244 @@ void LandingPadInliningInfo::forwardResume( RI->eraseFromParent(); } +/// Helper for getUnwindDestToken/getUnwindDestTokenHelper. +static Value *getParentPad(Value *EHPad) { + if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad)) + return FPI->getParentPad(); + return cast<CatchSwitchInst>(EHPad)->getParentPad(); +} + +typedef DenseMap<Instruction *, Value *> UnwindDestMemoTy; + +/// Helper for getUnwindDestToken that does the descendant-ward part of +/// the search. +static Value *getUnwindDestTokenHelper(Instruction *EHPad, + UnwindDestMemoTy &MemoMap) { + SmallVector<Instruction *, 8> Worklist(1, EHPad); + + while (!Worklist.empty()) { + Instruction *CurrentPad = Worklist.pop_back_val(); + // We only put pads on the worklist that aren't in the MemoMap. When + // we find an unwind dest for a pad we may update its ancestors, but + // the queue only ever contains uncles/great-uncles/etc. of CurrentPad, + // so they should never get updated while queued on the worklist. + assert(!MemoMap.count(CurrentPad)); + Value *UnwindDestToken = nullptr; + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) { + if (CatchSwitch->hasUnwindDest()) { + UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI(); + } else { + // Catchswitch doesn't have a 'nounwind' variant, and one might be + // annotated as "unwinds to caller" when really it's nounwind (see + // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the + // parent's unwind dest from this. We can check its catchpads' + // descendants, since they might include a cleanuppad with an + // "unwinds to caller" cleanupret, which can be trusted. + for (auto HI = CatchSwitch->handler_begin(), + HE = CatchSwitch->handler_end(); + HI != HE && !UnwindDestToken; ++HI) { + BasicBlock *HandlerBlock = *HI; + auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI()); + for (User *Child : CatchPad->users()) { + // Intentionally ignore invokes here -- since the catchswitch is + // marked "unwind to caller", it would be a verifier error if it + // contained an invoke which unwinds out of it, so any invoke we'd + // encounter must unwind to some child of the catch. + if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child)) + continue; + + Instruction *ChildPad = cast<Instruction>(Child); + auto Memo = MemoMap.find(ChildPad); + if (Memo == MemoMap.end()) { + // Haven't figure out this child pad yet; queue it. + Worklist.push_back(ChildPad); + continue; + } + // We've already checked this child, but might have found that + // it offers no proof either way. + Value *ChildUnwindDestToken = Memo->second; + if (!ChildUnwindDestToken) + continue; + // We already know the child's unwind dest, which can either + // be ConstantTokenNone to indicate unwind to caller, or can + // be another child of the catchpad. Only the former indicates + // the unwind dest of the catchswitch. + if (isa<ConstantTokenNone>(ChildUnwindDestToken)) { + UnwindDestToken = ChildUnwindDestToken; + break; + } + assert(getParentPad(ChildUnwindDestToken) == CatchPad); + } + } + } + } else { + auto *CleanupPad = cast<CleanupPadInst>(CurrentPad); + for (User *U : CleanupPad->users()) { + if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) { + if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest()) + UnwindDestToken = RetUnwindDest->getFirstNonPHI(); + else + UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext()); + break; + } + Value *ChildUnwindDestToken; + if (auto *Invoke = dyn_cast<InvokeInst>(U)) { + ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI(); + } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) { + Instruction *ChildPad = cast<Instruction>(U); + auto Memo = MemoMap.find(ChildPad); + if (Memo == MemoMap.end()) { + // Haven't resolved this child yet; queue it and keep searching. + Worklist.push_back(ChildPad); + continue; + } + // We've checked this child, but still need to ignore it if it + // had no proof either way. + ChildUnwindDestToken = Memo->second; + if (!ChildUnwindDestToken) + continue; + } else { + // Not a relevant user of the cleanuppad + continue; + } + // In a well-formed program, the child/invoke must either unwind to + // an(other) child of the cleanup, or exit the cleanup. In the + // first case, continue searching. + if (isa<Instruction>(ChildUnwindDestToken) && + getParentPad(ChildUnwindDestToken) == CleanupPad) + continue; + UnwindDestToken = ChildUnwindDestToken; + break; + } + } + // If we haven't found an unwind dest for CurrentPad, we may have queued its + // children, so move on to the next in the worklist. + if (!UnwindDestToken) + continue; + + // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits + // any ancestors of CurrentPad up to but not including UnwindDestToken's + // parent pad. Record this in the memo map, and check to see if the + // original EHPad being queried is one of the ones exited. + Value *UnwindParent; + if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken)) + UnwindParent = getParentPad(UnwindPad); + else + UnwindParent = nullptr; + bool ExitedOriginalPad = false; + for (Instruction *ExitedPad = CurrentPad; + ExitedPad && ExitedPad != UnwindParent; + ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) { + // Skip over catchpads since they just follow their catchswitches. + if (isa<CatchPadInst>(ExitedPad)) + continue; + MemoMap[ExitedPad] = UnwindDestToken; + ExitedOriginalPad |= (ExitedPad == EHPad); + } + + if (ExitedOriginalPad) + return UnwindDestToken; + + // Continue the search. + } + + // No definitive information is contained within this funclet. + return nullptr; +} + +/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad, +/// return that pad instruction. If it unwinds to caller, return +/// ConstantTokenNone. If it does not have a definitive unwind destination, +/// return nullptr. +/// +/// This routine gets invoked for calls in funclets in inlinees when inlining +/// an invoke. Since many funclets don't have calls inside them, it's queried +/// on-demand rather than building a map of pads to unwind dests up front. +/// Determining a funclet's unwind dest may require recursively searching its +/// descendants, and also ancestors and cousins if the descendants don't provide +/// an answer. Since most funclets will have their unwind dest immediately +/// available as the unwind dest of a catchswitch or cleanupret, this routine +/// searches top-down from the given pad and then up. To avoid worst-case +/// quadratic run-time given that approach, it uses a memo map to avoid +/// re-processing funclet trees. The callers that rewrite the IR as they go +/// take advantage of this, for correctness, by checking/forcing rewritten +/// pads' entries to match the original callee view. +static Value *getUnwindDestToken(Instruction *EHPad, + UnwindDestMemoTy &MemoMap) { + // Catchpads unwind to the same place as their catchswitch; + // redirct any queries on catchpads so the code below can + // deal with just catchswitches and cleanuppads. + if (auto *CPI = dyn_cast<CatchPadInst>(EHPad)) + EHPad = CPI->getCatchSwitch(); + + // Check if we've already determined the unwind dest for this pad. + auto Memo = MemoMap.find(EHPad); + if (Memo != MemoMap.end()) + return Memo->second; + + // Search EHPad and, if necessary, its descendants. + Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap); + assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0)); + if (UnwindDestToken) + return UnwindDestToken; + + // No information is available for this EHPad from itself or any of its + // descendants. An unwind all the way out to a pad in the caller would + // need also to agree with the unwind dest of the parent funclet, so + // search up the chain to try to find a funclet with information. Put + // null entries in the memo map to avoid re-processing as we go up. + MemoMap[EHPad] = nullptr; + Instruction *LastUselessPad = EHPad; + Value *AncestorToken; + for (AncestorToken = getParentPad(EHPad); + auto *AncestorPad = dyn_cast<Instruction>(AncestorToken); + AncestorToken = getParentPad(AncestorToken)) { + // Skip over catchpads since they just follow their catchswitches. + if (isa<CatchPadInst>(AncestorPad)) + continue; + assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]); + auto AncestorMemo = MemoMap.find(AncestorPad); + if (AncestorMemo == MemoMap.end()) { + UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap); + } else { + UnwindDestToken = AncestorMemo->second; + } + if (UnwindDestToken) + break; + LastUselessPad = AncestorPad; + } + + // Since the whole tree under LastUselessPad has no information, it all must + // match UnwindDestToken; record that to avoid repeating the search. + SmallVector<Instruction *, 8> Worklist(1, LastUselessPad); + while (!Worklist.empty()) { + Instruction *UselessPad = Worklist.pop_back_val(); + assert(!MemoMap.count(UselessPad) || MemoMap[UselessPad] == nullptr); + MemoMap[UselessPad] = UnwindDestToken; + if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) { + for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) + for (User *U : HandlerBlock->getFirstNonPHI()->users()) + if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) + Worklist.push_back(cast<Instruction>(U)); + } else { + assert(isa<CleanupPadInst>(UselessPad)); + for (User *U : UselessPad->users()) + if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U)) + Worklist.push_back(cast<Instruction>(U)); + } + } + + return UnwindDestToken; +} + /// When we inline a basic block into an invoke, /// we have to turn all of the calls that can throw into invokes. /// This function analyze BB to see if there are any calls, and if so, /// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI /// nodes in that block with the values specified in InvokeDestPHIValues. -static BasicBlock * -HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) { +static BasicBlock *HandleCallsInBlockInlinedThroughInvoke( + BasicBlock *BB, BasicBlock *UnwindEdge, + UnwindDestMemoTy *FuncletUnwindMap = nullptr) { for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) { Instruction *I = &*BBI++; @@ -196,6 +427,31 @@ HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) { if (!CI || CI->doesNotThrow() || isa<InlineAsm>(CI->getCalledValue())) continue; + if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { + // This call is nested inside a funclet. If that funclet has an unwind + // destination within the inlinee, then unwinding out of this call would + // be UB. Rewriting this call to an invoke which targets the inlined + // invoke's unwind dest would give the call's parent funclet multiple + // unwind destinations, which is something that subsequent EH table + // generation can't handle and that the veirifer rejects. So when we + // see such a call, leave it as a call. + auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]); + Value *UnwindDestToken = + getUnwindDestToken(FuncletPad, *FuncletUnwindMap); + if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) + continue; +#ifndef NDEBUG + Instruction *MemoKey; + if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad)) + MemoKey = CatchPad->getCatchSwitch(); + else + MemoKey = FuncletPad; + assert(FuncletUnwindMap->count(MemoKey) && + (*FuncletUnwindMap)[MemoKey] == UnwindDestToken && + "must get memoized to avoid confusing later searches"); +#endif // NDEBUG + } + // Convert this function call into an invoke instruction. First, split the // basic block. BasicBlock *Split = @@ -328,13 +584,23 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, // This connects all the instructions which 'unwind to caller' to the invoke // destination. + UnwindDestMemoTy FuncletUnwindMap; for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); BB != E; ++BB) { if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) { if (CRI->unwindsToCaller()) { - CleanupReturnInst::Create(CRI->getCleanupPad(), UnwindDest, CRI); + auto *CleanupPad = CRI->getCleanupPad(); + CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI); CRI->eraseFromParent(); UpdatePHINodes(&*BB); + // Finding a cleanupret with an unwind destination would confuse + // subsequent calls to getUnwindDestToken, so map the cleanuppad + // to short-circuit any such calls and recognize this as an "unwind + // to caller" cleanup. + assert(!FuncletUnwindMap.count(CleanupPad) || + isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad])); + FuncletUnwindMap[CleanupPad] = + ConstantTokenNone::get(Caller->getContext()); } } @@ -345,12 +611,41 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, Instruction *Replacement = nullptr; if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) { if (CatchSwitch->unwindsToCaller()) { + Value *UnwindDestToken; + if (auto *ParentPad = + dyn_cast<Instruction>(CatchSwitch->getParentPad())) { + // This catchswitch is nested inside another funclet. If that + // funclet has an unwind destination within the inlinee, then + // unwinding out of this catchswitch would be UB. Rewriting this + // catchswitch to unwind to the inlined invoke's unwind dest would + // give the parent funclet multiple unwind destinations, which is + // something that subsequent EH table generation can't handle and + // that the veirifer rejects. So when we see such a call, leave it + // as "unwind to caller". + UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap); + if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken)) + continue; + } else { + // This catchswitch has no parent to inherit constraints from, and + // none of its descendants can have an unwind edge that exits it and + // targets another funclet in the inlinee. It may or may not have a + // descendant that definitively has an unwind to caller. In either + // case, we'll have to assume that any unwinds out of it may need to + // be routed to the caller, so treat it as though it has a definitive + // unwind to caller. + UnwindDestToken = ConstantTokenNone::get(Caller->getContext()); + } auto *NewCatchSwitch = CatchSwitchInst::Create( CatchSwitch->getParentPad(), UnwindDest, CatchSwitch->getNumHandlers(), CatchSwitch->getName(), CatchSwitch); for (BasicBlock *PadBB : CatchSwitch->handlers()) NewCatchSwitch->addHandler(PadBB); + // Propagate info for the old catchswitch over to the new one in + // the unwind map. This also serves to short-circuit any subsequent + // checks for the unwind dest of this catchswitch, which would get + // confused if they found the outer handler in the callee. + FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken; Replacement = NewCatchSwitch; } } else if (!isa<FuncletPadInst>(I)) { @@ -369,8 +664,8 @@ static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock, for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end(); BB != E; ++BB) - if (BasicBlock *NewBB = - HandleCallsInBlockInlinedThroughInvoke(&*BB, UnwindDest)) + if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke( + &*BB, UnwindDest, &FuncletUnwindMap)) // Update any PHI nodes in the exceptional block to indicate that there // is now a new entry in them. UpdatePHINodes(NewBB); @@ -1415,6 +1710,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } + // If we are inlining for an invoke instruction, we must make sure to rewrite + // any call instructions into invoke instructions. This is sensitive to which + // funclet pads were top-level in the inlinee, so must be done before + // rewriting the "parent pad" links. + if (auto *II = dyn_cast<InvokeInst>(TheCall)) { + BasicBlock *UnwindDest = II->getUnwindDest(); + Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); + if (isa<LandingPadInst>(FirstNonPHI)) { + HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } else { + HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); + } + } + // Update the lexical scopes of the new funclets and callsites. // Anything that had 'none' as its parent is now nested inside the callsite's // EHPad. @@ -1472,18 +1781,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, } } - // If we are inlining for an invoke instruction, we must make sure to rewrite - // any call instructions into invoke instructions. - if (auto *II = dyn_cast<InvokeInst>(TheCall)) { - BasicBlock *UnwindDest = II->getUnwindDest(); - Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI(); - if (isa<LandingPadInst>(FirstNonPHI)) { - HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo); - } else { - HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo); - } - } - // Handle any inlined musttail call sites. In order for a new call site to be // musttail, the source of the clone and the inlined call site must have been // musttail. Therefore it's safe to return without merging control into the diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp index 1cb65fc..abc9b65 100644 --- a/contrib/llvm/lib/Transforms/Utils/Local.cpp +++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp @@ -1592,3 +1592,205 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) { return false; } + +/// A potential constituent of a bitreverse or bswap expression. See +/// collectBitParts for a fuller explanation. +struct BitPart { + BitPart(Value *P, unsigned BW) : Provider(P) { + Provenance.resize(BW); + } + + /// The Value that this is a bitreverse/bswap of. + Value *Provider; + /// The "provenance" of each bit. Provenance[A] = B means that bit A + /// in Provider becomes bit B in the result of this expression. + SmallVector<int8_t, 32> Provenance; // int8_t means max size is i128. + + enum { Unset = -1 }; +}; + +/// Analyze the specified subexpression and see if it is capable of providing +/// pieces of a bswap or bitreverse. The subexpression provides a potential +/// piece of a bswap or bitreverse if it can be proven that each non-zero bit in +/// the output of the expression came from a corresponding bit in some other +/// value. This function is recursive, and the end result is a mapping of +/// bitnumber to bitnumber. It is the caller's responsibility to validate that +/// the bitnumber to bitnumber mapping is correct for a bswap or bitreverse. +/// +/// For example, if the current subexpression if "(shl i32 %X, 24)" then we know +/// that the expression deposits the low byte of %X into the high byte of the +/// result and that all other bits are zero. This expression is accepted and a +/// BitPart is returned with Provider set to %X and Provenance[24-31] set to +/// [0-7]. +/// +/// To avoid revisiting values, the BitPart results are memoized into the +/// provided map. To avoid unnecessary copying of BitParts, BitParts are +/// constructed in-place in the \c BPS map. Because of this \c BPS needs to +/// store BitParts objects, not pointers. As we need the concept of a nullptr +/// BitParts (Value has been analyzed and the analysis failed), we an Optional +/// type instead to provide the same functionality. +/// +/// Because we pass around references into \c BPS, we must use a container that +/// does not invalidate internal references (std::map instead of DenseMap). +/// +static const Optional<BitPart> & +collectBitParts(Value *V, bool MatchBSwaps, bool MatchBitReversals, + std::map<Value *, Optional<BitPart>> &BPS) { + auto I = BPS.find(V); + if (I != BPS.end()) + return I->second; + + auto &Result = BPS[V] = None; + auto BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + + if (Instruction *I = dyn_cast<Instruction>(V)) { + // If this is an or instruction, it may be an inner node of the bswap. + if (I->getOpcode() == Instruction::Or) { + auto &A = collectBitParts(I->getOperand(0), MatchBSwaps, + MatchBitReversals, BPS); + auto &B = collectBitParts(I->getOperand(1), MatchBSwaps, + MatchBitReversals, BPS); + if (!A || !B) + return Result; + + // Try and merge the two together. + if (!A->Provider || A->Provider != B->Provider) + return Result; + + Result = BitPart(A->Provider, BitWidth); + for (unsigned i = 0; i < A->Provenance.size(); ++i) { + if (A->Provenance[i] != BitPart::Unset && + B->Provenance[i] != BitPart::Unset && + A->Provenance[i] != B->Provenance[i]) + return Result = None; + + if (A->Provenance[i] == BitPart::Unset) + Result->Provenance[i] = B->Provenance[i]; + else + Result->Provenance[i] = A->Provenance[i]; + } + + return Result; + } + + // If this is a logical shift by a constant, recurse then shift the result. + if (I->isLogicalShift() && isa<ConstantInt>(I->getOperand(1))) { + unsigned BitShift = + cast<ConstantInt>(I->getOperand(1))->getLimitedValue(~0U); + // Ensure the shift amount is defined. + if (BitShift > BitWidth) + return Result; + + auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, + MatchBitReversals, BPS); + if (!Res) + return Result; + Result = Res; + + // Perform the "shift" on BitProvenance. + auto &P = Result->Provenance; + if (I->getOpcode() == Instruction::Shl) { + P.erase(std::prev(P.end(), BitShift), P.end()); + P.insert(P.begin(), BitShift, BitPart::Unset); + } else { + P.erase(P.begin(), std::next(P.begin(), BitShift)); + P.insert(P.end(), BitShift, BitPart::Unset); + } + + return Result; + } + + // If this is a logical 'and' with a mask that clears bits, recurse then + // unset the appropriate bits. + if (I->getOpcode() == Instruction::And && + isa<ConstantInt>(I->getOperand(1))) { + APInt Bit(I->getType()->getPrimitiveSizeInBits(), 1); + const APInt &AndMask = cast<ConstantInt>(I->getOperand(1))->getValue(); + + // Check that the mask allows a multiple of 8 bits for a bswap, for an + // early exit. + unsigned NumMaskedBits = AndMask.countPopulation(); + if (!MatchBitReversals && NumMaskedBits % 8 != 0) + return Result; + + auto &Res = collectBitParts(I->getOperand(0), MatchBSwaps, + MatchBitReversals, BPS); + if (!Res) + return Result; + Result = Res; + + for (unsigned i = 0; i < BitWidth; ++i, Bit <<= 1) + // If the AndMask is zero for this bit, clear the bit. + if ((AndMask & Bit) == 0) + Result->Provenance[i] = BitPart::Unset; + + return Result; + } + } + + // Okay, we got to something that isn't a shift, 'or' or 'and'. This must be + // the input value to the bswap/bitreverse. + Result = BitPart(V, BitWidth); + for (unsigned i = 0; i < BitWidth; ++i) + Result->Provenance[i] = i; + return Result; +} + +static bool bitTransformIsCorrectForBSwap(unsigned From, unsigned To, + unsigned BitWidth) { + if (From % 8 != To % 8) + return false; + // Convert from bit indices to byte indices and check for a byte reversal. + From >>= 3; + To >>= 3; + BitWidth >>= 3; + return From == BitWidth - To - 1; +} + +static bool bitTransformIsCorrectForBitReverse(unsigned From, unsigned To, + unsigned BitWidth) { + return From == BitWidth - To - 1; +} + +/// Given an OR instruction, check to see if this is a bitreverse +/// idiom. If so, insert the new intrinsic and return true. +bool llvm::recognizeBitReverseOrBSwapIdiom( + Instruction *I, bool MatchBSwaps, bool MatchBitReversals, + SmallVectorImpl<Instruction *> &InsertedInsts) { + if (Operator::getOpcode(I) != Instruction::Or) + return false; + if (!MatchBSwaps && !MatchBitReversals) + return false; + IntegerType *ITy = dyn_cast<IntegerType>(I->getType()); + if (!ITy || ITy->getBitWidth() > 128) + return false; // Can't do vectors or integers > 128 bits. + unsigned BW = ITy->getBitWidth(); + + // Try to find all the pieces corresponding to the bswap. + std::map<Value *, Optional<BitPart>> BPS; + auto Res = collectBitParts(I, MatchBSwaps, MatchBitReversals, BPS); + if (!Res) + return false; + auto &BitProvenance = Res->Provenance; + + // Now, is the bit permutation correct for a bswap or a bitreverse? We can + // only byteswap values with an even number of bytes. + bool OKForBSwap = BW % 16 == 0, OKForBitReverse = true; + for (unsigned i = 0; i < BW; ++i) { + OKForBSwap &= bitTransformIsCorrectForBSwap(BitProvenance[i], i, BW); + OKForBitReverse &= + bitTransformIsCorrectForBitReverse(BitProvenance[i], i, BW); + } + + Intrinsic::ID Intrin; + if (OKForBSwap && MatchBSwaps) + Intrin = Intrinsic::bswap; + else if (OKForBitReverse && MatchBitReversals) + Intrin = Intrinsic::bitreverse; + else + return false; + + Function *F = Intrinsic::getDeclaration(I->getModule(), Intrin, ITy); + InsertedInsts.push_back(CallInst::Create(F, Res->Provider, "rev", I)); + return true; +} diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index dc07440..2f3c311 100644 --- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -970,15 +970,34 @@ static Value *valueHasFloatPrecision(Value *Val) { return nullptr; } -//===----------------------------------------------------------------------===// -// Double -> Float Shrinking Optimizations for Unary Functions like 'floor' +/// Any floating-point library function that we're trying to simplify will have +/// a signature of the form: fptype foo(fptype param1, fptype param2, ...). +/// CheckDoubleTy indicates that 'fptype' must be 'double'. +static bool matchesFPLibFunctionSignature(const Function *F, unsigned NumParams, + bool CheckDoubleTy) { + FunctionType *FT = F->getFunctionType(); + if (FT->getNumParams() != NumParams) + return false; + + // The return type must match what we're looking for. + Type *RetTy = FT->getReturnType(); + if (CheckDoubleTy ? !RetTy->isDoubleTy() : !RetTy->isFloatingPointTy()) + return false; + + // Each parameter must match the return type, and therefore, match every other + // parameter too. + for (const Type *ParamTy : FT->params()) + if (ParamTy != RetTy) + return false; -Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, - bool CheckRetType) { + return true; +} + +/// Shrink double -> float for unary functions like 'floor'. +static Value *optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, + bool CheckRetType) { Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - if (FT->getNumParams() != 1 || !FT->getReturnType()->isDoubleTy() || - !FT->getParamType(0)->isDoubleTy()) + if (!matchesFPLibFunctionSignature(Callee, 1, true)) return nullptr; if (CheckRetType) { @@ -1013,15 +1032,10 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B, return B.CreateFPExt(V, B.getDoubleTy()); } -// Double -> Float Shrinking Optimizations for Binary Functions like 'fmin/fmax' -Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { +/// Shrink double -> float for binary functions like 'fmin/fmax'. +static Value *optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - FunctionType *FT = Callee->getFunctionType(); - // Just make sure this has 2 arguments of the same FP type, which match the - // result type. - if (FT->getNumParams() != 2 || FT->getReturnType() != FT->getParamType(0) || - FT->getParamType(0) != FT->getParamType(1) || - !FT->getParamType(0)->isFloatingPointTy()) + if (!matchesFPLibFunctionSignature(Callee, 2, true)) return nullptr; // If this is something like 'fmin((double)floatval1, (double)floatval2)', @@ -1394,12 +1408,21 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) { Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); - + Value *Ret = nullptr; if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" || Callee->getIntrinsicID() == Intrinsic::sqrt)) Ret = optimizeUnaryDoubleFP(CI, B, true); + // FIXME: Refactor - this check is repeated all over this file and even in the + // preceding call to shrink double -> float. + + // Make sure this has 1 argument of FP type, which matches the result type. + FunctionType *FT = Callee->getFunctionType(); + if (FT->getNumParams() != 1 || FT->getReturnType() != FT->getParamType(0) || + !FT->getParamType(0)->isFloatingPointTy()) + return Ret; + if (!CI->hasUnsafeAlgebra()) return Ret; diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 3b97ba2..015a739 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -3548,14 +3548,16 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc, E = CGF.EmitAnyExpr(EExpr); CGF.EmitOMPAtomicSimpleUpdateExpr( X, E, BO, /*IsXLHSInRHSPart=*/true, llvm::Monotonic, Loc, - [&CGF, UpExpr, VD, IPriv](RValue XRValue) { + [&CGF, UpExpr, VD, IPriv, Loc](RValue XRValue) { CodeGenFunction::OMPPrivateScope PrivateScope(CGF); - PrivateScope.addPrivate(VD, [&CGF, VD, XRValue]() -> Address { - Address LHSTemp = CGF.CreateMemTemp(VD->getType()); - CGF.EmitStoreThroughLValue( - XRValue, CGF.MakeAddrLValue(LHSTemp, VD->getType())); - return LHSTemp; - }); + PrivateScope.addPrivate( + VD, [&CGF, VD, XRValue, Loc]() -> Address { + Address LHSTemp = CGF.CreateMemTemp(VD->getType()); + CGF.emitOMPSimpleStore( + CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue, + VD->getType().getNonReferenceType(), Loc); + return LHSTemp; + }); (void)PrivateScope.Privatize(); return CGF.EmitAnyExpr(UpExpr); }); diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp index 14917c2..6855512 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/contrib/llvm/tools/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2163,17 +2163,17 @@ static void emitSimpleAtomicStore(CodeGenFunction &CGF, bool IsSeqCst, } } -static void emitSimpleStore(CodeGenFunction &CGF, LValue LVal, RValue RVal, - QualType RValTy, SourceLocation Loc) { - switch (CGF.getEvaluationKind(LVal.getType())) { +void CodeGenFunction::emitOMPSimpleStore(LValue LVal, RValue RVal, + QualType RValTy, SourceLocation Loc) { + switch (getEvaluationKind(LVal.getType())) { case TEK_Scalar: - CGF.EmitStoreThroughLValue(RValue::get(convertToScalarValue( - CGF, RVal, RValTy, LVal.getType(), Loc)), - LVal); + EmitStoreThroughLValue(RValue::get(convertToScalarValue( + *this, RVal, RValTy, LVal.getType(), Loc)), + LVal); break; case TEK_Complex: - CGF.EmitStoreOfComplex( - convertToComplexValue(CGF, RVal, RValTy, LVal.getType(), Loc), LVal, + EmitStoreOfComplex( + convertToComplexValue(*this, RVal, RValTy, LVal.getType(), Loc), LVal, /*isInit=*/false); break; case TEK_Aggregate: @@ -2201,7 +2201,7 @@ static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst, // list. if (IsSeqCst) CGF.CGM.getOpenMPRuntime().emitFlush(CGF, llvm::None, Loc); - emitSimpleStore(CGF, VLValue, Res, X->getType().getNonReferenceType(), Loc); + CGF.emitOMPSimpleStore(VLValue, Res, X->getType().getNonReferenceType(), Loc); } static void EmitOMPAtomicWriteExpr(CodeGenFunction &CGF, bool IsSeqCst, @@ -2459,7 +2459,7 @@ static void EmitOMPAtomicCaptureExpr(CodeGenFunction &CGF, bool IsSeqCst, } } // Emit post-update store to 'v' of old/new 'x' value. - emitSimpleStore(CGF, VLValue, NewVVal, NewVValType, Loc); + CGF.emitOMPSimpleStore(VLValue, NewVVal, NewVValType, Loc); // OpenMP, 2.12.6, atomic Construct // Any atomic construct with a seq_cst clause forces the atomically // performed operation to include an implicit flush operation without a diff --git a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h index b3d5035..4803b13 100644 --- a/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h +++ b/contrib/llvm/tools/clang/lib/CodeGen/CodeGenFunction.h @@ -2211,6 +2211,8 @@ public: llvm::Function *GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S); void GenerateOpenMPCapturedVars(const CapturedStmt &S, SmallVectorImpl<llvm::Value *> &CapturedVars); + void emitOMPSimpleStore(LValue LVal, RValue RVal, QualType RValTy, + SourceLocation Loc); /// \brief Perform element by element copying of arrays with type \a /// OriginalType from \a SrcAddr to \a DestAddr using copying procedure /// generated by \a CopyGen. diff --git a/contrib/llvm/tools/lli/lli.cpp b/contrib/llvm/tools/lli/lli.cpp index 67e7cbd..a76ec11 100644 --- a/contrib/llvm/tools/lli/lli.cpp +++ b/contrib/llvm/tools/lli/lli.cpp @@ -16,6 +16,7 @@ #include "OrcLazyJIT.h" #include "RemoteJITUtils.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/CodeGen/LinkAllCodegenComponents.h" @@ -741,11 +742,11 @@ std::unique_ptr<FDRPCChannel> launchRemote() { ChildPath.reset(new char[ChildExecPath.size() + 1]); std::copy(ChildExecPath.begin(), ChildExecPath.end(), &ChildPath[0]); ChildPath[ChildExecPath.size()] = '\0'; - std::string ChildInStr = std::to_string(PipeFD[0][0]); + std::string ChildInStr = utostr(PipeFD[0][0]); ChildIn.reset(new char[ChildInStr.size() + 1]); std::copy(ChildInStr.begin(), ChildInStr.end(), &ChildIn[0]); ChildIn[ChildInStr.size()] = '\0'; - std::string ChildOutStr = std::to_string(PipeFD[1][1]); + std::string ChildOutStr = utostr(PipeFD[1][1]); ChildOut.reset(new char[ChildOutStr.size() + 1]); std::copy(ChildOutStr.begin(), ChildOutStr.end(), &ChildOut[0]); ChildOut[ChildOutStr.size()] = '\0'; diff --git a/lib/clang/include/clang/Basic/Version.inc b/lib/clang/include/clang/Basic/Version.inc index edf9cbf..9bd24d7 100644 --- a/lib/clang/include/clang/Basic/Version.inc +++ b/lib/clang/include/clang/Basic/Version.inc @@ -7,4 +7,4 @@ #define CLANG_VENDOR "FreeBSD " -#define SVN_REVISION "257836" +#define SVN_REVISION "258549" |